Example #1
CLI.add_argument(
    "--move_after_loading",
    type=str,
    default='no'  # default is to leave loaded files in place rather than move them
)
CLI.add_argument(
    "--move_to_folder",
    type=str,
    default=''  # default is no destination folder, since files are not moved by default
)

# parse the command line
args = CLI.parse_args()
folders_to_load = args.folders
format = args.format.lower()
header = args.header.lower()
mode = args.mode.lower()
move_after_loading = args.move_after_loading.lower()
move_to_folder = args.move_to_folder.lower()

# call function with command line arguments
for folder in folders_to_load:
    s3_to_postGIS(folder_to_load=folder,
                  AWS_Credentials=get_connection_strings("AWS_DEV"),
                  format=format,
                  header=header,
                  mode=mode,
                  move_after_loading=move_after_loading,
                  move_to_folder=move_to_folder)
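This fragment reads args.folders, args.format, args.header, and args.mode, but the add_argument calls that declare them are not shown. A minimal sketch of how they might be declared, inferred only from the attributes parsed above; the nargs setting and the default values are assumptions:

import argparse

CLI = argparse.ArgumentParser()
# hypothetical declarations inferred from the attributes read after parse_args()
CLI.add_argument("--folders", nargs="*", type=str, default=[])  # S3 folder prefixes to load
CLI.add_argument("--format", type=str, default='csv')  # file format passed to the COPY options
CLI.add_argument("--header", type=str, default='true')  # whether the files include a header row
CLI.add_argument("--mode", type=str, default='append')  # append new records vs. drop and reload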
Example #2
# imports the function relies on; generate_table and correct_geo are project helpers whose modules are not shown in these examples
import os
import subprocess
import urllib.parse

import boto3

from connect_to_rds import get_connection_strings, create_postgres_engine


def s3_to_postGIS(folder_to_load: str, AWS_Credentials: dict, format: str,
                  header: str, mode: str, move_after_loading: str,
                  move_to_folder: str):

    # set up S3 and RDS connections
    s3_resource = boto3.resource(
        's3',
        aws_access_key_id=AWS_Credentials['aws_access_key_id'],
        aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
    bucket_name = AWS_Credentials['s3_bucket']
    bucket = s3_resource.Bucket(bucket_name)
    region = AWS_Credentials['region']
    dbname = 'postgres'
    env = "DEV"
    engine = create_postgres_engine(destination="AWS_PostGIS",
                                    target_db=dbname,
                                    env=env)
    db_credentials = get_connection_strings("AWS_PostGIS")
    db_uid = db_credentials[env]['UID']
    db_pwd = db_credentials[env]['PWD']
    db_host = db_credentials[env]['HOST']
    db_port = db_credentials[env]['PORT']

    # add the psql install location to the OS PATH so the psql commands below can find it
    psql_path = subprocess.check_output(['which',
                                         'psql']).strip().decode('utf-8')
    os.environ['PATH'] = os.path.dirname(psql_path) + os.pathsep + os.environ.get('PATH', '')

    # grab the list of all files in the target folder that have a target table
    # URL-encode the file key so keys with semicolons don't throw an error
    # update, January 2021: the script now throws an error when fed URL-encoded object keys, so the plain-text key is used instead
    files_to_load = [(urllib.parse.quote(obj.key),
                      obj.key, obj.Object().metadata['target_schema'],
                      obj.Object().metadata['target_table'])
                     for obj in bucket.objects.filter(Prefix=folder_to_load)
                     if 'target_table' in obj.Object().metadata.keys()
                     if format in obj.key]
    # build a distinct list of target tables so each one is dropped and recreated/truncated only once
    target_tables = list({(target_schema, target_table)
                          for (file_name, file_name_native, target_schema,
                               target_table) in files_to_load})

    # drop and recreate and/or truncate each target table
    for (target_schema, target_table) in target_tables:
        generate_table(engine=engine,
                       target_schema=target_schema,
                       target_table=target_table,
                       mode=mode)

    # set table import parameters that are the same for every file
    copy_parameters = '\'(FORMAT {}, HEADER {})\''.format(format, header)
    columns_to_copy = '\'\''
    aws_credentials_param = '\'{}\', \'{}\',\'\''.format(
        AWS_Credentials['aws_access_key_id'],
        AWS_Credentials['aws_secret_access_key'])

    # create file-specific table import parameters
    for (file_name, file_name_native, target_schema,
         target_table) in files_to_load:
        destination_table = '\'{}.{}\''.format(target_schema, target_table)
        create_s3_uri_param = '\'{}\', \'{}\',\'{}\''.format(
            AWS_Credentials['s3_bucket'], file_name_native, region)
        base_file_name = os.path.basename(file_name_native)

        # create import statement
        import_table_query = 'SELECT aws_s3.table_import_from_s3({}, {},{}, aws_commons.create_s3_uri({}) ,aws_commons.create_aws_credentials({}));'.format(
            destination_table, columns_to_copy, copy_parameters,
            create_s3_uri_param, aws_credentials_param)
        # create arg to pass to os.system
        os_system_arg = 'PGPASSWORD=\'{}\' psql --host={} --port={} --username={} --dbname={}  --no-password --command=\"{}\"'.format(
            db_pwd, db_host, db_port, db_uid, dbname, import_table_query)
        # execute
        if move_after_loading != 'yes':
            os.system(os_system_arg)
        elif move_after_loading == 'yes' and move_to_folder != '':
            os.system(os_system_arg)
            try:
                s3_resource.Object(bucket_name,
                                   move_to_folder + base_file_name).copy_from(
                                       CopySource={
                                           'Bucket': bucket_name,
                                           'Key': file_name_native
                                       })
                s3_resource.Object(bucket_name, file_name_native).delete()
            except Exception:
                print(file_name_native, " could not be copied and/or deleted")
                continue
        else:
            print("please provide move-to folder")
            continue

    # after data is loaded, update the geographies
    for (target_schema, target_table) in target_tables:
        correct_geo(engine=engine,
                    target_schema=target_schema,
                    target_table=target_table,
                    mode=mode)
import sqlalchemy
from connect_to_rds import get_connection_strings, create_postgres_engine
from add_location_info import add_location_info,add_school_info,add_roadway_info, add_intersection_info,create_final_table
from get_address import GeoLoc
import urllib 

dbname='postgres'
env="DEV"
engine = create_postgres_engine(destination="AWS_PostGIS", target_db=dbname, env=env)
db_credentials = get_connection_strings("AWS_PostGIS")
GOOGLE_API_KEY = get_connection_strings("GOOGLE_MAPS")["API_Key"]

# First move all source data records to a temp table
step_1_query="""
DROP TABLE IF EXISTS tmp.moving_violations;

CREATE TABLE tmp.moving_violations as 
SELECT * FROM source_data.moving_violations;

CREATE INDEX IF NOT EXISTS mv_location_index ON tmp.moving_violations (location);
"""

engine.execute(step_1_query)
print("temp table created")

# create the geocodes table if it doesn't already exist
create_tables_query = """
CREATE TABLE IF NOT EXISTS source_data.mv_geocodes (
    location varchar null
    ,lat numeric null
    ,long numeric null
import sqlalchemy
from connect_to_rds import get_connection_strings, create_postgres_engine
from add_location_info import add_location_info, add_school_info, create_final_table

dbname = 'postgres'
env = "DEV"
engine = create_postgres_engine(destination="AWS_PostGIS",
                                target_db=dbname,
                                env=env)
db_credentials = get_connection_strings("AWS_PostGIS")

geography_levels = {
    'comp_plan_area': {
        'geo_boundaries_source_table': 'source_data.comp_plan_areas',
        'orig_field_name': 'name'
    },
    'census_tract': {
        'geo_boundaries_source_table': 'source_data.census_tracts',
        'orig_field_name': 'tract'
    },
    'nbh_cluster_names': {
        'geo_boundaries_source_table': 'source_data.neighborhood_clusters',
        'orig_field_name': 'nbh_names'
    },
    'ward_name': {
        'geo_boundaries_source_table': 'source_data.ward_boundaries',
        'orig_field_name': 'name'
    },
    'anc_id': {
        'geo_boundaries_source_table': 'source_data.anc_boundaries',
        'orig_field_name': 'anc_id'
                                   partition_by_field='objectid',
                                   within_distance=0.001)
    print("roadway info query complete")
    next_tables = add_intersection_info(
        engine=engine,
        target_schema='tmp',
        target_table='crashes_intersection_info',
        from_schema=next_tables[0],
        from_table=next_tables[1],
        partition_by_field='objectid',
        within_distance=10)
    print("intersection info query complete")
    row_count = create_final_table(engine=engine,
                                   target_schema='analysis_data',
                                   target_table='dc_crashes_w_details',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1])
    print("final query complete with row count ", row_count)


# command line arguments
CLI = argparse.ArgumentParser()
CLI.add_argument("--env", type=str)

# parse the command line
args = CLI.parse_args()
env = args.env

if __name__ == "__main__":
    generate_crashes_table(AWS_Credentials=get_connection_strings("AWS_DEV"),
                           env=env)
Example #6
import datetime
from datetime import timezone
import time
from connect_to_rds import get_connection_strings
import pprint
import json
import boto3
import os
import pandas as pd
from pathlib import Path
import csv
from get_address import GeoLoc
import argparse

# get aws credentials
AWS_Credentials = get_connection_strings("AWS_DEV")
client = boto3.client(
    's3',
    aws_access_key_id=AWS_Credentials['aws_access_key_id'],
    aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
s3_resource = boto3.resource(
    's3',
    aws_access_key_id=AWS_Credentials['aws_access_key_id'],
    aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
bucket = s3_resource.Bucket(AWS_Credentials['s3_bucket'])
bucket_name = AWS_Credentials['s3_bucket']
region = AWS_Credentials['region']
GOOGLE_API_KEY = get_connection_strings("GOOGLE_MAPS")["API_Key"]

# set script variables
home = os.path.expanduser('~')
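The fragment stops after the connection setup. As a rough illustration of how these objects tie back to Example #2, a local file could be uploaded with the target_schema/target_table metadata that s3_to_postGIS reads from each S3 object; the file path, object key, and table names below are placeholders:

# hypothetical upload: the Metadata keys are the ones s3_to_postGIS expects on each object
local_file = os.path.join(home, 'example.csv')  # placeholder local path
client.upload_file(
    Filename=local_file,
    Bucket=bucket_name,
    Key='source-data/example-folder/example.csv',  # placeholder object key
    ExtraArgs={'Metadata': {'target_schema': 'source_data',
                            'target_table': 'example_table'}})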
Example #7
import os

from connect_to_rds import get_connection_strings, create_postgres_engine


def create_test_db(env: str, test_db_name: str):
    # connect to the prod db
    engine = create_postgres_engine(destination="AWS_PostGIS", env="PROD")
    db_credentials = get_connection_strings("AWS_PostGIS")
    # get prod master credentials
    prod_db_host = db_credentials['PROD']['HOST']
    prod_db_port = db_credentials['PROD']['PORT']
    prod_db_name = db_credentials['PROD']['DB']
    prod_db_uid = db_credentials['PROD']['UID']
    prod_db_pwd = db_credentials['PROD']['PWD']
    # get testdb credentials
    test_db_host = db_credentials[env.upper()]['HOST']
    test_db_port = db_credentials[env.upper()]['PORT']
    test_db_name = db_credentials[env.upper()]['DB']
    test_db_uid = db_credentials[env.upper()]['UID']
    test_db_pwd = db_credentials[env.upper()]['PWD']
    test_db_users = db_credentials[env.upper()]['USERS']

    kill_db_query = """
    SELECT	pg_terminate_backend (pid)
    FROM	pg_stat_activity
    WHERE	pg_stat_activity.datname = '{0}';
    """.format(test_db_name)

    # kill
    engine.execute(kill_db_query)

    # drop
    command = 'DROP DATABASE IF EXISTS {}'.format(test_db_name)
    os_system_arg = 'PGPASSWORD=\'{}\' psql --host={} --port={} --username={} --dbname={} --no-password --command=\"{}\"'.format(
        prod_db_pwd, prod_db_host, prod_db_port, prod_db_uid, prod_db_name,
        command)
    os.system(os_system_arg)

    # create
    command = 'CREATE DATABASE {}'.format(test_db_name)
    os_system_arg = 'PGPASSWORD=\'{}\' psql --host={} --port={} --username={} --dbname={} --no-password --command=\"{}\"'.format(
        prod_db_pwd, prod_db_host, prod_db_port, prod_db_uid, prod_db_name,
        command)
    os.system(os_system_arg)

    # create users on new db
    for user_pwd in test_db_users:
        user = list(user_pwd.keys())[0]
        pwd = user_pwd[user]
        command = 'CREATE ROLE {} WITH LOGIN ENCRYPTED PASSWORD \'{}\';'.format(
            user, pwd)
        os_system_arg = 'PGPASSWORD=\'{}\' psql -h {} -p {} -U {} --dbname={} --no-password --command=\"{}\"'.format(
            test_db_pwd, test_db_host, test_db_port, test_db_uid, test_db_name,
            command)
        print(os_system_arg)
        os.system(os_system_arg)

    # install PostGIS extensions
    command = """
    CREATE EXTENSION postgis;
    CREATE EXTENSION fuzzystrmatch;
    CREATE EXTENSION postgis_tiger_geocoder;
    CREATE EXTENSION postgis_topology;
    """
    os_system_arg = 'PGPASSWORD=\'{}\' psql -h {} -p {} -U {} --dbname={} --no-password --command=\"{}\"'.format(
        test_db_pwd, test_db_host, test_db_port, test_db_uid, test_db_name,
        command)
    os.system(os_system_arg)
    # alter schemas
    command = """
    ALTER SCHEMA tiger OWNER TO rds_superuser;
    ALTER SCHEMA tiger_data OWNER TO rds_superuser;
    ALTER SCHEMA topology OWNER TO rds_superuser;
    """
    os_system_arg = 'PGPASSWORD=\'{}\' psql -h {} -p {} -U {} --dbname={} --no-password --command=\"{}\"'.format(
        test_db_pwd, test_db_host, test_db_port, test_db_uid, test_db_name,
        command)
    os.system(os_system_arg)
    # create an exec() helper function; the backslash-escaped dollar signs keep the shell
    # from expanding $f$ and $1 inside the double-quoted --command string
    command = r'CREATE FUNCTION exec(text) returns text language plpgsql volatile AS \$f\$ BEGIN EXECUTE \$1; RETURN \$1; END; \$f\$;'
    os_system_arg = 'PGPASSWORD=\'{}\' psql -h {} -p {} -U {} --dbname={} --no-password --command=\"{}\"'.format(
        test_db_pwd, test_db_host, test_db_port, test_db_uid, test_db_name,
        command)
    os.system(os_system_arg)
    # execute function
    command = """
    SELECT exec('ALTER TABLE ' || quote_ident(s.nspname) || '.' || quote_ident(s.relname) || ' OWNER TO rds_superuser;')
        FROM (
            SELECT nspname, relname
            FROM pg_class c JOIN pg_namespace n ON (c.relnamespace = n.oid) 
            WHERE nspname in ('tiger','topology') AND
            relkind IN ('r','S','v') ORDER BY relkind = 'S')
        s;
    """
    os_system_arg = 'PGPASSWORD=\'{}\' psql -h {} -p {} -U {} --dbname={} --no-password --command=\"{}\"'.format(
        test_db_pwd, test_db_host, test_db_port, test_db_uid, test_db_name,
        command)
    os.system(os_system_arg)

    # get all schemas on prod db
    schemas = [
        r for (r, ) in engine.execute(
            "select distinct table_schema from information_schema.tables where is_insertable_into = 'YES' and table_schema not like 'pg_%%'"
        ).fetchall()
    ]

    # create engine on test db
    test_engine = create_postgres_engine(destination="AWS_PostGIS",
                                         env=env.upper())

    # create schemas
    for schema in schemas:
        create_schema_query = """
        CREATE SCHEMA IF NOT EXISTS {0};
        GRANT ALL PRIVILEGES ON SCHEMA {0} TO PUBLIC;
        """.format(schema)

        test_engine.execute(create_schema_query)
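A usage sketch for this function, assuming the credentials file contains an entry for the target environment; note that the test_db_name argument is immediately replaced by the DB name stored in that credentials entry, so the value passed below is only a placeholder:

if __name__ == "__main__":
    # hypothetical invocation; a "TEST" entry must exist in the AWS_PostGIS credentials
    create_test_db(env="TEST", test_db_name="test_postgres")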
Example #8
import tweepy
import datetime 
from datetime import timezone
import time
from connect_to_rds import get_connection_strings
import pprint 
import json
import boto3 
import os 
import pandas as pd
from pathlib import Path

# get aws credentials
AWS_Credentials = get_connection_strings("AWS_DEV")
s3 = boto3.client('s3'
    ,aws_access_key_id=AWS_Credentials['aws_access_key_id']
    ,aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
s3_resource = boto3.resource('s3'
    ,aws_access_key_id=AWS_Credentials['aws_access_key_id']
    ,aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
bucket = s3_resource.Bucket(AWS_Credentials['s3_bucket'])
region=AWS_Credentials['region']
# set script variables
home = os.path.expanduser('~')
destination_folder='source-data/twitter/all_time/'
filename_stem = 'twitter_search_'
date_tuples=[("201501010000","202012310000")]

environment_name ='dev'
current_date=datetime.datetime.now(timezone.utc).strftime("%Y-%m-%d")
Example #9
from connect_to_rds import get_connection_strings, create_postgres_engine


def refresh_test_db(env: str):
    engine = create_postgres_engine(destination="AWS_PostGIS", env=env.upper())
    db_credentials = get_connection_strings("AWS_PostGIS")
    db_users = db_credentials[env.upper()]['USERS']
    prod_db_host = db_credentials['PROD']['HOST']
    prod_db_port = db_credentials['PROD']['PORT']
    prod_db_name = db_credentials['PROD']['DB']
    prod_engine = create_postgres_engine(destination="AWS_PostGIS", env="PROD")

    create_fdw_query = """
        BEGIN;
        CREATE EXTENSION IF NOT EXISTS postgres_fdw;
        DROP SERVER IF EXISTS prod CASCADE;
        CREATE SERVER prod FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host '{prod_db_host}', dbname '{prod_db_name}');
        COMMIT;
    """.format(prod_db_name=prod_db_name, prod_db_host=prod_db_host)

    engine.execute(create_fdw_query)

    #  create user mappings
    for user_pwd in db_users:
        user = list(user_pwd.keys())[0]

        pwd = user_pwd[user]
        map_user_query = """
        CREATE USER MAPPING FOR {user}
            SERVER prod
            OPTIONS (user '{user}', password '{pwd}');
        """.format(user=user, pwd=pwd)

        engine.execute(map_user_query)

    # pull the schemas off the viz copy of the prod database
    schemas = [(r, 'prod_' + r) for (r, ) in prod_engine.execute(
        "select distinct table_schema from information_schema.tables where is_insertable_into = 'YES' and table_schema not like 'pg_%%'"
    ).fetchall()]

    # map schemas
    for source_schema, destination_schema in schemas:
        create_schema_query = """
        CREATE SCHEMA IF NOT EXISTS {destination_schema};
        GRANT ALL PRIVILEGES ON SCHEMA {destination_schema} TO PUBLIC;
        IMPORT FOREIGN SCHEMA {source_schema}
            FROM SERVER prod
            INTO {destination_schema};
        """.format(source_schema=source_schema,
                   destination_schema=destination_schema)

        engine.execute(create_schema_query)

    # pull all the tables from prod db
    schemas_tables = [(schema, table) for (
        schema, table
    ) in prod_engine.execute(
        "select distinct table_schema,table_name from information_schema.tables where is_insertable_into = 'YES' and table_schema not like 'pg_%%' and table_name not like '[%%]'"
    ).fetchall()]

    #  create and populate tables
    for schema, table in schemas_tables:
        create_populate_tables_query = """
        CREATE TABLE IF NOT EXISTS {schema}."{table}" (LIKE prod_{schema}."{table}");

        DELETE FROM {schema}."{table}";

        INSERT INTO {schema}."{table}"
            SELECT * FROM prod_{schema}."{table}";

        GRANT ALL PRIVILEGES ON {schema}."{table}" TO PUBLIC;
        """.format(schema=schema, table=table)

        print(create_populate_tables_query)
        engine.execute(create_populate_tables_query)
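A matching invocation sketch; it assumes the test database already exists (for example, created by create_test_db in Example #7) and that the same environment entry exists in the credentials file:

if __name__ == "__main__":
    refresh_test_db(env="TEST")  # hypothetical environment name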
Example #10
import boto3
import os
from pathlib import Path
from connect_to_rds import get_connection_strings
import datetime
from datetime import timezone
import requests
import json
import sys
import csv

Census_API_Key = get_connection_strings('CENSUS')['API_Key']

AWS_Credentials = get_connection_strings('AWS_DEV')

sys.path.append(os.path.expanduser('~'))

s3 = boto3.resource(
    's3',
    aws_access_key_id=AWS_Credentials['aws_access_key_id'],
    aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
client = boto3.client(
    's3',
    aws_access_key_id=AWS_Credentials['aws_access_key_id'],
    aws_secret_access_key=AWS_Credentials['aws_secret_access_key'])
bucket_name = AWS_Credentials['s3_bucket']
region = AWS_Credentials['region']

census_endpoint = "https://api.census.gov/data/2019/acs/acs5"

metadata = {
Example #11
from connect_to_rds import get_connection_strings
from get_address import GeoLoc


def geocode_text(engine, records_to_geocode: list, administrative_area: str,
                 **kwargs):

    text_type = kwargs.get('text_type', 'Unknown')

    # create tables if they don't already exist
    create_geocodes_location_table = """
    CREATE TABLE IF NOT EXISTS source_data.geocoded_text (
        GET_DATETIME TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
        ,text varchar null
        ,text_type varchar null
        ,point_type varchar null
        ,point_geography geography null
        ,polygon_geography geography null
        ,administrative_area varchar null
    )
    """
    engine.execute(create_geocodes_location_table)

    create_missing_geocodes_location_table = """
    CREATE TABLE IF NOT EXISTS source_data.geocoded_text_fails (
        GET_DATETIME TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
        ,text varchar null
        ,text_type varchar null
        ,administrative_area varchar null
    )
    """
    engine.execute(create_missing_geocodes_location_table)

    # grab the google API key
    GOOGLE_API_KEY = get_connection_strings("GOOGLE_MAPS")["API_Key"]

    # pull records that were already geocoded and either succeeded or failed
    already_geocoded_records = [
        r for (r, ) in engine.execute(
            "select distinct text from source_data.geocoded_text").fetchall()
    ]
    already_failed_records = [
        r for (r, ) in engine.execute(
            "select distinct text from source_data.geocoded_text_fails").
        fetchall()
    ]

    # limit records to only those that haven't already been checked
    new_records = [
        r for r in records_to_geocode if r not in already_failed_records
        if r not in already_geocoded_records
    ]
    print(len(records_to_geocode), " records provided, ", len(new_records),
          " of them need to be geocoded")

    # then using the google maps API, add a lat and long for addresses that don't have them
    for record in new_records:
        address = str(record)
        try:
            geo_loc_instance = GeoLoc(GOOGLE_API_KEY)
            geocode_location = geo_loc_instance.GetGeoLoc(
                test_address=address,
                test_administrative_area=administrative_area)
            # insert into the table
            point_lat = geocode_location['geometry']['location']['lat']
            point_long = geocode_location['geometry']['location']['lng']
            point_type = geocode_location['geometry']['location_type']
            ne_lat = geocode_location['geometry']['viewport']['northeast'][
                'lat']
            ne_long = geocode_location['geometry']['viewport']['northeast'][
                'lng']
            sw_lat = geocode_location['geometry']['viewport']['southwest'][
                'lat']
            sw_long = geocode_location['geometry']['viewport']['southwest'][
                'lng']

            insert_record_query = """
                INSERT INTO source_data.geocoded_text (text,text_type, point_type,point_geography,polygon_geography,administrative_area)
                SELECT '{address}', '{text_type}', '{point_type}'
                ,ST_SetSRID(ST_MakePoint({point_long}, {point_lat}),4326)::geography
                ,ST_Polygonize(results)::geography
                ,'{administrative_area}'
                FROM 
                (SELECT
                    ST_MakeLine(
                        ST_SetSRID(ST_MakePoint({sw_long},{sw_lat}), 4326), ST_SetSRID(ST_MakePoint({ne_long},{sw_lat}),4326)
                        ) AS results
                UNION
                SELECT
                    ST_MakeLine(
                        ST_SetSRID(ST_MakePoint({ne_long},{sw_lat}), 4326), ST_SetSRID(ST_MakePoint({ne_long},{ne_lat}),4326)
                        )
                UNION
                SELECT    
                    ST_MakeLine(
                        ST_SetSRID(ST_MakePoint({ne_long},{ne_lat}), 4326), ST_SetSRID(ST_MakePoint({sw_long},{ne_lat}),4326)
                        )   
                UNION
                SELECT    
                    ST_MakeLine(
                        ST_SetSRID(ST_MakePoint({sw_long},{ne_lat}), 4326), ST_SetSRID(ST_MakePoint({sw_long},{sw_lat}),4326)
                        )   ) as tmp;

                """.format(address=address,
                           text_type=text_type,
                           point_type=point_type,
                           point_long=point_long,
                           point_lat=point_lat,
                           sw_lat=sw_lat,
                           sw_long=sw_long,
                           ne_lat=ne_lat,
                           ne_long=ne_long,
                           administrative_area=administrative_area)
            engine.execute(insert_record_query)
        except Exception as error:
            # print("could not geocode record ", address)
            # replace single quotes to avoid insertion errors
            address = address.replace('\'', '')
            engine.execute(
                "INSERT INTO source_data.geocoded_text_fails (text, text_type,administrative_area) SELECT '{address}', '{text_type}','{administrative_area}'"
                .format(address=address,
                        text_type=text_type,
                        administrative_area=administrative_area))
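A usage sketch tying this function to the moving-violations temp table staged in an earlier fragment; the engine setup mirrors the other examples, and the administrative_area and text_type values are assumptions:

# hypothetical usage: geocode the distinct location strings staged in tmp.moving_violations
from connect_to_rds import create_postgres_engine

engine = create_postgres_engine(destination="AWS_PostGIS", target_db='postgres', env="DEV")
locations = [r for (r, ) in engine.execute(
    "select distinct location from tmp.moving_violations where location is not null").fetchall()]
geocode_text(engine=engine,
             records_to_geocode=locations,
             administrative_area='Washington, D.C.',  # assumed administrative area
             text_type='moving_violation_location')  # assumed label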
Example #12
    """

    final_query="""
    DROP TABLE IF EXISTS {0}.{1};

    CREATE TABLE {0}.{1} AS 
        SELECT * FROM tmp_pulsepoint_units_join;

    GRANT ALL PRIVILEGES ON {0}.{1} TO PUBLIC;
    """.format(target_schema, target_table)

    engine.execute(step1_query)
    engine.execute(step_2_query)
    engine.execute(step_3_query)
    engine.execute(final_query)


# command line arguments
CLI = argparse.ArgumentParser()
CLI.add_argument("--env", type=str)

# parse the command line
args = CLI.parse_args()
env=args.env

if __name__ == "__main__":
    generate_pulsepoint_table(AWS_Credentials=get_connection_strings("AWS_DEV"), env=env)
Example #13
import sqlalchemy
from connect_to_rds import get_connection_strings, create_postgres_engine
import requests
import json

dbname = 'postgres'
env = "DEV"
engine = create_postgres_engine(destination="AWS_PostGIS",
                                target_db=dbname,
                                env=env)
db_credentials = get_connection_strings("AWS_PostGIS")
WALKSCORE_API_KEY = get_connection_strings("WALKSCORE")["API_Key"]
# walkscore-specific variables
walkscore_endpoint = 'https://api.walkscore.com/score'
format = 'json'

# First move all source data records to a temp table
step_1_query = """

CREATE TABLE IF NOT EXISTS tmp.address_walkscores as 
(
    SELECT * FROM 
    (  
        SELECT DISTINCT b.objectid as census_block_objectid, a.fulladdress, a.latitude, a.longitude, b.geography
            , ROW_NUMBER() over (PARTITION BY b.objectid ORDER BY a.fulladdress) AS row_num
        FROM source_data.address_points a
        INNER JOIN source_data.census_blocks b on ST_Intersects(a.geography::geometry, b.geography::geometry)
    ) as tmp WHERE ROW_NUM = 1
);

CREATE INDEX IF NOT EXISTS address_index ON tmp.address_walkscores (fulladdress);
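The fragment breaks off inside step_1_query. Assuming that query has been completed and executed, a rough sketch of the per-address API calls that would presumably follow, using the standard Walkscore Score API query parameters (address, lat, lon, wsapikey, format):

# hypothetical request loop over the staged addresses
rows = engine.execute(
    "select fulladdress, latitude, longitude from tmp.address_walkscores").fetchall()
for fulladdress, latitude, longitude in rows:
    response = requests.get(walkscore_endpoint,
                            params={'format': format,
                                    'address': fulladdress,
                                    'lat': latitude,
                                    'lon': longitude,
                                    'wsapikey': WALKSCORE_API_KEY})
    scores = response.json()  # contains the walk score fields when the call succeeds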