def append_parcel_identifier(tbl, schema, tbl_geom, parcel_identifier):
    """Append parcel identifier to target table."""
    print 'Appending parcel identifier to %s' % tbl
    if not field_in_table(parcel_identifier, tbl):
        exec_sql("alter table %s.%s add %s integer default 0;"
                 % (schema, tbl, parcel_identifier))
    exec_sql("update %s.%s set %s = a.%s from parcels a "
             "where st_contains(a.geom, %s.%s.%s);"
             % (schema, tbl, parcel_identifier, parcel_identifier,
                schema, tbl, tbl_geom))
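# Usage sketch, mirroring the inline tagging of establishment points
# elsewhere in this file (same table/column names as used there):
append_parcel_identifier('establishment_points', 'staging', 'geom', 'parcel_id')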
def lat_long_to_point_geometry(tbl, schema, x_col='longitude', y_col='latitude',
                               geom_field_name='geom', target_srid=2768):
    """Creates point geometry on db table based on lat/long fields."""
    print 'Creating point geometry from lat/long in table %s.' % tbl
    if not field_in_table(geom_field_name, tbl):
        exec_sql("ALTER TABLE %s.%s ADD COLUMN %s geometry;"
                 % (schema, tbl, geom_field_name))
    exec_sql("UPDATE %s.%s SET %s = ST_GeomFromText('POINT(' || %s || ' ' || %s || ')', 4326);"
             % (schema, tbl, geom_field_name, x_col, y_col))
    exec_sql("CREATE INDEX %s_gidx on %s.%s using gist (%s);"
             % (tbl, schema, tbl, geom_field_name))
    exec_sql("SELECT UpdateGeometrySRID('%s', '%s', '%s', %s);"
             % (schema, tbl, geom_field_name, target_srid))
    exec_sql("UPDATE %s.%s SET %s = ST_TRANSFORM(ST_SetSRID(%s, 4326), %s);"
             % (schema, tbl, geom_field_name, geom_field_name, target_srid))
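# Usage sketch: this helper encapsulates the same sequence applied inline
# to staging.redfin later in this file (lat/long columns, SRID 2768):
lat_long_to_point_geometry('redfin', 'staging', 'longitude', 'latitude', 'geom', 2768)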
### UrbanCanvas

## spandex localhost db config
db_config = dict(load_config().items('database'))

## UrbanCanvas db config
urbancanvas_db_config = {'database': 'sandag',
                         'host': 'urbancanvas.cp2xwchuariu.us-west-2.rds.amazonaws.com',
                         'password': '******',
                         'port': '5432',
                         'user': '******'}

## If 'loading' schema not on localhost db, create. This schema is for tables to load to UrbanCanvas.
exec_sql("CREATE SCHEMA IF NOT EXISTS loading;")

# UrbanCanvas exec_sql func, for executing sql on the UrbanCanvas database
def exec_sql2(query):
    print query
    conn_string = ("host=urbancanvas.cp2xwchuariu.us-west-2.rds.amazonaws.com "
                   "dbname='sandag' user='******' password='******' port=5432")
    import psycopg2
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()
    cur.execute(query)
    conn.commit()
    cur.close()
    conn.close()
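# A hedged variant that derives the connection from urbancanvas_db_config
# instead of repeating the credentials inline; the helper name
# exec_sql_remote is hypothetical, not part of the original pipeline.
import psycopg2

def exec_sql_remote(query, cfg=urbancanvas_db_config):
    print query
    conn = psycopg2.connect(host=cfg['host'], dbname=cfg['database'],
                            user=cfg['user'], password=cfg['password'],
                            port=cfg['port'])
    try:
        cur = conn.cursor()
        cur.execute(query)
        conn.commit()
        cur.close()
    finally:
        conn.close()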
for i in range(73):
    if i < 10:
        filename = 'p50%s_d00.shp' % i
    else:
        filename = 'p5%s_d00.shp' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)
    if os.path.exists(filepath):
        subfile_name = filename[:-4]
        shapefiles['staging.%s' % subfile_name] = 'puma_geom/%s' % filename

loader.load_shp_map(shapefiles)
conform_srids(loader.srid, schema=loader.tables.staging, fix=True)

exec_sql("DROP table if exists staging.puma00;")

# Assemble a UNION ALL query across the per-PUMA staging tables. Joining
# a list of SELECTs avoids a dangling UNION ALL when trailing files are
# missing.
selects = []
for i in range(73):
    if i < 10:
        filename = 'p50%s_d00.shp' % i
    else:
        filename = 'p5%s_d00.shp' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)
    if os.path.exists(filepath):
        subfile_name = filename[:-4]
        selects.append('select area, perimeter, puma5, name, geom from staging.%s'
                       % subfile_name)
sql_str = ' UNION ALL '.join(selects)
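# The snippet stops after assembling sql_str; presumably the union is then
# materialized into staging.puma00 along these lines (an assumption, not
# shown above):
exec_sql("CREATE TABLE staging.puma00 AS %s;" % sql_str)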
### Database connection
loader = TableLoader()
t = loader.tables

### Tagging parcels with taz id based on point-in-polygon
tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key')

### Deal with special cases where parcels do not fall within TAZ boundaries
def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

exec_sql("""
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry,
                                                maxe double precision,
                                                maxslice double precision)
  RETURNS integer AS
$BODY$
BEGIN
  FOR i IN 0..maxslice LOOP
    IF st_expand(a, maxe*i/maxslice) && b THEN
      RETURN i;
    END IF;
  END LOOP;
  RETURN 99999999;
END;
$BODY$
  LANGUAGE plpgsql IMMUTABLE
  COST 100;
ALTER FUNCTION expandoverlap_metric(geometry, geometry, double precision, double precision)
  OWNER TO postgres
""")

if not db_to_df("select exists (select 1 from pg_type where typname = 'pgis_nn');").values[0][0]:
    exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))")

exec_sql("""
CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision,
                                       numnn integer, maxslices integer,
                                       lookupset character varying,
                                       swhere character varying,
                                       sgid2field character varying,
                                       sgeom2field character varying)
  RETURNS SETOF pgis_nn AS
$BODY$
DECLARE
  strsql text;
  rec pgis_nn;
  ncollected integer;
  it integer;
BEGIN
  ncollected := 0;
  it := 0;
  WHILE ncollected < numnn AND it <= maxslices LOOP
    strsql := 'SELECT currentit.' || sgid2field
      || ', st_distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM '
      || lookupset || ' as currentit, (SELECT geometry(''' || CAST(geom1 As text)
      || ''') As geom) As ref WHERE ' || swhere
      || ' AND st_distance(ref.geom, currentit.' || sgeom2field || ') <= '
      || CAST(distguess As varchar(200))
      || ' AND st_expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100))
      || ') && currentit.' || sgeom2field
      || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', '
      || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200))
      || ') = ' || CAST(it As varchar(100))
      || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field
      || ') LIMIT ' || CAST((numnn - ncollected) As varchar(200));
    FOR rec in EXECUTE (strsql) LOOP
      IF ncollected < numnn THEN
        ncollected := ncollected + 1;
        RETURN NEXT rec;
      ELSE
        EXIT;
      END IF;
    END LOOP;
    it := it + 1;
  END LOOP;
END
$BODY$
  LANGUAGE plpgsql STABLE
  COST 100
  ROWS 1000;
ALTER FUNCTION _pgis_fn_nn(geometry, double precision, integer, integer,
                           character varying, character varying,
                           character varying, character varying)
  OWNER TO postgres
""")

exec_sql("""
CREATE OR REPLACE FUNCTION pgis_fn_nn(geom1 geometry, distguess double precision,
                                      numnn integer, maxslices integer,
                                      lookupset character varying,
                                      swhere character varying,
                                      sgid2field character varying,
                                      sgeom2field character varying)
  RETURNS SETOF pgis_nn AS
$BODY$
  SELECT * FROM _pgis_fn_nn($1, $2, $3, $4, $5, $6, $7, $8);
$BODY$
  LANGUAGE sql STABLE
  COST 100
  ROWS 1000;
ALTER FUNCTION pgis_fn_nn(geometry, double precision, integer, integer,
                          character varying, character varying,
                          character varying, character varying)
  OWNER TO postgres
""")

exec_sql('drop table if exists parcels_outside_taz;')
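# The snippet defines the nearest-neighbor machinery and clears
# parcels_outside_taz but stops before using it. A hedged sketch of the
# likely next step: collect the untagged parcels and assign each the
# nearest TAZ via pgis_fn_nn. The distance guess (5000 map units), slice
# count, and query shape are illustrative assumptions.
exec_sql("""
    create table parcels_outside_taz as
    select gid, geom from parcels where taz is null;
""")
nearest = db_to_df("""
    select p.gid,
           (pgis_fn_nn(p.geom, 5000, 1, 10,
                       'staging.taz', 'true', 'taz_key', 'geom')).nn_gid as taz
    from parcels_outside_taz p;
""")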
nonres_sqft_zone = pd.DataFrame(
    {'observed': parcels.groupby('taz').non_residential_sqft.sum(),
     'target': targetunits.targetnonressqft})

# Imputation plan:
# - For all employment points, translate to nonres-sqft by multiplying by 250.
# - Filter out synthetic job-based buildings so that we keep only those with
#   no residential units and less than 500 sqft of existing space.
# - For each TAZ, calculate the difference needed to match the aggregate target.
# - If nonres-sqft must be incremented upwards, sort synthetic buildings by
#   sqft and take the top x that covers the needed difference (see the sketch
#   after this block).
# - If there are no valid job points and no existing nonres-sqft, introduce a
#   synthetic building equal to the target and put it on the TAZ's biggest parcel.
# - Do the same in the case of no parcels (and add a synthetic parcel).
# - Scale to match. No need to tag the imputation_flag column based on
#   scaling; otherwise everything would be tagged.
nonres_sqft_zone['difference'] = nonres_sqft_zone.target - nonres_sqft_zone.observed

## Append the unique parcel identifier to the establishment point records.
if 'parcel_id' not in db_to_df("SELECT column_name FROM information_schema.columns WHERE table_name='establishment_points'").column_name.values:
    exec_sql("alter table staging.establishment_points add parcel_id integer default 0;")
    exec_sql("update staging.establishment_points set parcel_id = a.gid from parcels a where st_within(staging.establishment_points.geom, a.geom);")

# Load the establishment points to be used for non-residential sqft imputation
estab_points = db_to_df('select emp_here, naics2, parcel_id from staging.establishment_points;')
estabs_joined = pd.merge(estab_points, parcels.reset_index(),
                         left_on='parcel_id', right_on='gid')

# Filter out some synthetic job-based buildings so that we keep only those
# with no existing residential units and less than 500 sqft of existing
# non-residential space.
estabs_joined = estabs_joined[
    np.logical_or(estabs_joined.residential_units == 0,
                  estabs_joined.residential_units.isnull()) &
    np.logical_or(estabs_joined.non_residential_sqft < 500,
                  estabs_joined.non_residential_sqft.isnull())]

# For all employment points, translate to nonres-sqft by multiplying by 250.
estabs_joined['nrsqft_assumed'] = 250 * estabs_joined.emp_here

# Aggregate employment-point-based buildings to the parcel level
estabs_joined_nrsqft = estabs_joined.groupby('gid').nrsqft_assumed.sum()
estabs_joined_taz = estabs_joined.groupby('gid').taz.max()
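# A hedged sketch of the "take the top x that covers the needed difference"
# step from the plan above, for one TAZ's candidate synthetic buildings.
# The helper name is hypothetical; sort_values is the modern pandas
# spelling (older pandas used DataFrame.sort).
def take_top_covering(cands, needed_sqft):
    """Pick candidate synthetic buildings, largest first, until their
    combined assumed sqft covers a TAZ's remaining difference."""
    cands = cands.sort_values('nrsqft_assumed', ascending=False)
    covered_before = cands.nrsqft_assumed.cumsum().shift(1).fillna(0)
    # keep rows until the running total first reaches the needed amount
    return cands[covered_before < needed_sqft]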
parcels['costar_elevators'] = costar.groupby('gid').elevators.max()
parcels['costar_rent'] = costar.groupby('gid').rent.max()

######## *UPDATE* ########

print 'Point-based impute done. Loading results back to db.'

# Write the updated fields back to the database
parcels_imputed = parcels[['year_built', 'sqft_per_unit', 'non_residential_sqft',
                           'redfin_sale_price', 'redfin_sale_year',
                           'redfin_home_type', 'gov_type', 'gov_sqft',
                           'costar_property_type', 'costar_secondary_type',
                           'costar_building_name', 'costar_elevators',
                           'costar_rent', 'imputation_flag']]

# Fill nulls in the text columns with empty strings, then encode as UTF-8
for col in ['redfin_home_type', 'costar_property_type', 'costar_secondary_type',
            'costar_building_name', 'costar_rent']:
    parcels_imputed[col] = parcels_imputed[col].fillna('')
    parcels_imputed[col] = parcels_imputed[col].str.encode('utf-8')

df_to_db(parcels_imputed, 'parcels_imputed', schema=loader.tables.staging)

# Update the master parcel table on the database
exec_sql("update parcels set year_built = a.year_built from staging.parcels_imputed a where a.gid = parcels.gid;")
exec_sql("update parcels set sqft_per_unit = a.sqft_per_unit from staging.parcels_imputed a where a.gid = parcels.gid;")
exec_sql("update parcels set non_residential_sqft = a.non_residential_sqft from staging.parcels_imputed a where a.gid = parcels.gid;")
exec_sql("update parcels set imputation_flag = a.imputation_flag from staging.parcels_imputed a where a.gid = parcels.gid;")

def add_field_and_populate_with_imputed_value(varname, vartype):
    """Add the column to parcels if missing, then fill it from staging.parcels_imputed."""
    if not field_in_table(varname, 'parcels'):
        exec_sql("ALTER TABLE parcels ADD COLUMN %s %s;" % (varname, vartype))
    exec_sql("update parcels set %s = a.%s from staging.parcels_imputed a where a.gid = parcels.gid;"
             % (varname, varname))

add_field_and_populate_with_imputed_value('redfin_sale_price', 'numeric')
add_field_and_populate_with_imputed_value('redfin_sale_year', 'numeric')
add_field_and_populate_with_imputed_value('redfin_home_type', 'text')
add_field_and_populate_with_imputed_value('gov_type', 'numeric')
add_field_and_populate_with_imputed_value('gov_sqft', 'numeric')
add_field_and_populate_with_imputed_value('costar_elevators', 'numeric')
exec_sql(""" -- SCL drop table if exists condos_scl; SELECT max(county_id) as county_id, max(apn) as apn, max(parcel_id_local) as parcel_id_local, max(land_use_type_id) as land_use_type_id, max(res_type) as res_type, sum(land_value) as land_value, sum(improvement_value) as improvement_value, max(year_assessed) as year_assessed, max(year_built) as year_built, sum(building_sqft) as building_sqft, sum(non_residential_sqft) as non_residential_sqft, sum(residential_units) as residential_units, max(sqft_per_unit) as sqft_per_unit, max(stories) as stories, max(tax_exempt) as tax_exempt, condo_identifier, ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom, max(gid) as gid, 'merged' as imputation_flag, max(development_type_id) as development_type_id into condos_scl FROM parcels where county_id = '085' AND land_use_type_id = 'RCON' AND length(condo_identifier)>3 GROUP BY condo_identifier; delete from parcels where county_id = '085' AND land_use_type_id = 'RCON' AND length(condo_identifier)>3 ; insert into parcels select * from condos_scl; -- CNC drop table if exists condos_cnc; SELECT max(county_id) as county_id, max(apn) as apn, max(parcel_id_local) as parcel_id_local, max(land_use_type_id) as land_use_type_id, max(res_type) as res_type, sum(land_value) as land_value, sum(improvement_value) as improvement_value, max(year_assessed) as year_assessed, max(year_built) as year_built, sum(building_sqft) as building_sqft, sum(non_residential_sqft) as non_residential_sqft, sum(residential_units) as residential_units, max(sqft_per_unit) as sqft_per_unit, max(stories) as stories, max(tax_exempt) as tax_exempt, condo_identifier, ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom, max(gid) as gid, 'merged' as imputation_flag, max(development_type_id) as development_type_id into condos_cnc FROM parcels where county_id = '013' AND land_use_type_id = '29' AND length(condo_identifier)>3 GROUP BY condo_identifier; delete from parcels where county_id = '013' AND land_use_type_id = '29' AND length(condo_identifier)>3 ; insert into parcels select * from condos_cnc; -- SON drop table if exists condos_son; SELECT max(county_id) as county_id, max(apn) as apn, max(parcel_id_local) as parcel_id_local, max(land_use_type_id) as land_use_type_id, max(res_type) as res_type, sum(land_value) as land_value, sum(improvement_value) as improvement_value, max(year_assessed) as year_assessed, max(year_built) as year_built, sum(building_sqft) as building_sqft, sum(non_residential_sqft) as non_residential_sqft, sum(residential_units) as residential_units, max(sqft_per_unit) as sqft_per_unit, max(stories) as stories, max(tax_exempt) as tax_exempt, condo_identifier, ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom, max(gid) as gid, 'merged' as imputation_flag, max(development_type_id) as development_type_id into condos_son FROM parcels where county_id = '097' AND length(condo_identifier)>3 and res_type = 'multi' GROUP BY condo_identifier; delete from parcels where county_id = '097' AND length(condo_identifier)>3 and res_type = 'multi'; insert into parcels select * from condos_son; """)
def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

## There are some duplicate parcel_id's in local_effect_distances;
## delete one record from each duplicate pair.
delete_dup_parcel_id_sql = '''
with a as (
    select parcel_id, count(*) as numparcels
    from staging.local_effect_distances
    group by parcel_id
),
b as (
    -- These are the parcel_id's where one of each duplicate pair needs to be removed
    select parcel_id from a where numparcels > 1
),
c as (
    -- These are the ctid's to delete
    select max(ctid) as ctid_max
    from staging.local_effect_distances
    where parcel_id in (select parcel_id from b)
    group by parcel_id
)
delete from staging.local_effect_distances where ctid in (select ctid_max from c);
'''
exec_sql(delete_dup_parcel_id_sql)

# Join parcels to local_effect_distances
parcel_join_sql = '''
DROP table if exists public.parcels;
SELECT a.*,
       b.distance_to_coast, b.distance_to_freeway, b.distance_to_onramp,
       b.distance_to_park, b.distance_to_school, b.distance_to_transit
  into public.parcels
  from staging.parcels a
  join staging.local_effect_distances b
    on a.parcel_id = b.parcel_id;
ALTER TABLE public.parcels DROP COLUMN gid;
ALTER TABLE public.parcels RENAME COLUMN developmen TO development_type_id;
ALTER TABLE public.parcels RENAME COLUMN parcel_acr TO parcel_acres;
ALTER TABLE public.parcels RENAME COLUMN proportion TO proportion_undevelopable;
'''
exec_sql(parcel_join_sql)

# XY coords
exec_sql("alter table public.parcels add centroid geometry;")
exec_sql("update public.parcels set centroid = ST_centroid(geom);")
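# A quick, illustrative sanity check that the de-duplication worked: no
# parcel_id should now appear more than once.
dups = db_to_df("""
    select parcel_id, count(*) as n
    from staging.local_effect_distances
    group by parcel_id
    having count(*) > 1;
""")
assert len(dups) == 0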
summary = df.groupby(grouper).sum()
if grouper == 'taz':
    summary['residential_units_target'] = targetunits
    taz_df = summary

output_tablename = 'summary_{}'.format(grouper)
df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')
config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df("SELECT column_name FROM information_schema.columns WHERE table_name='parcel'").column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df("SELECT column_name FROM information_schema.columns WHERE table_name='parcel'").column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")

## Export parcel shapefile to output directory
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s parcel'
          % (parcel_output_dir, db_config['host'], db_config['user'],
             db_config['password'], db_config['database']))

## Export simplified parcel shapefile
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s "select geom_id, parcel_id, st_simplifypreservetopology(geom, 100) from parcel"'
          % (parcel_output_dir, db_config['host'], db_config['user'],
             db_config['password'], db_config['database']))
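# An illustrative alternative to os.system: subprocess with an argument
# list sidesteps shell quoting if the password or output path contains
# special characters. Flags mirror the pgsql2shp calls above.
import subprocess
subprocess.check_call(['pgsql2shp', '-f', parcel_output_dir,
                       '-h', db_config['host'], '-u', db_config['user'],
                       '-P', db_config['password'],
                       db_config['database'], 'parcel'])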
loader = TableLoader()

## spandex localhost db config
db_config = dict(load_config().items('database'))

## UrbanCanvas db config
urbancanvas_db_config = {'database': 'mtc',
                         'host': '67.225.185.54',
                         'password': '******',
                         'port': '5432',
                         'user': '******'}

## If 'loading' schema not on localhost db, create. This schema is for tables to load to UrbanCanvas.
exec_sql("CREATE SCHEMA IF NOT EXISTS loading;")

# UrbanCanvas exec_sql func, for executing sql on the UrbanCanvas database
def exec_sql2(query):
    print query
    conn_string = "host=67.225.185.54 dbname='mtc' user='******' password='******' port=5432"
    import psycopg2
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()
    cur.execute(query)
    conn.commit()
    cur.close()
    conn.close()
# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
staging = loader.tables.staging
conform_srids(loader.srid, schema=staging, fix=True)

# Load county land use code mapping.
csv = loader.get_path('built/parcel/2010/rtp13_processing_notes/lucodes.csv')
df = pd.read_csv(csv, dtype=str)
df.dropna(how='any', inplace=True,
          subset=['county_id', 'land_use_type_id', 'development_type_id'])
df.index.name = 'index'
df_to_db(df, 'lucodes', schema=staging)

# Add county land use code mapping unique constraint.
exec_sql("""
ALTER TABLE staging.lucodes ADD CONSTRAINT lucodes_unique
UNIQUE (county_id, land_use_type_id);
""")
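# A hedged sketch of how the lucodes mapping might be applied downstream:
# fill development_type_id on parcels by joining on the unique
# (county_id, land_use_type_id) key. The exact UPDATE is an assumption;
# only the mapping table and its constraint appear above.
exec_sql("""
UPDATE parcels p
   SET development_type_id = l.development_type_id
  FROM staging.lucodes l
 WHERE p.county_id = l.county_id
   AND p.land_use_type_id = l.land_use_type_id;
""")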
## Export buildings as csv
building_output_path = loader.get_path('out/regeneration/summaries/buildings.csv')
buildings = db_to_df('select * from building').set_index('building_id')
buildings.to_csv(building_output_path)

## Export TAZ summary file
summary_built_space = db_to_df('select * from summary_built_space').set_index('taz1454')
summary_price = db_to_df('select * from summary_price').set_index('taz')
import pandas as pd
import numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel
from spandex.io import exec_sql, df_to_db
from spandex import TableLoader

loader = TableLoader()

## Read Redfin CSV and load to database
redfin_csv_path = loader.get_path('built/bldg/homeprices/redfin_03feb14.csv')
redfin = pd.read_csv(redfin_csv_path)
redfin.index.name = 'idx'
df_to_db(redfin, 'redfin', schema=loader.tables.staging)

## Lat/long to point geometry, with the right SRID (the same sequence the
## lat_long_to_point_geometry helper encapsulates)
exec_sql("ALTER TABLE staging.redfin ADD COLUMN geom geometry;")
exec_sql("UPDATE staging.redfin SET geom = ST_GeomFromText('POINT(' || longitude || ' ' || latitude || ')', 4326);")
exec_sql("CREATE INDEX redfin_gidx on staging.redfin using gist (geom);")
exec_sql("SELECT UpdateGeometrySRID('staging', 'redfin', 'geom', 2768);")
exec_sql("UPDATE staging.redfin SET geom = ST_TRANSFORM(ST_SetSRID(geom, 4326), 2768);")

## Append the unique parcel identifier to the Redfin records
exec_sql("alter table staging.redfin add gid integer default 0;")
exec_sql("update staging.redfin set gid = a.gid from parcels a where st_within(staging.redfin.geom, a.geom);")

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

## Load as dataframes for the imputation
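# The snippet ends here; a hedged continuation loading the parcel-tagged
# Redfin records and the parcels for the imputation step (the gid > 0
# filter and the select lists are illustrative assumptions):
redfin_df = db_to_df('select * from staging.redfin where gid > 0;')
parcels = db_to_df('select * from parcels;').set_index('gid')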