def append_parcel_identifier(tbl, schema, tbl_geom, parcel_identifier):
    """Append parcel identifier to target table """
    print 'Appending parcel identifier to %s' % tbl
    if not field_in_table(parcel_identifier, tbl):
        exec_sql("alter table %s.%s add %s integer default 0;" %(schema, tbl, parcel_identifier))
    exec_sql("update %s.%s set %s = a.%s from parcels a where st_contains(a.geom, %s.%s.%s);" %
              (schema, tbl, parcel_identifier, parcel_identifier, schema, tbl, tbl_geom))
def lat_long_to_point_geometry(tbl, schema, x_col = 'longitude', y_col = 'latitude', geom_field_name = 'geom', target_srid = 2768):
    """Creates point geometry on db table based on lat long fields"""
    print 'Creating point geometry from lat/long in table %s.' % tbl 
    if not field_in_table(geom_field_name, tbl):
        exec_sql("ALTER TABLE %s.%s ADD COLUMN %s geometry;" % (schema, tbl, geom_field_name))
    exec_sql("UPDATE %s.%s SET %s = ST_GeomFromText('POINT(' || %s || ' ' || %s || ')',4326);" % 
             (schema, tbl, geom_field_name, x_col, y_col))
    exec_sql("CREATE INDEX %s_gidx on %s.%s using gist (%s);" % (tbl, schema, tbl, geom_field_name))
    exec_sql("SELECT UpdateGeometrySRID('%s', '%s', '%s', %s);" % (schema, tbl, geom_field_name, target_srid))
    exec_sql("UPDATE %s.%s SET %s = ST_TRANSFORM(ST_SetSRID(%s, 4326), %s);" % (schema, tbl, geom_field_name, geom_field_name, target_srid))
Beispiel #3
0

### UrbanCanvas

## spandex localhost db config
db_config = dict(load_config().items('database'))  

## UrbanCanvas db config
urbancanvas_db_config = {'database': 'sandag',
                         'host': 'urbancanvas.cp2xwchuariu.us-west-2.rds.amazonaws.com',
                         'password': '******',
                         'port': '5432',
                         'user': '******'}

## if 'loading' schema not on localhost db, create.  This schema is for tables to load to UrbanCanvas
exec_sql("CREATE SCHEMA IF NOT EXISTS loading;")  


#UrbanCanvas exec_sql func, for executing sql on UrbanCanvas database
def exec_sql2(query):
    print query
    conn_string = "host=urbancanvas.cp2xwchuariu.us-west-2.rds.amazonaws.com dbname='sandag' user='******' password='******' port=5432"
    import psycopg2
    conn=psycopg2.connect(conn_string)
    cur = conn.cursor()
    cur.execute(query)
    conn.commit()
    cur.close()
    conn.close()

Beispiel #4
0
loader = TableLoader()
t = loader.tables

###Tagging parcels with taz id based on point in poly
tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key')


###Deal with special cases where parcels do not fall within TAZ boundaries
def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)


exec_sql("""
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision) RETURNS integer AS $BODY$ BEGIN FOR i IN 0..maxslice LOOP IF st_expand(a,maxe*i/maxslice) && b THEN RETURN i; END IF; END LOOP; RETURN 99999999; END; $BODY$ LANGUAGE plpgsql IMMUTABLE COST 100; ALTER FUNCTION expandoverlap_metric(geometry, geometry, double precision, double precision) OWNER TO postgres
""")

if db_to_df("select exists (select 1 from pg_type where typname = 'pgis_nn');"
            ).values[0][0] == False:
    exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))")

exec_sql("""
CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ DECLARE strsql text; rec pgis_nn; ncollected integer; it integer; BEGIN ncollected := 0; it := 0; WHILE ncollected < numnn AND it <= maxslices LOOP strsql := 'SELECT currentit.' || sgid2field || ', st_distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || '  as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND st_distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND st_expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) ||  ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' ||  CAST((numnn - ncollected) As varchar(200)); FOR rec in EXECUTE (strsql) LOOP IF ncollected < numnn THEN ncollected := ncollected + 1; RETURN NEXT rec; ELSE EXIT; END IF; END LOOP; it := it + 1; END LOOP; END $BODY$ LANGUAGE plpgsql STABLE COST 100 ROWS 1000; ALTER FUNCTION _pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres
""")

exec_sql("""
CREATE OR REPLACE FUNCTION pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ SELECT * FROM _pgis_fn_nn($1,$2, $3, $4, $5, $6, $7, $8); $BODY$ LANGUAGE sql STABLE COST 100 ROWS 1000; ALTER FUNCTION pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres
""")

exec_sql('drop table if exists parcels_outside_taz;')
Beispiel #5
0
for i in range(73):
    if i < 10:
        filename = 'p50%s_d00.shp' % i
    else:
        filename = 'p5%s_d00.shp' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)

    if os.path.exists(filepath):
        subfile_name = filename[:-4]
        shapefiles['staging.%s' % subfile_name] = 'puma_geom/%s' % filename

loader.load_shp_map(shapefiles)

conform_srids(loader.srid, schema=loader.tables.staging, fix=True)

exec_sql("DROP table if exists staging.puma00;")

sql_str = ""
for i in range(73):
    if i < 10:
        filename = 'p50%s_d00.shp' % i
    else:
        filename = 'p5%s_d00.shp' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)

    if os.path.exists(filepath):
        subfile_name = filename[:-4]
        sql_str = sql_str + 'select area, perimeter, puma5, name, geom from staging.%s' % subfile_name
        if i < 72:
            sql_str = sql_str + ' UNION ALL '
###Database connection
loader = TableLoader()
t = loader.tables

###Tagging parcels with taz id based on point in poly
tag(t.public.parcels, 'taz', t.staging.taz, 'taz_key')


###Deal with special cases where parcels do not fall within TAZ boundaries
def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)
    
exec_sql("""
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision) RETURNS integer AS $BODY$ BEGIN FOR i IN 0..maxslice LOOP IF st_expand(a,maxe*i/maxslice) && b THEN RETURN i; END IF; END LOOP; RETURN 99999999; END; $BODY$ LANGUAGE plpgsql IMMUTABLE COST 100; ALTER FUNCTION expandoverlap_metric(geometry, geometry, double precision, double precision) OWNER TO postgres
""")

if db_to_df("select exists (select 1 from pg_type where typname = 'pgis_nn');").values[0][0] == False:
    exec_sql("CREATE TYPE pgis_nn AS (nn_gid integer, nn_dist numeric(16,5))")
    
exec_sql("""
CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ DECLARE strsql text; rec pgis_nn; ncollected integer; it integer; BEGIN ncollected := 0; it := 0; WHILE ncollected < numnn AND it <= maxslices LOOP strsql := 'SELECT currentit.' || sgid2field || ', st_distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || '  as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND st_distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND st_expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) ||  ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY st_distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' ||  CAST((numnn - ncollected) As varchar(200)); FOR rec in EXECUTE (strsql) LOOP IF ncollected < numnn THEN ncollected := ncollected + 1; RETURN NEXT rec; ELSE EXIT; END IF; END LOOP; it := it + 1; END LOOP; END $BODY$ LANGUAGE plpgsql STABLE COST 100 ROWS 1000; ALTER FUNCTION _pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres
""")

exec_sql("""
CREATE OR REPLACE FUNCTION pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset character varying, swhere character varying, sgid2field character varying, sgeom2field character varying) RETURNS SETOF pgis_nn AS $BODY$ SELECT * FROM _pgis_fn_nn($1,$2, $3, $4, $5, $6, $7, $8); $BODY$ LANGUAGE sql STABLE COST 100 ROWS 1000; ALTER FUNCTION pgis_fn_nn(geometry, double precision, integer, integer, character varying, character varying, character varying, character varying) OWNER TO postgres
""")

exec_sql('drop table if exists parcels_outside_taz;')
nonres_sqft_zone = pd.DataFrame({'observed':parcels.groupby('taz').non_residential_sqft.sum(), 'target':targetunits.targetnonressqft})

# For all employment points, translate to nonres-sqft by multiplying by 250.
# Filter out synthetic job-based buildings so that we keep only those that have no residential and have less than 500 existing sqft. 
# For each TAZ, calculate the difference needed to match aggregate target.
# If need to increment nrsqft upwards, sort synthetic buildings by sqft and take the top x that covers the needed difference
# If no valid job points and non existing nonres-sqft, introduce a synthetic building in the TAZ-  equal to the target, and put it on the biggest parcel.
# Do same in the case of no parcels (and add synthetic parcel)
# Scale to match

#No need to tag in imputation_flag column based on scaling- otherwise everything would be tagged.
nonres_sqft_zone['difference'] = nonres_sqft_zone.target - nonres_sqft_zone.observed

##Append the unique parcel identifier to the establisment point records.
if 'parcel_id' not in db_to_df("SELECT column_name FROM information_schema.columns  WHERE table_name='establishment_points'").column_name.values:
    exec_sql("alter table staging.establishment_points add parcel_id integer default 0;")
    exec_sql("update staging.establishment_points set parcel_id = a.gid from parcels a where st_within(staging.establishment_points.geom, a.geom);")

#Load the establishment points to be used for non-residential sqft imputation
estab_points = db_to_df('select emp_here, naics2, parcel_id from staging.establishment_points;')
estabs_joined = pd.merge(estab_points, parcels.reset_index(), left_on = 'parcel_id', right_on = 'gid')

# Filter out some synthetic job-based buildings so that we keep only those that have no existing residential and have less than 500 existing non-residential sqft. 
estabs_joined = estabs_joined[np.logical_or((estabs_joined.residential_units == 0), (estabs_joined.residential_units.isnull())) & np.logical_or((estabs_joined.non_residential_sqft < 500), (estabs_joined.non_residential_sqft.isnull()))]

# For all employment points, translate to nonres-sqft by multiplying by 250.
estabs_joined['nrsqft_assumed'] = 250 * estabs_joined.emp_here

#Aggregate employment-point-based-buildings to the parcel level
estabs_joined_nrsqft = estabs_joined.groupby('gid').nrsqft_assumed.sum()
estabs_joined_taz = estabs_joined.groupby('gid').taz.max()
parcels['costar_elevators'] = costar.groupby('gid').elevators.max()
parcels['costar_rent'] = costar.groupby('gid').rent.max()


######## *UPDATE* ########
print 'Point-based impute done. Loading results back to db.'

# Updated fields back to database
parcels_imputed = parcels[['year_built', 'sqft_per_unit', 'non_residential_sqft', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'gov_type', 'gov_sqft', 'costar_property_type', 'costar_secondary_type', 'costar_building_name', 'costar_elevators', 'costar_rent', 'imputation_flag']]
for col in ['redfin_home_type', 'costar_property_type', 'costar_secondary_type', 'costar_building_name', 'costar_rent']:
    parcels_imputed[col][parcels_imputed[col].isnull()] = ''
    parcels_imputed[col] = parcels_imputed[col].str.encode('utf-8')
df_to_db(parcels_imputed, 'parcels_imputed', schema=loader.tables.staging)

#Update the master parcel table on the database
exec_sql("update parcels set year_built = a.year_built from staging.parcels_imputed a where a.gid = parcels.gid;")
exec_sql("update parcels set sqft_per_unit = a.sqft_per_unit from staging.parcels_imputed a where a.gid = parcels.gid;")
exec_sql("update parcels set non_residential_sqft = a.non_residential_sqft from staging.parcels_imputed a where a.gid = parcels.gid;")
exec_sql("update parcels set imputation_flag = a.imputation_flag from staging.parcels_imputed a where a.gid = parcels.gid;")

def add_field_and_populate_with_imputed_value(varname, vartype):
    if not field_in_table(varname, 'parcels'):
        exec_sql("ALTER TABLE parcels ADD COLUMN %s %s;" % (varname, vartype))
    exec_sql("update parcels set %s = a.%s from staging.parcels_imputed a where a.gid = parcels.gid;" % (varname, varname))
    
add_field_and_populate_with_imputed_value('redfin_sale_price', 'numeric')
add_field_and_populate_with_imputed_value('redfin_sale_year', 'numeric')
add_field_and_populate_with_imputed_value('redfin_home_type', 'text')
add_field_and_populate_with_imputed_value('gov_type', 'numeric')
add_field_and_populate_with_imputed_value('gov_sqft', 'numeric')
add_field_and_populate_with_imputed_value('costar_elevators', 'numeric')
exec_sql("""
-- SCL
drop table if exists condos_scl;

SELECT 
max(county_id) as county_id,
max(apn) as apn,
max(parcel_id_local) as parcel_id_local,
max(land_use_type_id) as land_use_type_id,
max(res_type) as res_type,
sum(land_value) as land_value,
sum(improvement_value) as improvement_value,
max(year_assessed) as year_assessed,
max(year_built) as year_built,
sum(building_sqft) as building_sqft,
sum(non_residential_sqft) as non_residential_sqft,
sum(residential_units) as residential_units,
max(sqft_per_unit) as sqft_per_unit,
max(stories) as stories,
max(tax_exempt) as tax_exempt,
condo_identifier,
ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom,
max(gid) as gid,
'merged'  as imputation_flag,
max(development_type_id) as development_type_id
into condos_scl
FROM parcels
where county_id = '085' AND land_use_type_id = 'RCON' AND length(condo_identifier)>3
GROUP BY condo_identifier;

delete from parcels where county_id = '085' AND land_use_type_id = 'RCON' AND length(condo_identifier)>3 ;

insert into parcels select * from condos_scl;

-- CNC
drop table if exists condos_cnc;

SELECT 
max(county_id) as county_id,
max(apn) as apn,
max(parcel_id_local) as parcel_id_local,
max(land_use_type_id) as land_use_type_id,
max(res_type) as res_type,
sum(land_value) as land_value,
sum(improvement_value) as improvement_value,
max(year_assessed) as year_assessed,
max(year_built) as year_built,
sum(building_sqft) as building_sqft,
sum(non_residential_sqft) as non_residential_sqft,
sum(residential_units) as residential_units,
max(sqft_per_unit) as sqft_per_unit,
max(stories) as stories,
max(tax_exempt) as tax_exempt,
condo_identifier,
ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom,
max(gid) as gid,
'merged'  as imputation_flag,
max(development_type_id) as development_type_id
into condos_cnc
FROM parcels
where county_id = '013' AND land_use_type_id = '29' AND length(condo_identifier)>3
GROUP BY condo_identifier;

delete from parcels where county_id = '013' AND land_use_type_id = '29' AND length(condo_identifier)>3 ; 

insert into parcels select * from condos_cnc;

-- SON

drop table if exists condos_son;

SELECT 
max(county_id) as county_id,
max(apn) as apn,
max(parcel_id_local) as parcel_id_local,
max(land_use_type_id) as land_use_type_id,
max(res_type) as res_type,
sum(land_value) as land_value,
sum(improvement_value) as improvement_value,
max(year_assessed) as year_assessed,
max(year_built) as year_built,
sum(building_sqft) as building_sqft,
sum(non_residential_sqft) as non_residential_sqft,
sum(residential_units) as residential_units,
max(sqft_per_unit) as sqft_per_unit,
max(stories) as stories,
max(tax_exempt) as tax_exempt,
condo_identifier,
ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom,
max(gid) as gid,
'merged'  as imputation_flag,
max(development_type_id) as development_type_id
into condos_son
FROM parcels
where county_id = '097' AND length(condo_identifier)>3 and res_type = 'multi'
GROUP BY condo_identifier;

delete from parcels where county_id = '097' AND length(condo_identifier)>3 and res_type = 'multi';

insert into parcels select * from condos_son;
""")
Beispiel #10
0
    return sql.read_frame(query, conn)


##There are some duplicate parcel_id's in local_effect_distances, delete one record from each duplicate pair
delete_dup_parcel_id_sql = '''
with a as (select parcel_id, count(*) as numparcels from staging.local_effect_distances group by parcel_id)
, b as(
select parcel_id from a where numparcels > 1 --These are the parcel_id's where one of two of each pair needs to be removed
)
, c as(
select max(ctid) as ctid_max from staging.local_effect_distances where parcel_id in (select parcel_id from b) group by parcel_id  --These are the ctid's to delete
)
delete from staging.local_effect_distances
where ctid in (select ctid_max from c);
'''
exec_sql(delete_dup_parcel_id_sql)

#Join parcels to local_effect_distances
parcel_join_sql = '''
DROP table if exists public.parcels;
SELECT a.*, b.distance_to_coast, b.distance_to_freeway, b.distance_to_onramp,  b.distance_to_park, b.distance_to_school, b.distance_to_transit into public.parcels from staging.parcels a join staging.local_effect_distances b on a.parcel_id = b.parcel_id;
ALTER TABLE public.parcels DROP COLUMN gid;
ALTER TABLE public.parcels RENAME COLUMN developmen TO development_type_id;
ALTER TABLE public.parcels RENAME COLUMN parcel_acr TO parcel_acres;
ALTER TABLE public.parcels RENAME COLUMN proportion TO proportion_undevelopable;
'''
exec_sql(parcel_join_sql)

#XY coords
exec_sql("alter table public.parcels add centroid geometry;")
exec_sql("update public.parcels set centroid = ST_centroid(geom);")
exec_sql("""
-- SCL
drop table if exists condos_scl;

SELECT 
max(county_id) as county_id,
max(apn) as apn,
max(parcel_id_local) as parcel_id_local,
max(land_use_type_id) as land_use_type_id,
max(res_type) as res_type,
sum(land_value) as land_value,
sum(improvement_value) as improvement_value,
max(year_assessed) as year_assessed,
max(year_built) as year_built,
sum(building_sqft) as building_sqft,
sum(non_residential_sqft) as non_residential_sqft,
sum(residential_units) as residential_units,
max(sqft_per_unit) as sqft_per_unit,
max(stories) as stories,
max(tax_exempt) as tax_exempt,
condo_identifier,
ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom,
max(gid) as gid,
'merged'  as imputation_flag,
max(development_type_id) as development_type_id
into condos_scl
FROM parcels
where county_id = '085' AND land_use_type_id = 'RCON' AND length(condo_identifier)>3
GROUP BY condo_identifier;

delete from parcels where county_id = '085' AND land_use_type_id = 'RCON' AND length(condo_identifier)>3 ;

insert into parcels select * from condos_scl;

-- CNC
drop table if exists condos_cnc;

SELECT 
max(county_id) as county_id,
max(apn) as apn,
max(parcel_id_local) as parcel_id_local,
max(land_use_type_id) as land_use_type_id,
max(res_type) as res_type,
sum(land_value) as land_value,
sum(improvement_value) as improvement_value,
max(year_assessed) as year_assessed,
max(year_built) as year_built,
sum(building_sqft) as building_sqft,
sum(non_residential_sqft) as non_residential_sqft,
sum(residential_units) as residential_units,
max(sqft_per_unit) as sqft_per_unit,
max(stories) as stories,
max(tax_exempt) as tax_exempt,
condo_identifier,
ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom,
max(gid) as gid,
'merged'  as imputation_flag,
max(development_type_id) as development_type_id
into condos_cnc
FROM parcels
where county_id = '013' AND land_use_type_id = '29' AND length(condo_identifier)>3
GROUP BY condo_identifier;

delete from parcels where county_id = '013' AND land_use_type_id = '29' AND length(condo_identifier)>3 ; 

insert into parcels select * from condos_cnc;

-- SON

drop table if exists condos_son;

SELECT 
max(county_id) as county_id,
max(apn) as apn,
max(parcel_id_local) as parcel_id_local,
max(land_use_type_id) as land_use_type_id,
max(res_type) as res_type,
sum(land_value) as land_value,
sum(improvement_value) as improvement_value,
max(year_assessed) as year_assessed,
max(year_built) as year_built,
sum(building_sqft) as building_sqft,
sum(non_residential_sqft) as non_residential_sqft,
sum(residential_units) as residential_units,
max(sqft_per_unit) as sqft_per_unit,
max(stories) as stories,
max(tax_exempt) as tax_exempt,
condo_identifier,
ST_CollectionExtract(ST_Multi(ST_Union(geom)), 3) AS geom,
max(gid) as gid,
'merged'  as imputation_flag,
max(development_type_id) as development_type_id
into condos_son
FROM parcels
where county_id = '097' AND length(condo_identifier)>3 and res_type = 'multi'
GROUP BY condo_identifier;

delete from parcels where county_id = '097' AND length(condo_identifier)>3 and res_type = 'multi';

insert into parcels select * from condos_son;
""")
Beispiel #12
0
    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary

    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")

##  Export parcel shapefile to output directory
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s parcel' %
          (parcel_output_dir, db_config['host'], db_config['user'],
           db_config['password'], db_config['database']))

# Export simplified parcel shapefile
os.system(
    'pgsql2shp -f "%s" -h %s -u %s -P %s %s "select geom_id, parcel_id, st_simplifypreservetopology(geom, 100) from parcel"'
    % (parcel_output_dir, db_config['host'], db_config['user'],
       db_config['password'], db_config['database']))
Beispiel #13
0
    if i < 10:
        filename = 'p50%s_d00.shp' % i
    else:
        filename = 'p5%s_d00.shp' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)
    
    if os.path.exists(filepath):
        subfile_name = filename[:-4]
        shapefiles['staging.%s' % subfile_name] = 'puma_geom/%s' % filename
        
loader.load_shp_map(shapefiles)


conform_srids(loader.srid, schema=loader.tables.staging, fix=True)

exec_sql("DROP table if exists staging.puma00;")

sql_str = ""
for i in range(73): 
    if i < 10:
        filename = 'p50%s_d00.shp' % i
    else:
        filename = 'p5%s_d00.shp' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)
    
    if os.path.exists(filepath):
        subfile_name = filename[:-4]
        sql_str = sql_str + 'select area, perimeter, puma5, name, geom from staging.%s' % subfile_name
        if i < 72:
            sql_str = sql_str + ' UNION ALL '
        
def add_field_and_populate_with_imputed_value(varname, vartype):
    if not field_in_table(varname, 'parcels'):
        exec_sql("ALTER TABLE parcels ADD COLUMN %s %s;" % (varname, vartype))
    exec_sql("update parcels set %s = a.%s from staging.parcels_imputed a where a.gid = parcels.gid;" % (varname, varname))
# For each TAZ, calculate the difference needed to match aggregate target.
# If need to increment nrsqft upwards, sort synthetic buildings by sqft and take the top x that covers the needed difference
# If no valid job points and non existing nonres-sqft, introduce a synthetic building in the TAZ-  equal to the target, and put it on the biggest parcel.
# Do same in the case of no parcels (and add synthetic parcel)
# Scale to match

#No need to tag in imputation_flag column based on scaling- otherwise everything would be tagged.
nonres_sqft_zone[
    'difference'] = nonres_sqft_zone.target - nonres_sqft_zone.observed

##Append the unique parcel identifier to the establisment point records.
if 'parcel_id' not in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='establishment_points'"
).column_name.values:
    exec_sql(
        "alter table staging.establishment_points add parcel_id integer default 0;"
    )
    exec_sql(
        "update staging.establishment_points set parcel_id = a.gid from parcels a where st_within(staging.establishment_points.geom, a.geom);"
    )

#Load the establishment points to be used for non-residential sqft imputation
estab_points = db_to_df(
    'select emp_here, naics2, parcel_id from staging.establishment_points;')
estabs_joined = pd.merge(estab_points,
                         parcels.reset_index(),
                         left_on='parcel_id',
                         right_on='gid')

# Filter out some synthetic job-based buildings so that we keep only those that have no existing residential and have less than 500 existing non-residential sqft.
estabs_joined = estabs_joined[
Beispiel #16
0
loader = TableLoader()

## spandex localhost db config
db_config = dict(load_config().items('database'))

## UrbanCanvas db config
urbancanvas_db_config = {
    'database': 'mtc',
    'host': '67.225.185.54',
    'password': '******',
    'port': '5432',
    'user': '******'
}

## if 'loading' schema not on localhost db, create.  This schema is for tables to load to UrbanCanvas
exec_sql("CREATE SCHEMA IF NOT EXISTS loading;")


#UrbanCanvas exec_sql func, for executing sql on UrbanCanvas database
def exec_sql2(query):
    print query
    conn_string = "host=67.225.185.54 dbname='mtc' user='******' password='******' port=5432"
    import psycopg2
    conn = psycopg2.connect(conn_string)
    cur = conn.cursor()
    cur.execute(query)
    conn.commit()
    cur.close()
    conn.close()

Beispiel #17
0

# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
staging = loader.tables.staging
conform_srids(loader.srid, schema=staging, fix=True)

# Load county land use code mapping.
csv = loader.get_path('built/parcel/2010/rtp13_processing_notes/lucodes.csv')
df = pd.read_csv(csv, dtype=str)
df.dropna(how='any', inplace=True,
          subset=['county_id', 'land_use_type_id', 'development_type_id'])
df.index.name = 'index'
df_to_db(df, 'lucodes', schema=staging)

# Add county land use code mapping unique constraint.
exec_sql("""
ALTER TABLE staging.lucodes ADD CONSTRAINT lucodes_unique
UNIQUE (county_id, land_use_type_id);
""")
Beispiel #18
0
    return sql.read_frame(query, conn)


##There are some duplicate parcel_id's in local_effect_distances, delete one record from each duplicate pair
delete_dup_parcel_id_sql = '''
with a as (select parcel_id, count(*) as numparcels from staging.local_effect_distances group by parcel_id)
, b as(
select parcel_id from a where numparcels > 1 --These are the parcel_id's where one of two of each pair needs to be removed
)
, c as(
select max(ctid) as ctid_max from staging.local_effect_distances where parcel_id in (select parcel_id from b) group by parcel_id  --These are the ctid's to delete
)
delete from staging.local_effect_distances
where ctid in (select ctid_max from c);
'''
exec_sql(delete_dup_parcel_id_sql)


#Join parcels to local_effect_distances
parcel_join_sql = '''
DROP table if exists public.parcels;
SELECT a.*, b.distance_to_coast, b.distance_to_freeway, b.distance_to_onramp,  b.distance_to_park, b.distance_to_school, b.distance_to_transit into public.parcels from staging.parcels a join staging.local_effect_distances b on a.parcel_id = b.parcel_id;
ALTER TABLE public.parcels DROP COLUMN gid;
ALTER TABLE public.parcels RENAME COLUMN developmen TO development_type_id;
ALTER TABLE public.parcels RENAME COLUMN parcel_acr TO parcel_acres;
ALTER TABLE public.parcels RENAME COLUMN proportion TO proportion_undevelopable;
'''
exec_sql(parcel_join_sql)


#XY coords
    summary = df.groupby(grouper).sum()

    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary

    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df("SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'").column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df("SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'").column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")

##  Export parcel shapefile to output directory
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s parcel' % (parcel_output_dir, db_config['host'], db_config['user'], db_config['password'], db_config['database']))

##  Export buildings as csv
building_output_path = loader.get_path('out/regeneration/summaries/buildings.csv')
    
buildings = db_to_df('select * from building').set_index('building_id')
buildings.to_csv(building_output_path)

## Export TAZ summary file
summary_built_space = db_to_df('select * from summary_built_space').set_index('taz1454')
summary_price = db_to_df('select * from summary_price').set_index('taz')
import pandas as pd, numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel
from spandex.io import exec_sql,  df_to_db
from spandex import TableLoader

loader = TableLoader()

##Read Redfin CSV and load to database
redfin_csv_path = loader.get_path('built/bldg/homeprices/redfin_03feb14.csv')
redfin = pd.read_csv(redfin_csv_path)
redfin.index.name = 'idx'
df_to_db(redfin, 'redfin', schema=loader.tables.staging)

##Lat/long to point geometry, with the right SRID
exec_sql("ALTER TABLE staging.redfin ADD COLUMN geom geometry;")
exec_sql("UPDATE staging.redfin SET geom = ST_GeomFromText('POINT(' || longitude || ' ' || latitude || ')',4326);")
exec_sql("CREATE INDEX redfin_gidx on staging.redfin using gist (geom);")
exec_sql("SELECT UpdateGeometrySRID('staging', 'redfin', 'geom', 2768);")
exec_sql("UPDATE staging.redfin SET geom = ST_TRANSFORM(ST_SetSRID(geom, 4326), 2768);")

##Append the unique parcel identifier to the Redfin records
exec_sql("alter table staging.redfin add gid integer default 0;")
exec_sql("update staging.redfin set gid = a.gid from parcels a where st_within(staging.redfin.geom, a.geom);")

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

##Load as dataframes for the imputation