Example #1
def export_ala(parcels_out):
    df = parcels_out.to_frame()

    # Cast "land_use_type_id" to string for compatibility with other counties.
    df['land_use_type_id'] = df.land_use_type_id.astype(str)

    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_ala', schema=staging)
Example #2
targetunits['sf'] = buildings2[buildings2.res_type == 'single'].groupby(
    'taz').residential_units.sum()
targetunits['mf'] = buildings2[buildings2.res_type == 'multi'].groupby(
    'taz').residential_units.sum()
targetunits['nrsqft'] = buildings2.groupby('taz').non_residential_sqft.sum()
print targetunits[[
    'sf', 'targetSF', 'mf', 'targetMF', 'nrsqft', 'targetnonressqft'
]].head()
targetunits[['sf', 'targetSF', 'mf', 'targetMF', 'nrsqft',
             'targetnonressqft']].sum()
# summary_output_path = loader.get_path('out/regeneration/summaries/built_space_summary.csv')
# targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].to_csv(summary_output_path)
targetunits = targetunits[[
    'sf', 'targetSF', 'mf', 'targetMF', 'nrsqft', 'targetnonressqft'
]]
df_to_db(targetunits, 'summary_built_space', schema=loader.tables.public)

# EXPORT BUILDINGS TO DB
print 'Loading processed buildings to db'
df_to_db(buildings2, 'buildings', schema=loader.tables.public)

## Create geom_id (serves similar purpose to joinnumA) on parcels based on integer representation of geometry hash
idx = []
geom_hashes = []
for i, geom in parcels.geom.iteritems():
    idx.append(i)
    md5_hash = str(hashlib.md5(geom).hexdigest())
    geom_hashes.append(int(md5_hash[0:11], 16))

parcel_identifier = pd.Series(geom_hashes, index=idx)
parcels['geom_id'] = parcel_identifier
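Taking the first 11 hex digits of the MD5 digest yields a 44-bit integer, so the id is deterministic for a given geometry and fits comfortably in a signed 64-bit BIGINT column. A minimal standalone sketch of the same computation (the WKB hex string is an illustrative stand-in for a real parcels.geom value; Python 2 strings, as in the loop above):

import hashlib

geom = '0101000020110F0000'  # stand-in for a hex-encoded WKB geometry
geom_id = int(hashlib.md5(geom).hexdigest()[0:11], 16)
assert geom_id < 2 ** 44  # 11 hex digits = 44 bits, safe for BIGINT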
Example #4
def export_cnc(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df.loc[df.res_type.isnull(), 'res_type'] = ''
    df_to_db(df, 'attributes_cnc', schema=staging)
    probabilities = np.ones(len(alternative_ids))
    num_resunits = len(alts)
    print 'There are %s residential units in TAZ %s' % (num_resunits, taz)
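    # unit_choice is defined elsewhere in this project; presumably it draws
    # one alternative per chooser at random, without replacement, weighted by
    # `probabilities` (uniform here, so units are assigned uniformly).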
    choices = unit_choice(chooser_ids, alternative_ids, probabilities)
    #households_urbansim.building_id[np.in1d(households_urbansim.index.values,chooser_ids)] = choices.values
    hh.loc[chooser_ids,'building_id'] = choices
    if num_hh > num_resunits:
        print 'Warning:  number of households exceeds number of resunits in TAZ %s' % taz
targetunits['hh_allocated'] = pd.merge(
    hh, buildings, left_on='building_id',
    right_index=True).groupby('taz_x').size()

df = targetunits[['targetunits', 'hh', 'hh_allocated']].copy()
df['occupancy'] = df.hh * 1.0 / df.targetunits

print df.head()
#summary_output_path = loader.get_path('out/regeneration/summaries/hh_summary.csv')
df_to_db(df, 'summary_hh', schema=loader.tables.public)



################
#####JOBS#######
################

sector_columns = []
for col in targetunits.columns:
    if col.startswith('e') and col.endswith('_10'):
        sector_columns.append(col)

emp_targets = targetunits[sector_columns]
Example #6
def export_son(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_son', schema=staging)
import pandas as pd
import numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel
from spandex.io import exec_sql,  df_to_db
from spandex import TableLoader

loader = TableLoader()

##Read Redfin CSV and load to database
redfin_csv_path = loader.get_path('built/bldg/homeprices/redfin_03feb14.csv')
redfin = pd.read_csv(redfin_csv_path)
redfin.index.name = 'idx'
df_to_db(redfin, 'redfin', schema=loader.tables.staging)

##Lat/long to point geometry, with the right SRID
exec_sql("ALTER TABLE staging.redfin ADD COLUMN geom geometry;")
exec_sql("UPDATE staging.redfin SET geom = ST_GeomFromText('POINT(' || longitude || ' ' || latitude || ')',4326);")
exec_sql("CREATE INDEX redfin_gidx on staging.redfin using gist (geom);")
exec_sql("SELECT UpdateGeometrySRID('staging', 'redfin', 'geom', 2768);")
exec_sql("UPDATE staging.redfin SET geom = ST_TRANSFORM(ST_SetSRID(geom, 4326), 2768);")

##Append the unique parcel identifier to the Redfin records
exec_sql("alter table staging.redfin add gid integer default 0;")
exec_sql("update staging.redfin set gid = a.gid from parcels a where st_within(staging.redfin.geom, a.geom);")

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)
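sql.read_frame comes from the long-deprecated pandas.io.sql API; on any recent pandas the same helper would be written with pd.read_sql (a sketch under that assumption, reusing the same private loader connection):

def db_to_df(query):
    """Executes SQL query and returns DataFrame (pandas >= 0.14)."""
    conn = loader.database._connection
    return pd.read_sql(query, conn)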
for grouper in ['county_id', 'taz']:
    df = tf[[grouper, 'non_residential_sqft', 'residential_units']]
    df.dropna(subset=[grouper], inplace=True)

    if grouper == 'taz':
        df[grouper] = df[grouper].astype(int)

    df['count'] = 1
    summary = df.groupby(grouper).sum()

    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary

    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df(
        "SELECT column_name FROM information_schema.columns WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df(
        "SELECT column_name FROM information_schema.columns WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")

##  Export parcel shapefile to output directory
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s parcel' %
          (parcel_output_dir, db_config['host'], db_config['user'],
           db_config['password'], db_config['database']))
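os.system interpolates the database password straight into a shell command line; passing an argument list to subprocess avoids the shell entirely. A sketch with the same pgsql2shp flags as above, assuming the binary is on PATH:

import subprocess

subprocess.check_call([
    'pgsql2shp', '-f', parcel_output_dir,
    '-h', db_config['host'], '-u', db_config['user'],
    '-P', db_config['password'],
    db_config['database'], 'parcel'])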

##  Export buildings as csv
Example #9
random_allocate_agents(jobs, buildings,
                       zone_id_col_name='block_id',
                       capacity_col_name='job_spaces')


#EXPORT DEMAND AGENTS TO DB
if 'block_id' in jobs.columns:
    del jobs['block_id']
    
if 'mgra_id' in households.columns:
    del households['mgra_id']
    
for df in [jobs, households]:
    df.loc[df['building_id'].isnull(), 'building_id'] = -1
    for col in df.columns:
        df[col] = df[col].astype('int')

df_to_db(jobs, 'jobs', schema=loader.tables.public)
df_to_db(households, 'households', schema=loader.tables.public)


# Export formatted buildings to db
if 'luz_id' in buildings.columns:
    del buildings['luz_id']

if 'mgra_id' in buildings.columns:
    del buildings['mgra_id']
    
if 'block_id' in buildings.columns:
    del buildings['block_id']
    
if 'job_spaces' in buildings.columns:
    del buildings['job_spaces']
Example #10
random_allocate_agents(jobs, buildings,
                       zone_id_col_name='block_id',
                       capacity_col_name='job_spaces')

#EXPORT DEMAND AGENTS TO DB
if 'block_id' in jobs.columns:
    del jobs['block_id']

if 'mgra_id' in households.columns:
    del households['mgra_id']

for df in [jobs, households]:
    df.loc[df['building_id'].isnull(), 'building_id'] = -1
    for col in df.columns:
        df[col] = df[col].astype('int')

df_to_db(jobs, 'jobs', schema=loader.tables.public)
df_to_db(households, 'households', schema=loader.tables.public)

# Export formatted buildings to db
if 'luz_id' in buildings.columns:
    del buildings['luz_id']

if 'mgra_id' in buildings.columns:
    del buildings['mgra_id']

if 'block_id' in buildings.columns:
    del buildings['block_id']

if 'job_spaces' in buildings.columns:
    del buildings['job_spaces']
Example #11
for grouper in ['county_id', 'taz']:
    df = tf[[grouper, 'non_residential_sqft', 'residential_units']]
    df.dropna(subset=[grouper], inplace=True)

    if grouper == 'taz':
        df[grouper] = df[grouper].astype(int)

    df['count'] = 1
    summary = df.groupby(grouper).sum()

    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary

    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")
Example #12
def export(parcels_out):
    schema = loader.tables.sample
    df_to_db(parcels_out.to_frame(), 'parcels_out', schema=schema)
def csv_to_staging(path, table_name):
    """Loads csv to staging schema on database"""
    df = pd.read_csv(path)
    df.index.name = 'idx'
    print 'Loading %s.' % table_name
    df_to_db(df, table_name, schema=loader.tables.staging)
costar = pd.read_excel(costar_xls_path)
costar_solano_path = loader.get_path('built/bldg/costar/2011/costar__clean2011_sol_020315.csv')
costar_sol = pd.read_csv(costar_solano_path)
costar_cols = ['PropertyID', 'Building Name', 'Latitude', 'Longitude',
               'Rentable Building Area', 'Year Built', 'PropertyType',
               'Secondary Type', 'Total Available Space (SF)',
               'Number Of Elevators', 'Last Sale Date', 'Last Sale Price',
               'Average Weighted Rent', 'Number Of Stories']
new_names = ['propertyid', 'building_name', 'latitude', 'longitude',
             'rentable_area', 'year_built', 'property_type', 'secondary_type',
             'available_space', 'elevators', 'last_sale_date',
             'last_sale_price', 'rent', 'stories']
costar2 = costar[costar_cols]
costar_sol2 = costar_sol[costar_cols]
costar2.columns = new_names
costar_sol2.columns = new_names
costar2 = pd.concat([costar2, costar_sol2])
for text_col in ['building_name', 'property_type', 'secondary_type',
                 'last_sale_date']:
    costar2[text_col] = costar2[text_col].fillna(' ')
    costar2[text_col] = costar2[text_col].str.encode('utf-8')
costar2.last_sale_date = costar2.last_sale_date.fillna(' ')
costar2.last_sale_price = costar2.last_sale_price.str.replace(",", "").astype('float')
costar2.stories = costar2.stories.fillna(0).astype('int32')
costar2.index.name = 'idx'
df_to_db(costar2, 'costar', schema=loader.tables.staging)
##Lat/long to point geometry, with the right SRID
lat_long_to_point_geometry('costar', 'staging', 'longitude', 'latitude', 'geom', 2768)
##Append the unique parcel identifier to the Costar records
append_parcel_identifier('costar', 'staging', 'geom', 'gid')
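lat_long_to_point_geometry and append_parcel_identifier are spandex helpers that wrap the same kind of raw SQL issued for the Redfin table in Example #6. A rough sketch of what the first one does, modeled on those statements (not the library's actual implementation):

def lat_long_to_point_geometry(tbl, schema, x_col, y_col, geom_col, srid):
    qualified = '%s.%s' % (schema, tbl)
    exec_sql("ALTER TABLE %s ADD COLUMN %s geometry;" % (qualified, geom_col))
    # Build WGS84 points from the long/lat columns, then reproject.
    exec_sql("UPDATE %s SET %s = ST_SetSRID(ST_MakePoint(%s, %s), 4326);" %
             (qualified, geom_col, x_col, y_col))
    exec_sql("UPDATE %s SET %s = ST_Transform(%s, %s);" %
             (qualified, geom_col, geom_col, srid))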


######## *IMPUTE* ########
print 'Start point-based impute.'

## Load dataframes for the imputation
parcels = db_to_df('select gid, year_built, sqft_per_unit, non_residential_sqft, '
                   'stories, imputation_flag from parcels;').set_index('gid')
costar = db_to_df('select * from staging.costar;')
redfin = db_to_df('select gid, yearbuilt, sqft, lastsalepr, saleyear, hometype from staging.redfin;')
gov_buildings = db_to_df('select * from staging.public_bldgs;')
gov_buildings.sqft = gov_buildings.sqft.str.replace(',', '').astype('int')
Example #15
            col_name = col[0]
            col_type = col[1]
            firstval = df[col_name].loc[0]
            # 1 and 0 also compare equal to True/False, so confirm the value
            # is genuinely boolean before filling NaNs with False.
            if firstval in (True, False) and type(firstval) == bool:
                df[col_name] = df[col_name].fillna(False)
            if col_type == np.int64:
                df[col_name] = df[col_name].fillna(0)
            elif col_type == np.float64:
                df[col_name] = df[col_name].fillna(0.0)
            elif col_type == np.object:
                df[col_name] = df[col_name].fillna(' ')
    if 'id' in df.columns:
        new_id_colname = tbl + '_id'
        df = df.rename(columns={'id': new_id_colname})
    df_to_db(df, tbl, schema=staging)
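The fragment above begins mid-loop; the enclosing iteration is presumably over df.dtypes. A self-contained sketch of the same dtype-based NaN filling (the DataFrame contents are illustrative; iteritems matches the Python 2-era pandas used here):

import numpy as np
import pandas as pd

df = pd.DataFrame({'n': [1.5, np.nan], 'name': ['a', np.nan]})
for col_name, col_type in df.dtypes.iteritems():
    if col_type == np.float64:
        df[col_name] = df[col_name].fillna(0.0)
    elif col_type == object:
        df[col_name] = df[col_name].fillna(' ')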


# Load excel

# xls_path = loader.get_path('scheduled/scheduled_development.xlsx')
# df_to_db(df, 'scheduled_development', schema = staging)

## MarketPointe multifamily rents (some munging needed to load)
xls_path = loader.get_path('price/marketPointe.xlsx')
df = pd.read_excel(xls_path)
df = df.rename(columns={'$/Sqft': 'price_per_sqft', '#Bldg': 'number_of_buildings',
                        '#Stories': 'stories', 'ZipCode': 'zipcode'})
df.index.name = 'idx'
df.loc[df.stories == '2-3', 'stories'] = 3
df.loc[df.stories == '2-4', 'stories'] = 4
df.loc[df.stories == '3-4', 'stories'] = 4
Example #16

# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
staging = loader.tables.staging
conform_srids(loader.srid, schema=staging, fix=True)

# Load county land use code mapping.
csv = loader.get_path('built/parcel/2010/rtp13_processing_notes/lucodes.csv')
df = pd.read_csv(csv, dtype=str)
df.dropna(how='any', inplace=True,
          subset=['county_id', 'land_use_type_id', 'development_type_id'])
df.index.name = 'index'
df_to_db(df, 'lucodes', schema=staging)

# Add county land use code mapping unique constraint.
exec_sql("""
ALTER TABLE staging.lucodes ADD CONSTRAINT lucodes_unique
UNIQUE (county_id, land_use_type_id);
""")
Example #17
def export_sfr(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_sfr', schema=staging)
Example #18
     'clip_high': [np.nan]*len(targetvalues),
     'int_result': [np.nan]*len(targetvalues)})
buildings = scl.scale_to_targets_from_table(buildings, targets_residential_price)

has_res_price = (buildings.res_price > 0) & (buildings.sqft_per_unit > 0)
buildings.loc[has_res_price, 'res_price_per_sqft'] = (
    buildings.res_price / buildings.sqft_per_unit)

#Nonresidential price imputation
nonresprice_estimation_dataset = buildings[
    (buildings.costar_property_type.str.len() > 2) &
    (buildings.res_type == 'other') &
    (~buildings.costar_rent.isin(['', '-', 'Negotiable', 'Withheld']))].copy()
nonresprice_estimation_dataset['observed_costar_rent'] = (
    nonresprice_estimation_dataset.costar_rent.astype('float'))

specification = (
    'np.log(observed_costar_rent) ~ non_residential_sqft + targetnonressqft'
    ' + I(development_type_id == "OF") + I(development_type_id == "RT")'
    ' + I(year_built < 1940) + I(year_built > 1990) + year_built'
    ' + mean_income + mean_hhsize + mean_hhchildren + mean_numvehicles'
    ' + mf_sf_ratio + resdensity + empdensity + nr_res_ratio'
    ' + yearbuilt_av + yearbuilt_sd + stories'
    ' + I(county_id == 1) + I(county_id == 13) + I(county_id == 41)'
    ' + I(county_id == 55) + I(county_id == 85) + I(county_id == 81)'
    ' + I(county_id == 95) + I(county_id == 97)'
    ' + e11_10 + e21_10 + e22_10 + e23_10 + e3133_10 + e42_10 + e4445_10'
    ' + e4849_10 + e51_10 + e52_10 + e53_10 + e54_10 + e55_10 + e56_10'
    ' + e61_10 + e62_10 + e71_10 + e72_10 + e81_10 + e92_10 + etot_10')
model = smf.ols(formula=specification, data=nonresprice_estimation_dataset)
results = model.fit()
print results.summary()

nonresbuildings = buildings[(buildings.res_type == 'other') & (buildings.non_residential_sqft > 0)]
sim_data = results.predict(nonresbuildings)
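# The specification models np.log(observed_costar_rent), so the raw
# predictions are in log space; exponentiate to recover rent per sqft.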
sim_data = np.exp(sim_data)
sim_data = pd.Series(sim_data, index=nonresbuildings.index)
buildings['nonres_rent_per_sqft'] = 0
buildings.loc[sim_data.index, 'nonres_rent_per_sqft'] = sim_data

#summary_output_path = loader.get_path('out/regeneration/summaries/price_summary.csv')
price_summary = pd.DataFrame({
    'avg_nonres_rent_per_sqft': buildings[buildings.nonres_rent_per_sqft > 0]
        .groupby('taz').nonres_rent_per_sqft.mean(),
    'avg_res_price_per_sqft': buildings[buildings.res_price_per_sqft > 0]
        .groupby('taz').res_price_per_sqft.mean()})
df_to_db(price_summary, 'summary_price', schema=loader.tables.public)


##Now export back to the database
buildings2 = buildings[['parcel_id', 'county_id', 'land_use_type_id', 'res_type',
                        'improvement_value', 'year_assessed', 'year_built',
                        'building_sqft', 'non_residential_sqft',
                        'residential_units', 'sqft_per_unit', 'stories',
                        'development_type_id', 'taz', 'redfin_sale_price',
                        'redfin_sale_year', 'redfin_home_type',
                        'costar_property_type', 'costar_rent',
                        'nonres_rent_per_sqft', 'res_price_per_sqft']]

df_to_db(buildings2, 'buildings', schema=loader.tables.public)
Example #19
def export_smt(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df.loc[df.res_type.isnull(), 'res_type'] = ''
    df_to_db(df, 'attributes_smt', schema=staging)
# Post scaling bound-checking
buildings2.loc[buildings2.year_built > year_built_upper_bound, 'year_built'] = year_built_upper_bound
buildings2.loc[buildings2.year_built < year_built_lower_bound, 'year_built'] = year_built_lower_bound


# COMPARE WITH TARGETS
targetunits['sf'] = buildings2[buildings2.res_type == 'single'].groupby(
    'taz').residential_units.sum()
targetunits['mf'] = buildings2[buildings2.res_type == 'multi'].groupby(
    'taz').residential_units.sum()
targetunits['nrsqft'] = buildings2.groupby('taz').non_residential_sqft.sum()
print targetunits[[
    'sf', 'targetSF', 'mf', 'targetMF', 'nrsqft', 'targetnonressqft'
]].head()
targetunits[['sf', 'targetSF', 'mf', 'targetMF', 'nrsqft',
             'targetnonressqft']].sum()
# summary_output_path = loader.get_path('out/regeneration/summaries/built_space_summary.csv')
# targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].to_csv(summary_output_path)
targetunits = targetunits[[
    'sf', 'targetSF', 'mf', 'targetMF', 'nrsqft', 'targetnonressqft'
]]
df_to_db(targetunits, 'summary_built_space', schema=loader.tables.public)


# EXPORT BUILDINGS TO DB
print 'Loading processed buildings to db'
df_to_db(buildings2, 'buildings', schema=loader.tables.public)


## Create geom_id (serves similar purpose to joinnumA) on parcels based on integer representation of geometry hash
idx = []
geom_hashes = []
for i, geom in parcels.geom.iteritems():
    idx.append(i)
    md5_hash = str(hashlib.md5(geom).hexdigest())
    geom_hashes.append(int(md5_hash[0:11], 16))

parcel_identifier = pd.Series(geom_hashes, index=idx)
parcels['geom_id'] = parcel_identifier