def export_ala(parcels_out):
    df = parcels_out.to_frame()

    # Cast "land_use_type_id" to string for compatibility with other counties.
    df['land_use_type_id'] = df.land_use_type_id.astype(str)

    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_ala', schema=staging)
targetunits['sf'] = buildings2[buildings2.res_type == 'single'].groupby(
    'taz').residential_units.sum()
targetunits['mf'] = buildings2[buildings2.res_type == 'multi'].groupby(
    'taz').residential_units.sum()
targetunits['nrsqft'] = buildings2.groupby('taz').non_residential_sqft.sum()

print targetunits[['sf', 'targetSF', 'mf', 'targetMF',
                   'nrsqft', 'targetnonressqft']].head()
targetunits[['sf', 'targetSF', 'mf', 'targetMF',
             'nrsqft', 'targetnonressqft']].sum()

# summary_output_path = loader.get_path('out/regeneration/summaries/built_space_summary.csv')
# targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].to_csv(summary_output_path)

targetunits = targetunits[['sf', 'targetSF', 'mf', 'targetMF',
                           'nrsqft', 'targetnonressqft']]
df_to_db(targetunits, 'summary_built_space', schema=loader.tables.public)

# EXPORT BUILDINGS TO DB
print 'Loading processed buildings to db'
df_to_db(buildings2, 'buildings', schema=loader.tables.public)

## Create geom_id (serves a similar purpose to joinnumA) on parcels, based on
## an integer representation of each geometry's hash.
idx = []
geom_hashes = []
for i, geom in parcels.geom.iteritems():
    idx.append(i)
    md5_hash = str(hashlib.md5(geom).hexdigest())
    geom_hashes.append(int(md5_hash[0:11], 16))

parcel_identifier = pd.Series(geom_hashes, index=idx)
parcels['geom_id'] = parcel_identifier
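# A worked example of the geom_id derivation above, using a made-up WKB hex
# string rather than a real parcel geometry (illustration only). The first
# 11 hex digits of the MD5 digest form a 44-bit integer, so geom_id always
# fits in a signed 64-bit / Postgres BIGINT column.
example_wkb = '0101000020F0080000'  # hypothetical geometry bytes
example_geom_id = int(hashlib.md5(example_wkb).hexdigest()[0:11], 16)
print example_geom_id          # deterministic for a given geometry
print example_geom_id < 2**44  # True: always within BIGINT range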
def export_cnc(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df.res_type[df.res_type.isnull()] = ''
    df_to_db(df, 'attributes_cnc', schema=staging)
probabilities = np.ones(len(alternative_ids))
num_resunits = len(alts)
print 'There are %s residential units in TAZ %s' % (num_resunits, taz)
choices = unit_choice(chooser_ids, alternative_ids, probabilities)
# households_urbansim.building_id[np.in1d(households_urbansim.index.values, chooser_ids)] = choices.values
hh.loc[chooser_ids, 'building_id'] = choices
if num_hh > num_resunits:
    print 'Warning: number of households exceeds number of resunits in TAZ %s' % taz

targetunits['hh_allocated'] = pd.merge(
    hh, buildings, left_on='building_id',
    right_index=True).groupby('taz_x').size()

df = targetunits[['targetunits', 'hh', 'hh_allocated']]
df['occupancy'] = df.hh * 1.0 / df.targetunits
print df.head()

# summary_output_path = loader.get_path('out/regeneration/summaries/hh_summary.csv')
df_to_db(df, 'summary_hh', schema=loader.tables.public)


################
#####JOBS#######
################

sector_columns = []
for col in targetunits.columns:
    if col.startswith('e') and col.endswith('_10'):
        sector_columns.append(col)
emp_targets = targetunits[sector_columns]
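# unit_choice is imported from elsewhere in this codebase and not shown here.
# Below is a minimal sketch of the assumed behavior (randomly matching
# choosers to units without replacement, weighted by the given
# probabilities); an assumption, not the actual implementation.
def unit_choice_sketch(chooser_ids, alternative_ids, probabilities):
    p = probabilities / probabilities.sum()
    n = min(len(chooser_ids), len(alternative_ids))
    chosen = np.random.choice(alternative_ids, size=n, replace=False, p=p)
    # Any choosers beyond the number of available units are left unmatched.
    return pd.Series(chosen, index=chooser_ids[:n])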
def export_son(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_son', schema=staging)
import pandas as pd, numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel

from spandex.io import exec_sql, df_to_db
from spandex import TableLoader

loader = TableLoader()

## Read Redfin CSV and load to database
redfin_csv_path = loader.get_path('built/bldg/homeprices/redfin_03feb14.csv')
redfin = pd.read_csv(redfin_csv_path)
redfin.index.name = 'idx'
df_to_db(redfin, 'redfin', schema=loader.tables.staging)

## Lat/long to point geometry, with the right SRID
exec_sql("ALTER TABLE staging.redfin ADD COLUMN geom geometry;")
exec_sql("UPDATE staging.redfin SET geom = ST_GeomFromText('POINT(' || longitude || ' ' || latitude || ')', 4326);")
exec_sql("CREATE INDEX redfin_gidx on staging.redfin using gist (geom);")
exec_sql("SELECT UpdateGeometrySRID('staging', 'redfin', 'geom', 2768);")
exec_sql("UPDATE staging.redfin SET geom = ST_TRANSFORM(ST_SetSRID(geom, 4326), 2768);")

## Append the unique parcel identifier to the Redfin records
exec_sql("alter table staging.redfin add gid integer default 0;")
exec_sql("update staging.redfin set gid = a.gid from parcels a where st_within(staging.redfin.geom, a.geom);")

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)
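# Quick sanity check on the spatial join above (a sketch, not part of the
# original script): gid defaults to 0, so any Redfin point that fell outside
# every parcel still has gid = 0.
unmatched = db_to_df("SELECT count(*) AS n FROM staging.redfin WHERE gid = 0;")
print '%s Redfin records did not match a parcel.' % unmatched.n.values[0]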
for grouper in ['county_id', 'taz']:
    df = tf[[grouper, 'non_residential_sqft', 'residential_units']]
    df.dropna(subset=[grouper], inplace=True)
    if grouper == 'taz':
        df[grouper] = df[grouper].astype(int)
    df['count'] = 1
    summary = df.groupby(grouper).sum()
    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary
    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df(
        "SELECT column_name FROM information_schema.columns WHERE table_name='parcel'"
        ).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")

if 'parcel_acres' in db_to_df(
        "SELECT column_name FROM information_schema.columns WHERE table_name='parcel'"
        ).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")

## Export parcel shapefile to output directory
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s parcel' %
          (parcel_output_dir, db_config['host'], db_config['user'],
           db_config['password'], db_config['database']))
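# A sketch of a safer alternative to the os.system call above: subprocess
# with an argument list runs the same pgsql2shp invocation without a shell,
# which avoids quoting problems with passwords and paths.
import subprocess

subprocess.check_call([
    'pgsql2shp', '-f', parcel_output_dir,
    '-h', db_config['host'],
    '-u', db_config['user'],
    '-P', db_config['password'],
    db_config['database'], 'parcel',
])

## Export buildings as csv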
random_allocate_agents(jobs, buildings, zone_id_col_name='block_id',
                       capacity_col_name='job_spaces')

# EXPORT DEMAND AGENTS TO DB
if 'block_id' in jobs.columns:
    del jobs['block_id']
if 'mgra_id' in households.columns:
    del households['mgra_id']

for df in [jobs, households]:
    df['building_id'][df['building_id'].isnull()] = -1
    for col in df.columns:
        df[col] = df[col].astype('int')

df_to_db(jobs, 'jobs', schema=loader.tables.public)
df_to_db(households, 'households', schema=loader.tables.public)

# Export formatted buildings to db
if 'luz_id' in buildings.columns:
    del buildings['luz_id']
if 'mgra_id' in buildings.columns:
    del buildings['mgra_id']
if 'block_id' in buildings.columns:
    del buildings['block_id']
if 'job_spaces' in buildings.columns:
    del buildings['job_spaces']
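# random_allocate_agents is defined elsewhere in this repo; the sketch below
# shows the behavior implied by the call above (an assumption, not the actual
# implementation): within each zone, agents are randomly assigned to building
# ids repeated by each building's capacity.
def random_allocate_agents_sketch(agents, buildings, zone_id_col_name,
                                  capacity_col_name):
    for zone, bldgs in buildings.groupby(zone_id_col_name):
        # One slot per unit of capacity, shuffled for random assignment.
        slots = np.repeat(bldgs.index.values,
                          bldgs[capacity_col_name].astype(int).values)
        np.random.shuffle(slots)
        agent_idx = agents.index[agents[zone_id_col_name] == zone]
        n = min(len(agent_idx), len(slots))
        agents.loc[agent_idx[:n], 'building_id'] = slots[:n]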
def export(parcels_out):
    schema = loader.tables.sample
    df_to_db(parcels_out.to_frame(), 'parcels_out', schema=schema)
def csv_to_staging(path, table_name):
    """Loads csv to staging schema on database."""
    df = pd.read_csv(path)
    df.index.name = 'idx'
    print 'Loading %s.' % table_name
    df_to_db(df, table_name, schema=loader.tables.staging)
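# Example usage of the helper above (the Redfin path from this repo's data
# setup, shown purely for illustration):
# csv_to_staging(loader.get_path('built/bldg/homeprices/redfin_03feb14.csv'),
#                'redfin')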
costar = pd.read_excel(costar_xls_path)

costar_solano_path = loader.get_path('built/bldg/costar/2011/costar__clean2011_sol_020315.csv')
costar_sol = pd.read_csv(costar_solano_path)

# Subset both CoStar extracts to the same columns, then standardize names.
costar_cols = ['PropertyID', 'Building Name', 'Latitude', 'Longitude',
               'Rentable Building Area', 'Year Built', 'PropertyType',
               'Secondary Type', 'Total Available Space (SF)',
               'Number Of Elevators', 'Last Sale Date', 'Last Sale Price',
               'Average Weighted Rent', 'Number Of Stories']
costar2 = costar[costar_cols]
costar_sol2 = costar_sol[costar_cols]

renamed_cols = ['propertyid', 'building_name', 'latitude', 'longitude',
                'rentable_area', 'year_built', 'property_type',
                'secondary_type', 'available_space', 'elevators',
                'last_sale_date', 'last_sale_price', 'rent', 'stories']
costar2.columns = renamed_cols
costar_sol2.columns = renamed_cols

costar2 = pd.concat([costar2, costar_sol2])

for text_col in ['building_name', 'property_type', 'secondary_type',
                 'last_sale_date']:
    costar2[text_col] = costar2[text_col].fillna(' ')
    costar2[text_col] = costar2[text_col].str.encode('utf-8')

costar2.last_sale_date = costar2.last_sale_date.fillna(' ')
costar2.last_sale_price = costar2.last_sale_price.str.replace(",", "").astype('float')
costar2.stories = costar2.stories.fillna(0).astype('int32')
costar2.index.name = 'idx'
df_to_db(costar2, 'costar', schema=loader.tables.staging)

## Lat/long to point geometry, with the right SRID
lat_long_to_point_geometry('costar', 'staging', 'longitude', 'latitude', 'geom', 2768)

## Append the unique parcel identifier to the Costar records
append_parcel_identifier('costar', 'staging', 'geom', 'gid')


######## *IMPUTE* ########

print 'Start point-based impute.'

## Load dataframes for the imputation
parcels = db_to_df('select gid, year_built, sqft_per_unit, non_residential_sqft, stories, imputation_flag from parcels;').set_index('gid')
costar = db_to_df('select * from staging.costar;')
redfin = db_to_df('select gid, yearbuilt, sqft, lastsalepr, saleyear, hometype from staging.redfin;')
gov_buildings = db_to_df('select * from staging.public_bldgs;')
gov_buildings.sqft = gov_buildings.sqft.str.replace(',', '').astype('int')
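# lat_long_to_point_geometry and append_parcel_identifier come from this
# project's shared utils and are not shown here. Below is a minimal sketch of
# what they are assumed to run, mirroring the inline SQL used for the Redfin
# table earlier (names suffixed _sketch to flag that these are guesses):
def lat_long_to_point_geometry_sketch(tbl, schema, x_col, y_col, geom_col, srid):
    exec_sql("ALTER TABLE %s.%s ADD COLUMN %s geometry;"
             % (schema, tbl, geom_col))
    exec_sql("UPDATE %s.%s SET %s = ST_Transform(ST_GeomFromText("
             "'POINT(' || %s || ' ' || %s || ')', 4326), %s);"
             % (schema, tbl, geom_col, x_col, y_col, srid))
    exec_sql("CREATE INDEX %s_gidx ON %s.%s USING gist (%s);"
             % (tbl, schema, tbl, geom_col))

def append_parcel_identifier_sketch(tbl, schema, geom_col, id_col):
    exec_sql("ALTER TABLE %s.%s ADD %s integer DEFAULT 0;"
             % (schema, tbl, id_col))
    exec_sql("UPDATE %s.%s t SET %s = a.gid FROM parcels a "
             "WHERE ST_Within(t.%s, a.geom);"
             % (schema, tbl, id_col, geom_col))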
# (Continuation of a loop over df.dtypes.iteritems(), where each item is a
# (column name, dtype) pair.)
col_name = col[0]
col_type = col[1]
firstval = df[col_name].loc[0]
if firstval in (True, False):
    if type(firstval) == bool:
        df[col_name] = df[col_name].fillna(False)
if col_type == np.int64:
    df[col_name] = df[col_name].fillna(0)
elif col_type == np.float64:
    df[col_name] = df[col_name].fillna(0.0)
elif col_type == np.object:
    df[col_name] = df[col_name].fillna(' ')

if 'id' in df.columns:
    new_id_colname = tbl + '_id'
    df = df.rename(columns={'id': new_id_colname})

df_to_db(df, tbl, schema=staging)

# Load excel
# xls_path = loader.get_path('scheduled/scheduled_development.xlsx')
# df_to_db(df, 'scheduled_development', schema = staging)

## MarketPointe multifamily rents (some munging needed to load)
xls_path = loader.get_path('price/marketPointe.xlsx')
df = pd.read_excel(xls_path)
df = df.rename(columns={'$/Sqft': 'price_per_sqft',
                        '#Bldg': 'number_of_buildings',
                        '#Stories': 'stories',
                        'ZipCode': 'zipcode'})
df.index.name = 'idx'

# Collapse story ranges to their upper bound.
df.stories[df.stories == '2-3'] = 3
df.stories[df.stories == '2-4'] = 4
df.stories[df.stories == '3-4'] = 4
# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
staging = loader.tables.staging
conform_srids(loader.srid, schema=staging, fix=True)

# Load county land use code mapping.
csv = loader.get_path('built/parcel/2010/rtp13_processing_notes/lucodes.csv')
df = pd.read_csv(csv, dtype=str)
df.dropna(how='any', inplace=True,
          subset=['county_id', 'land_use_type_id', 'development_type_id'])
df.index.name = 'index'
df_to_db(df, 'lucodes', schema=staging)

# Add county land use code mapping unique constraint.
exec_sql("""
ALTER TABLE staging.lucodes ADD CONSTRAINT lucodes_unique
UNIQUE (county_id, land_use_type_id);
""")
def export_sfr(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_sfr', schema=staging)
    # (Continuation of the targets_residential_price table construction.)
    'clip_high': [np.nan] * len(targetvalues),
    'int_result': [np.nan] * len(targetvalues)})

buildings = scl.scale_to_targets_from_table(buildings, targets_residential_price)
buildings.res_price_per_sqft[(buildings.res_price > 0) &
                             (buildings.sqft_per_unit > 0)] = \
    buildings.res_price / buildings.sqft_per_unit

# Nonresidential price imputation
nonresprice_estimation_dataset = buildings[
    (buildings.costar_property_type.str.len() > 2) &
    (buildings.res_type == 'other') &
    (~buildings.costar_rent.isin(['', '-', 'Negotiable', 'Withheld']))]
nonresprice_estimation_dataset['observed_costar_rent'] = \
    nonresprice_estimation_dataset.costar_rent.astype('float')

specification = (
    'np.log(observed_costar_rent) ~ non_residential_sqft + targetnonressqft'
    ' + I(development_type_id == "OF") + I(development_type_id == "RT")'
    ' + I(year_built < 1940) + I(year_built > 1990) + year_built'
    ' + mean_income + mean_hhsize + mean_hhchildren + mean_numvehicles'
    ' + mf_sf_ratio + resdensity + empdensity + nr_res_ratio'
    ' + yearbuilt_av + yearbuilt_sd + stories'
    ' + I(county_id == 1) + I(county_id == 13) + I(county_id == 41)'
    ' + I(county_id == 55) + I(county_id == 85) + I(county_id == 81)'
    ' + I(county_id == 95) + I(county_id == 97)'
    ' + e11_10 + e21_10 + e22_10 + e23_10 + e3133_10 + e42_10 + e4445_10'
    ' + e4849_10 + e51_10 + e52_10 + e53_10 + e54_10 + e55_10 + e56_10'
    ' + e61_10 + e62_10 + e71_10 + e72_10 + e81_10 + e92_10 + etot_10')

model = smf.ols(formula=specification, data=nonresprice_estimation_dataset)
results = model.fit()
print results.summary()

nonresbuildings = buildings[(buildings.res_type == 'other') &
                            (buildings.non_residential_sqft > 0)]
sim_data = results.predict(nonresbuildings)
sim_data = np.exp(sim_data)  # back-transform from log space
sim_data = pd.Series(sim_data, index=nonresbuildings.index)

buildings['nonres_rent_per_sqft'] = 0
buildings.loc[sim_data.index, 'nonres_rent_per_sqft'] = sim_data

# summary_output_path = loader.get_path('out/regeneration/summaries/price_summary.csv')
price_summary = pd.DataFrame({
    'avg_nonres_rent_per_sqft':
        buildings[buildings.nonres_rent_per_sqft > 0].groupby('taz').nonres_rent_per_sqft.mean(),
    'avg_res_price_per_sqft':
        buildings[buildings.res_price_per_sqft > 0].groupby('taz').res_price_per_sqft.mean()})
df_to_db(price_summary, 'summary_price', schema=loader.tables.public)

## Now export back to the database
buildings2 = buildings[['parcel_id', 'county_id', 'land_use_type_id',
                        'res_type', 'improvement_value', 'year_assessed',
                        'year_built', 'building_sqft', 'non_residential_sqft',
                        'residential_units', 'sqft_per_unit', 'stories',
                        'development_type_id', 'taz', 'redfin_sale_price',
                        'redfin_sale_year', 'redfin_home_type',
                        'costar_property_type', 'costar_rent',
                        'nonres_rent_per_sqft', 'res_price_per_sqft']]
df_to_db(buildings2, 'buildings', schema=loader.tables.public)
def export_smt(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df.res_type[df.res_type.isnull()] = ''
    df_to_db(df, 'attributes_smt', schema=staging)
# Post-scaling bound checking
buildings2.year_built[buildings2.year_built > year_built_upper_bound] = year_built_upper_bound
buildings2.year_built[buildings2.year_built < year_built_lower_bound] = year_built_lower_bound
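# A sketch of an equivalent, more idiomatic alternative to the two masked
# assignments above: pandas' clip applies the same upper and lower bounds.
buildings2['year_built'] = buildings2.year_built.clip(
    lower=year_built_lower_bound, upper=year_built_upper_bound)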