Code Example #1
File: ala.py Project: ual/DOE-repo-deliverable
def export_ala(parcels_out):
    df = parcels_out.to_frame()

    # Cast "land_use_type_id" to string for compatibility with other counties.
    df['land_use_type_id'] = df.land_use_type_id.astype(str)

    assert df.index.is_unique
    assert not df.index.hasnans  # hasnans is a property, not a method
    df_to_db(df, 'attributes_ala', schema=staging)
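For orientation, this export pattern assumes a spandex TableLoader is already configured; a minimal setup sketch (the loader wiring is an assumption, only the export call appears in the snippet):

from spandex import TableLoader
from spandex.io import df_to_db

loader = TableLoader()
staging = loader.tables.staging  # the schema object referenced as `staging` above
# parcels_out is assumed to be a table wrapper exposing to_frame(), as in the
# UrbanSim convention; export_ala(parcels_out) then writes staging.attributes_ala.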
Code Example #2
File: ala.py Project: akselx/bayarea_urbansim-1
def export_ala(parcels_out):
    df = parcels_out.to_frame()

    # Cast "land_use_type_id" to string for compatibility with other counties.
    df['land_use_type_id'] = df.land_use_type_id.astype(str)

    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_ala', schema=staging)
Code Example #3
targetunits['sf'] = buildings2[buildings2.res_type == 'single'].groupby(
    'taz').residential_units.sum()
targetunits['mf'] = buildings2[buildings2.res_type == 'multi'].groupby(
    'taz').residential_units.sum()
targetunits['nrsqft'] = buildings2.groupby('taz').non_residential_sqft.sum()
print targetunits[[
    'sf', 'targetSF', 'mf', 'targetMF', 'nrsqft', 'targetnonressqft'
]].head()
targetunits[['sf', 'targetSF', 'mf', 'targetMF', 'nrsqft',
             'targetnonressqft']].sum()
# summary_output_path = loader.get_path('out/regeneration/summaries/built_space_summary.csv')
# targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].to_csv(summary_output_path)
targetunits = targetunits[[
    'sf', 'targetSF', 'mf', 'targetMF', 'nrsqft', 'targetnonressqft'
]]
df_to_db(targetunits, 'summary_built_space', schema=loader.tables.public)

# EXPORT BUILDINGS TO DB
print 'Loading processed buildings to db'
df_to_db(buildings2, 'buildings', schema=loader.tables.public)

## Create geom_id (serves similar purpose to joinnumA) on parcels based on integer representation of geometry hash
idx = []
geom_hashes = []
for i, geom in parcels.geom.iteritems():
    idx.append(i)
    md5_hash = str(hashlib.md5(geom).hexdigest())
    geom_hashes.append(int(md5_hash[0:11], 16))

parcel_identifier = pd.Series(geom_hashes, index=idx)
parcels['geom_id'] = parcel_identifier
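The hash loop above can be collapsed into a single map call with identical results; note that 11 hex digits is only 44 bits, so the resulting geom_id fits safely in a signed 64-bit integer column. A sketch, assuming parcels.geom holds the same hashable values iterated over above:

parcels['geom_id'] = parcels.geom.map(
    lambda g: int(hashlib.md5(g).hexdigest()[:11], 16))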
Code Example #4
File: cnc.py Project: akselx/bayarea_urbansim-1
def export_cnc(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df.loc[df.res_type.isnull(), 'res_type'] = ''
    df_to_db(df, 'attributes_cnc', schema=staging)
Code Example #5
    probabilities = np.ones(len(alternative_ids))
    num_resunits = len(alts)
    print 'There are %s residential units in TAZ %s' % (num_resunits, taz)
    choices = unit_choice(chooser_ids, alternative_ids, probabilities)
    #households_urbansim.building_id[np.in1d(households_urbansim.index.values,chooser_ids)] = choices.values
    hh.loc[chooser_ids, 'building_id'] = choices
    if num_hh > num_resunits:
        print 'Warning:  number of households exceeds number of resunits in TAZ %s' % taz
targetunits['hh_allocated'] = pd.merge(hh, buildings, left_on='building_id', right_index=True).groupby('taz_x').size()

df = targetunits[['targetunits', 'hh', 'hh_allocated']].copy()
df['occupancy'] = df.hh * 1.0 / df.targetunits

print df.head()
#summary_output_path = loader.get_path('out/regeneration/summaries/hh_summary.csv')
df_to_db(df, 'summary_hh', schema=loader.tables.public)
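One caveat on the occupancy ratio above: any TAZ with zero targetunits yields inf (or NaN for 0/0). If that matters downstream, a guarded variant is a one-line change (a sketch, not from the source):

df['occupancy'] = df.hh * 1.0 / df.targetunits.replace(0, np.nan)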



################
#####JOBS#######
################

sector_columns = []
for col in targetunits.columns:
    if col.startswith('e'):
        if col.endswith('_10'):
            sector_columns.append(col)

emp_targets = targetunits[sector_columns]
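The nested-if filter above can be written as a single comprehension with identical behavior:

sector_columns = [col for col in targetunits.columns
                  if col.startswith('e') and col.endswith('_10')]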
Code Example #6
File: son.py Project: akselx/bayarea_urbansim-1
def export_son(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_son', schema=staging)
Code Example #7
import pandas as pd
import numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel
from spandex.io import exec_sql, df_to_db
from spandex import TableLoader

loader = TableLoader()

##Read Redfin CSV and load to database
redfin_csv_path = loader.get_path('built/bldg/homeprices/redfin_03feb14.csv')
redfin = pd.read_csv(redfin_csv_path)
redfin.index.name = 'idx'
df_to_db(redfin, 'redfin', schema=loader.tables.staging)

##Lat/long to point geometry, with the right SRID
exec_sql("ALTER TABLE staging.redfin ADD COLUMN geom geometry;")
exec_sql("UPDATE staging.redfin SET geom = ST_GeomFromText('POINT(' || longitude || ' ' || latitude || ')',4326);")
exec_sql("CREATE INDEX redfin_gidx on staging.redfin using gist (geom);")
exec_sql("SELECT UpdateGeometrySRID('staging', 'redfin', 'geom', 2768);")
exec_sql("UPDATE staging.redfin SET geom = ST_TRANSFORM(ST_SetSRID(geom, 4326), 2768);")

##Append the unique parcel identifier to the Redfin records
exec_sql("alter table staging.redfin add gid integer default 0;")
exec_sql("update staging.redfin set gid = a.gid from parcels a where st_within(staging.redfin.geom, a.geom);")

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)
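A usage sketch for db_to_df, pulling a few of the Redfin columns referenced later (the exact query is illustrative). Note that pandas.io.sql.read_frame was removed in later pandas releases; pandas.read_sql is the modern equivalent:

sample = db_to_df("SELECT gid, yearbuilt, lastsalepr FROM staging.redfin LIMIT 5;")
print(sample)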
Code Example #8
for grouper in ['county_id', 'taz']:
    df = tf[[grouper, 'non_residential_sqft', 'residential_units']].copy()
    df.dropna(subset=[grouper], inplace=True)

    if grouper == 'taz':
        df[grouper] = df[grouper].astype(int)

    df['count'] = 1
    summary = df.groupby(grouper).sum()

    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary

    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df(
        "SELECT column_name FROM information_schema.columns WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df(
        "SELECT column_name FROM information_schema.columns WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")

##  Export parcel shapefile to output directory
os.system('pgsql2shp -f "%s" -h %s -u %s -P %s %s parcel' % (parcel_output_dir, db_config['host'], db_config['user'], db_config['password'], db_config['database']))

##  Export buildings as csv
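The excerpt ends at this comment; a plausible completion, assuming a buildings DataFrame like the one exported elsewhere in these scripts (path and frame name are hypothetical):

buildings_csv_path = loader.get_path('out/regeneration/summaries/buildings.csv')
buildings.to_csv(buildings_csv_path)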
Code Example #9
File: process.py Project: UDST/sandag_urbansim
random_allocate_agents(jobs, buildings, zone_id_col_name='block_id', capacity_col_name='job_spaces')


#EXPORT DEMAND AGENTS TO DB
if 'block_id' in jobs.columns:
    del jobs['block_id']
    
if 'mgra_id' in households.columns:
    del households['mgra_id']
    
for df in [jobs, households]:
    df.loc[df['building_id'].isnull(), 'building_id'] = -1
    for col in df.columns:
        df[col] = df[col].astype('int')

df_to_db(jobs, 'jobs', schema=loader.tables.public)
df_to_db(households, 'households', schema=loader.tables.public)


#Export formatted buildings to db
if 'luz_id' in buildings.columns:
    del buildings['luz_id']

if 'mgra_id' in buildings.columns:
    del buildings['mgra_id']
    
if 'block_id' in buildings.columns:
    del buildings['block_id']
    
if 'job_spaces' in buildings.columns:
    del buildings['job_spaces']
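The excerpt stops after the column drops; given the 'Export formatted buildings to db' comment above, the export itself presumably mirrors the jobs/households calls:

df_to_db(buildings, 'buildings', schema=loader.tables.public)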
Code Example #10
                       zone_id_col_name='block_id',
                       capacity_col_name='job_spaces')

#EXPORT DEMAND AGENTS TO DB
if 'block_id' in jobs.columns:
    del jobs['block_id']

if 'mgra_id' in households.columns:
    del households['mgra_id']

for df in [jobs, households]:
    df.loc[df['building_id'].isnull(), 'building_id'] = -1
    for col in df.columns:
        df[col] = df[col].astype('int')

df_to_db(jobs, 'jobs', schema=loader.tables.public)
df_to_db(households, 'households', schema=loader.tables.public)

#Export formatted buildings to db
if 'luz_id' in buildings.columns:
    del buildings['luz_id']

if 'mgra_id' in buildings.columns:
    del buildings['mgra_id']

if 'block_id' in buildings.columns:
    del buildings['block_id']

if 'job_spaces' in buildings.columns:
    del buildings['job_spaces']
Code Example #11
File: summaries.py Project: ual/DOE-repo-deliverable
for grouper in ['county_id', 'taz']:
    df = tf[[grouper, 'non_residential_sqft', 'residential_units']].copy()
    df.dropna(subset=[grouper], inplace=True)

    if grouper == 'taz':
        df[grouper] = df[grouper].astype(int)

    df['count'] = 1
    summary = df.groupby(grouper).sum()

    if grouper == 'taz':
        summary['residential_units_target'] = targetunits
        taz_df = summary

    output_tablename = 'summary_{}'.format(grouper)
    df_to_db(summary, output_tablename, schema=loader.tables.public)

parcel_output_dir = loader.get_path('out/regeneration/summaries/parcels')

config = load_config()
db_config = dict(config.items('database'))

if 'taz_id' in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN taz_id to zone_id;")
if 'parcel_acres' in db_to_df(
        "SELECT column_name FROM information_schema.columns  WHERE table_name='parcel'"
).column_name.values:
    exec_sql("ALTER TABLE parcel RENAME COLUMN parcel_acres to acres;")
Code Example #12
File: test_io.py Project: UDST/spandex
def export(parcels_out):
    schema = loader.tables.sample
    df_to_db(parcels_out.to_frame(), 'parcels_out', schema=schema)
Code Example #13
def csv_to_staging(path, table_name):
    """Loads csv to staging schema on database"""
    df = pd.read_csv(path)
    df.index.name = 'idx'
    print 'Loading %s.' % table_name
    df_to_db(df, table_name, schema=loader.tables.staging)
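A usage sketch for csv_to_staging (the path and table name here are hypothetical):

csv_to_staging(loader.get_path('built/bldg/some_table.csv'), 'some_table')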
Code Example #14
costar = pd.read_excel(costar_xls_path)
costar_solano_path = loader.get_path('built/bldg/costar/2011/costar__clean2011_sol_020315.csv')
costar_sol = pd.read_csv(costar_solano_path)
costar2 = costar[['PropertyID', 'Building Name', 'Latitude', 'Longitude', 'Rentable Building Area', 'Year Built', 'PropertyType', 'Secondary Type', 'Total Available Space (SF)', 'Number Of Elevators', 'Last Sale Date', 'Last Sale Price', 'Average Weighted Rent', 'Number Of Stories']]
costar_sol2 = costar_sol[['PropertyID', 'Building Name', 'Latitude', 'Longitude', 'Rentable Building Area', 'Year Built', 'PropertyType', 'Secondary Type', 'Total Available Space (SF)', 'Number Of Elevators', 'Last Sale Date', 'Last Sale Price', 'Average Weighted Rent', 'Number Of Stories']]
costar2.columns = ['propertyid', 'building_name', 'latitude', 'longitude', 'rentable_area', 'year_built', 'property_type', 'secondary_type', 'available_space', 'elevators', 'last_sale_date', 'last_sale_price', 'rent', 'stories']
costar_sol2.columns = ['propertyid', 'building_name', 'latitude', 'longitude', 'rentable_area', 'year_built', 'property_type', 'secondary_type', 'available_space', 'elevators', 'last_sale_date', 'last_sale_price', 'rent', 'stories']
costar2 = pd.concat([costar2, costar_sol2])
for tex_col in ['building_name', 'property_type', 'secondary_type', 'last_sale_date']:
    costar2[tex_col] = costar2[tex_col].fillna(' ')
    costar2[tex_col] = costar2[tex_col].str.encode('utf-8')
costar2.last_sale_date = costar2.last_sale_date.fillna(' ')
costar2.last_sale_price = costar2.last_sale_price.str.replace(",", "").astype('float')
costar2.stories = costar2.stories.fillna(0).astype('int32')
costar2.index.name = 'idx'
df_to_db(costar2, 'costar', schema=loader.tables.staging)
##Lat/long to point geometry, with the right SRID
lat_long_to_point_geometry('costar', 'staging', 'longitude', 'latitude', 'geom', 2768)
##Append the unique parcel identifier to the Costar records
append_parcel_identifier('costar', 'staging', 'geom', 'gid')
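lat_long_to_point_geometry and append_parcel_identifier are project helpers not shown here; judging by the inline SQL in Code Example #7, the first presumably wraps statements along these lines (a sketch, not the actual implementation):

def lat_long_to_point_geometry(table, schema, lon_col, lat_col, geom_col, srid):
    # Mirrors the inline SQL in Code Example #7: build a WGS84 point from
    # lon/lat, then reproject to the target SRID.
    exec_sql("ALTER TABLE %s.%s ADD COLUMN %s geometry;" % (schema, table, geom_col))
    exec_sql("UPDATE %s.%s SET %s = ST_Transform(ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s);"
             % (schema, table, geom_col, lon_col, lat_col, srid))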


######## *IMPUTE* ########
print 'Start point-based impute.'

## Load dataframes for the imputation
parcels = db_to_df('select gid, year_built, sqft_per_unit, non_residential_sqft, stories, imputation_flag from parcels;').set_index('gid')
costar = db_to_df('select * from staging.costar;')
redfin = db_to_df('select gid, yearbuilt, sqft, lastsalepr, saleyear, hometype from staging.redfin;')
gov_buildings = db_to_df('select * from staging.public_bldgs;')
gov_buildings.sqft = gov_buildings.sqft.str.replace(',', '').astype('int')
Code Example #15
File: load.py Project: afcarl/sandag_urbansim
            col_name = col[0]
            col_type = col[1]
            firstval = df[col_name].loc[0]
            if firstval in (True, False):
                if type(firstval) == bool:
                    df[col_name] = df[col_name].fillna(False)
            if col_type == np.int64:
                df[col_name] = df[col_name].fillna(0)
            elif col_type == np.float64:
                df[col_name] = df[col_name].fillna(0.0)
            elif col_type == np.object:
                df[col_name] = df[col_name].fillna(' ')
    if 'id' in df.columns:
        new_id_colname = tbl + '_id'
        df = df.rename(columns={'id': new_id_colname})
    df_to_db(df, tbl, schema=staging)


# Load excel

# xls_path = loader.get_path('scheduled/scheduled_development.xlsx')
# df_to_db(df, 'scheduled_development', schema = staging)

## MarketPointe multifamily rents (some munging needed to load)
xls_path = loader.get_path('price/marketPointe.xlsx')
df = pd.read_excel(xls_path)
df = df.rename(columns={'$/Sqft': 'price_per_sqft', '#Bldg': 'number_of_buildings', '#Stories': 'stories', 'ZipCode': 'zipcode'})
df.index.name = 'idx'
df.loc[df.stories == '2-3', 'stories'] = 3
df.loc[df.stories == '2-4', 'stories'] = 4
df.loc[df.stories == '3-4', 'stories'] = 4
Code Example #16

# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
staging = loader.tables.staging
conform_srids(loader.srid, schema=staging, fix=True)

# Load county land use code mapping.
csv = loader.get_path('built/parcel/2010/rtp13_processing_notes/lucodes.csv')
df = pd.read_csv(csv, dtype=str)
df.dropna(how='any', inplace=True,
          subset=['county_id', 'land_use_type_id', 'development_type_id'])
df.index.name = 'index'
df_to_db(df, 'lucodes', schema=staging)

# Add county land use code mapping unique constraint.
exec_sql("""
ALTER TABLE staging.lucodes ADD CONSTRAINT lucodes_unique
UNIQUE (county_id, land_use_type_id);
""")
Code Example #17
File: sfr.py Project: ual/DOE-repo-deliverable
def export_sfr(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df_to_db(df, 'attributes_sfr', schema=staging)
Code Example #18
     'clip_high': [np.nan]*len(targetvalues),
     'int_result': [np.nan]*len(targetvalues)})
buildings = scl.scale_to_targets_from_table(buildings, targets_residential_price)

buildings.loc[(buildings.res_price > 0) & (buildings.sqft_per_unit > 0), 'res_price_per_sqft'] = buildings.res_price / buildings.sqft_per_unit

#Nonresidential price imputation
nonresprice_estimation_dataset = buildings[(buildings.costar_property_type.str.len() > 2) & (buildings.res_type == 'other') & (~buildings.costar_rent.isin(['', '-', 'Negotiable', 'Withheld']))].copy()
nonresprice_estimation_dataset['observed_costar_rent'] = nonresprice_estimation_dataset.costar_rent.astype('float')

specification = 'np.log(observed_costar_rent) ~ non_residential_sqft + targetnonressqft + I(development_type_id == "OF") + I(development_type_id == "RT") + I(year_built < 1940) + I(year_built > 1990) + year_built + mean_income + mean_hhsize + mean_hhchildren + mean_numvehicles + mf_sf_ratio + resdensity + empdensity + nr_res_ratio + yearbuilt_av + yearbuilt_sd + stories + I(county_id == 1) + I(county_id == 13) + I(county_id == 41) + I(county_id == 55) + I(county_id == 85) + I(county_id == 81) + I(county_id == 95) + I(county_id == 97) + e11_10 + e21_10 + e22_10 + e23_10 + e3133_10 + e42_10 + e4445_10 + e4849_10 + e51_10 + e52_10 + e53_10 + e54_10 + e55_10 + e56_10 + e61_10 + e62_10 + e71_10 + e72_10 + e81_10 + e92_10 + etot_10'
model = smf.ols(formula=specification, data=nonresprice_estimation_dataset)
results = model.fit()
print results.summary()

nonresbuildings = buildings[(buildings.res_type == 'other') & (buildings.non_residential_sqft > 0)]
sim_data = results.predict(nonresbuildings)
sim_data = np.exp(sim_data)
sim_data = pd.Series(sim_data, index=nonresbuildings.index)
buildings['nonres_rent_per_sqft'] = 0
buildings.loc[sim_data.index,'nonres_rent_per_sqft'] = sim_data

#summary_output_path = loader.get_path('out/regeneration/summaries/price_summary.csv')
price_summary = pd.DataFrame({
    'avg_nonres_rent_per_sqft': buildings[buildings.nonres_rent_per_sqft > 0].groupby('taz').nonres_rent_per_sqft.mean(),
    'avg_res_price_per_sqft': buildings[buildings.res_price_per_sqft > 0].groupby('taz').res_price_per_sqft.mean(),
})
df_to_db(price_summary, 'summary_price', schema=loader.tables.public)


##Now export back to the database
buildings2 = buildings[['parcel_id','county_id', 'land_use_type_id', 'res_type', 'improvement_value', 'year_assessed', 'year_built', 'building_sqft', 'non_residential_sqft', 'residential_units', 'sqft_per_unit', 'stories', 'development_type_id', 'taz', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'nonres_rent_per_sqft', 'res_price_per_sqft']]

df_to_db(buildings2, 'buildings', schema=loader.tables.public)
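A statistical caveat on the rent imputation above: exponentiating predictions from a log-linear model understates the conditional mean of the untransformed rent. Under approximately normal residuals, the standard lognormal retransformation adds half the residual variance before exponentiating; a sketch using statsmodels' mse_resid (whether the original pipeline intended this correction is an assumption):

sim_data = np.exp(results.predict(nonresbuildings) + 0.5 * results.mse_resid)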
Code Example #19
File: smt.py Project: ual/DOE-repo-deliverable
def export_smt(parcels_out):
    df = parcels_out.to_frame()
    assert df.index.is_unique
    assert not df.index.hasnans
    df.loc[df.res_type.isnull(), 'res_type'] = ''
    df_to_db(df, 'attributes_smt', schema=staging)
Code Example #20
# Post scaling bound-checking
buildings2.loc[buildings2.year_built > year_built_upper_bound, 'year_built'] = year_built_upper_bound
buildings2.loc[buildings2.year_built < year_built_lower_bound, 'year_built'] = year_built_lower_bound


# COMPARE WITH TARGETS
targetunits['sf'] = buildings2[buildings2.res_type == 'single'].groupby('taz').residential_units.sum()
targetunits['mf'] = buildings2[buildings2.res_type == 'multi'].groupby('taz').residential_units.sum()
targetunits['nrsqft'] = buildings2.groupby('taz').non_residential_sqft.sum()
print targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].head()
targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].sum()
# summary_output_path = loader.get_path('out/regeneration/summaries/built_space_summary.csv')
# targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']].to_csv(summary_output_path)
targetunits = targetunits[['sf','targetSF','mf','targetMF', 'nrsqft', 'targetnonressqft']]
df_to_db(targetunits, 'summary_built_space', schema=loader.tables.public)


# EXPORT BUILDINGS TO DB
print 'Loading processed buildings to db'
df_to_db(buildings2, 'buildings', schema=loader.tables.public)


## Create geom_id (serves similar purpose to joinnumA) on parcels based on integer representation of geometry hash
idx = []
geom_hashes = []
for i, geom in parcels.geom.iteritems():
    idx.append(i)
    md5_hash = str(hashlib.md5(geom).hexdigest())
    geom_hashes.append(int(md5_hash[0:11], 16))
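The snippet is truncated here; Code Example #3 shows how the same loop concludes, assembling the hashes into a Series and attaching it to the parcels table:

parcel_identifier = pd.Series(geom_hashes, index=idx)
parcels['geom_id'] = parcel_identifier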