def simulate(dset, config, year=None, show=True, variables=None):
    """Evaluate the configured network variables for every network node.

    On the first call the module-level ``NETWORKS`` object is built from
    ``config['networks']``; later calls reuse it.  Each name listed in
    ``config['ind_vars']`` is looked up in ``config['var_lib']`` and the
    resulting expression string is evaluated to produce one float column of
    a table indexed by ``NETWORKS.nodeids``.

    Args:
        dset: dataset object; only ``save_tmptbl`` is called on it here, but
            it is also visible to the evaluated expressions.
        config: mapping with the keys
            ``networks`` (needed only while ``NETWORKS`` is unset) holding
            ``filenames``, ``factors``, ``maxdistances``, ``twoway`` and
            optionally ``impedances``;
            ``ind_vars`` list of variable names to compute;
            ``var_lib`` mapping of variable name to expression string;
            ``show`` (optional) print a summary of the table when truthy;
            ``writetotmp`` (optional) name under which the table is saved.
        year: unused, kept for interface compatibility.
        show: unused; printing is controlled by ``config['show']``.
        variables: unused, kept for interface compatibility.

    Raises:
        AssertionError: if the network configuration is incomplete.
        Exception: if ``ind_vars`` or ``var_lib`` is missing from ``config``.
    """
    global NETWORKS
    if not NETWORKS:
        # Build the shared network object once; it is kept in the module
        # global so that subsequent calls skip this step.
        assert 'networks' in config, "config has no 'networks' section"
        netconfig = config['networks']
        missing = [key for key in ('filenames', 'factors', 'maxdistances',
                                   'twoway') if key not in netconfig]
        assert not missing, \
            "networks config is missing: %s" % ', '.join(missing)
        impedances = netconfig['impedances'] \
            if 'impedances' in netconfig else None
        NETWORKS = Networks(
            [os.path.join(misc.data_dir(), x) for x in netconfig['filenames']],
            factors=netconfig['factors'],
            maxdistances=netconfig['maxdistances'],
            twoway=netconfig['twoway'],
            impedances=impedances)

    t1 = time.time()

    if "ind_vars" not in config:
        raise Exception("No ind_vars specification")
    if "var_lib" not in config:
        raise Exception("All network variables are defined in local var_lib")

    _tbl_ = pd.DataFrame(index=pd.MultiIndex.from_tuples(NETWORKS.nodeids))
    for varname in config["ind_vars"]:
        expression = config["var_lib"][varname]
        # SECURITY: the expression comes straight from the configuration and
        # is executed with eval(); only run trusted configuration files.
        # The eval must stay in this scope: expressions can refer to the
        # local names (dset, _tbl_, ...) and to the NETWORKS global.
        _tbl_[varname] = eval(expression).astype('float')

    if 'show' in config and config['show']:
        # Single-argument print() behaves the same on Python 2 and 3.
        print(_tbl_.describe())
    if "writetotmp" in config:
        dset.save_tmptbl(config["writetotmp"], _tbl_)

    print("Finished executing in %f seconds" % (time.time() - t1))
def fetch_networks(self, reload=True, maxdistance=30, rootdir=None,
                   custom_impedances=None):
    """Return a ``networks.Networks`` object, rebuilding it unless told not to.

    With ``reload`` true (the default) a fresh ``Networks`` instance is
    created, ``process_network`` is run on it with ``walkminutes=1`` and the
    result is stored on ``self.networks`` before being returned.  With
    ``reload`` false a ``Networks`` instance is constructed from the
    ``network%d.pkl`` path inside the data directory and returned directly;
    ``self.networks`` is not touched in that case.

    Args:
        reload: rebuild the network instead of constructing it from the
            pickle path.
        maxdistance: forwarded to ``process_network``.
        rootdir: forwarded to ``process_network``.
        custom_impedances: forwarded to ``process_network``.
    """
    if reload:
        rebuilt = networks.Networks()
        rebuilt.process_network(maxdistance, rootdir, walkminutes=1,
                                custom_impedances=custom_impedances)
        self.networks = rebuilt
        return self.networks

    # NOTE(review): the '%d' placeholder is passed through unformatted;
    # presumably Networks fills it in itself -- confirm.
    pickle_pattern = os.path.join(misc.data_dir(), 'network%d.pkl')
    return networks.Networks(pickle_pattern)
def simulate(dset, config, year=None, show=True, variables=None):
    """Evaluate the configured network variables for every network node.

    On the first call the module-level ``NETWORKS`` object is built from
    ``config['networks']``; later calls reuse it.  Each name listed in
    ``config['ind_vars']`` is looked up in ``config['var_lib']`` and the
    resulting expression string is evaluated to produce one float column of
    a table indexed by ``NETWORKS.nodeids``.

    Args:
        dset: dataset object; only ``save_tmptbl`` is called on it here, but
            it is also visible to the evaluated expressions.
        config: mapping with the keys
            ``networks`` (needed only while ``NETWORKS`` is unset) holding
            ``filenames``, ``factors``, ``maxdistances``, ``twoway`` and
            optionally ``impedances``;
            ``ind_vars`` list of variable names to compute;
            ``var_lib`` mapping of variable name to expression string;
            ``show`` (optional) print a summary of the table when truthy;
            ``writetotmp`` (optional) name under which the table is saved.
        year: unused, kept for interface compatibility.
        show: unused; printing is controlled by ``config['show']``.
        variables: unused, kept for interface compatibility.

    Raises:
        AssertionError: if the network configuration is incomplete.
        Exception: if ``ind_vars`` or ``var_lib`` is missing from ``config``.
    """
    global NETWORKS
    if not NETWORKS:
        # Build the shared network object once; it is kept in the module
        # global so that subsequent calls skip this step.
        assert 'networks' in config, "config has no 'networks' section"
        netconfig = config['networks']
        missing = [key for key in ('filenames', 'factors', 'maxdistances',
                                   'twoway') if key not in netconfig]
        assert not missing, \
            "networks config is missing: %s" % ', '.join(missing)
        impedances = netconfig['impedances'] \
            if 'impedances' in netconfig else None
        NETWORKS = Networks(
            [os.path.join(misc.data_dir(), x) for x in netconfig['filenames']],
            factors=netconfig['factors'],
            maxdistances=netconfig['maxdistances'],
            twoway=netconfig['twoway'],
            impedances=impedances)

    t1 = time.time()

    if "ind_vars" not in config:
        raise Exception("No ind_vars specification")
    if "var_lib" not in config:
        raise Exception("All network variables are defined in local var_lib")

    _tbl_ = pd.DataFrame(index=pd.MultiIndex.from_tuples(NETWORKS.nodeids))
    for varname in config["ind_vars"]:
        expression = config["var_lib"][varname]
        # SECURITY: the expression comes straight from the configuration and
        # is executed with eval(); only run trusted configuration files.
        # The eval must stay in this scope: expressions can refer to the
        # local names (dset, _tbl_, ...) and to the NETWORKS global.
        _tbl_[varname] = eval(expression).astype('float')

    if 'show' in config and config['show']:
        # Single-argument print() behaves the same on Python 2 and 3.
        print(_tbl_.describe())
    if "writetotmp" in config:
        dset.save_tmptbl(config["writetotmp"], _tbl_)

    print("Finished executing in %f seconds" % (time.time() - t1))
def estimate_elasticity(self, zones):
    """Estimate the demand elasticity from zone-level data.

    Fits ``py.spreg.twosls_sp.GM_Lag`` (spatial two-stage least squares with
    queen-contiguity weights read from ``shapefiles/zones.shp`` in the data
    directory).  The dependent variable is ``log1p`` of
    ``residential_sqft_zone``; ``ln_avg_unit_price_zone`` is the endogenous
    regressor, instrumented by ``ln_avg_nonres_unit_price_zone`` and the
    zone-average FAR taken from ``self.buildings_far``.  Missing values are
    replaced by 0 before estimation.

    Args:
        zones: zone DataFrame with a ``county`` column whose dummies include
            the ten county names used below, plus the regressor columns
            referenced in this function.  It is not modified.

    Returns:
        ``1 / abs(beta)`` where ``beta`` is the coefficient stored right
        after the constant and the exogenous regressors in ``betas``.
    """
    county_names = ['Arapahoe', 'Boulder', 'Broomfield', 'Clear Creek',
                    'Denver', 'Douglas', 'Elbert', 'Gilpin', 'Jefferson',
                    'Weld']
    logged_names = ['zonal_pop', 'mean_income']  # entered as log1p
    level_names = ['ln_jobs_within_30min', 'zone_contains_park'] + county_names
    x_names = logged_names + level_names

    dummies = pd.get_dummies(zones.county)
    zones = pd.concat([zones, dummies], axis=1)
    # use far_x because Xavier's code adds far to buildings
    zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean()

    # Path components are joined separately so the file is also found on
    # platforms where '\\' is not a path separator.
    wqueen = py.queen_from_shapefile(
        os.path.join(misc.data_dir(), 'shapefiles', 'zones.shp'))
    w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)

    # Exogenous regressors, in the same column order as x_names.
    x = zones[logged_names].apply(np.log1p)
    for name in level_names:
        x[name] = zones[name]
    # .values replaces the deprecated as_matrix() and works on old pandas too.
    x = x.fillna(0).values

    imat_names = ['non_res_price', 'avg_far']
    imat = zones[['ln_avg_nonres_unit_price_zone', 'avg_far']].fillna(0).values

    n_zones = zones.shape[0]
    yend_name = ['ln_avg_unit_price_zone']
    yend = np.reshape(zones['ln_avg_unit_price_zone'].fillna(0).values,
                      (n_zones, 1))

    y_name = 'residential_sqft_zone'
    y = zones['residential_sqft_zone'].fillna(0).apply(np.log1p).values
    y = np.reshape(y, (n_zones, 1))

    reg_2sls = py.spreg.twosls_sp.GM_Lag(
        y, x, yend=yend, q=imat, w=w, w_lags=2, robust='white',
        name_x=x_names, name_q=imat_names, name_y=y_name,
        name_yend=yend_name)

    # Position of the coefficient that follows the constant and the
    # exogenous regressors (the hard-coded 15 of the original code, now
    # derived from the regressor list so the two cannot drift apart).
    price_coeff_idx = 1 + len(x_names)
    demand_elasticity = np.absolute(reg_2sls.betas[price_coeff_idx])
    demand_elasticity = 1 / demand_elasticity[0]
    return demand_elasticity
def data_zone_census(zones):
    """Merge zone data with ``census_zone.csv`` and derive regression inputs.

    Adds log income and price, race-composition shares and, for every zone,
    the log ratio of the highest/lowest median income among its 10 nearest
    neighbours (by zone centroid) to the zone's own median income.

    Changes with respect to the previous implementation:
      * every zone is processed; the old loop stopped one short, leaving the
        last zone with a max/min of 0 and therefore a log ratio of -inf;
      * ``percent_black2`` is the square of ``percent_black`` (the old
        expression divided by the squared total because of operator
        precedence);
      * the scratch ``neigh`` indicator column is no longer written to (and
        left in) the returned frame.

    Args:
        zones: DataFrame with ``zone_id``, ``zonecentroid_x``,
            ``zonecentroid_y`` and ``ln_residential_sqft_mean`` columns.

    Returns:
        The merged DataFrame indexed by ``zone_id``.
    """
    data_census = pd.read_csv(os.path.join(misc.data_dir(), 'census_zone.csv'))
    data = pd.merge(zones, data_census, on='zone_id', how='inner')

    # Income using census block group data
    data['median_income'] = data['median_income'].astype(float)
    data['ln_inc'] = np.log(data['median_income'])

    # Asked price (census)
    data['ln_price'] = np.log(data['median_value'])

    # Race composition
    race_columns = [
        'White alone',
        'Black or African American alone',
        'American Indian and Alaska Native alone',
        'Asian alone',
        'Native Hawaiian and Other Pacific Islander alone',
        'Some other race alone',
        'two races or more',
    ]
    all_races = data[race_columns[0]].apply(float)
    for column in race_columns[1:]:
        all_races = all_races + data[column].apply(float)
    data['all races'] = all_races
    # NOTE: despite its name, percent_white holds the log of the white share.
    data['percent_white'] = np.log(data['White alone'] / data['all races'])
    data['percent_black'] = (data['Black or African American alone'] /
                             data['all races'])
    data['percent_black2'] = data['percent_black'] ** 2
    data['ln_residential_sqft_mean2'] = data['ln_residential_sqft_mean'] ** 2

    # Max and min income of neighbors (can have important implications in
    # terms of gentrification).
    geo = np.array(data[['zonecentroid_x', 'zonecentroid_y']])
    w = pysal.knnW(geo, k=10)
    n = len(geo)
    incomes = data['median_income']
    neigh_income_max = np.zeros(n)
    neigh_income_min = np.zeros(n)
    for i in range(n):
        # Neighbour ids are row positions in geo, hence positional lookup.
        neighbour_incomes = incomes.iloc[list(w.neighbors[i])]
        neigh_income_max[i] = neighbour_incomes.max()
        neigh_income_min[i] = neighbour_incomes.min()
    data['ln_neigh_income_max'] = np.log(neigh_income_max /
                                         data['median_income'])
    data['ln_neigh_income_min'] = np.log(neigh_income_min /
                                         data['median_income'])

    data = data.set_index(data['zone_id'])
    return data
def second_stage(depvar, indvar, data, instrumented, instr, indvariv,
                 fixedeffect):
    """Run the second-stage price regression and store its coefficients.

    The first stage is delegated to ``instrument``; its predictions
    (``<name>_iv`` columns) replace the instrumented variables on the
    right-hand side.  A Poisson GLM is then fitted, the coefficients are
    registered on the module-level ``dset`` and written to ``coeffs_res.h5``
    in the data directory, and the fitted values are added to the data.

    Args:
        depvar: list of dependent-variable column names.
        indvar: list of right-hand-side column names.
        data: estimation DataFrame; must contain ``school_district_id``.
        instrumented: names of the variables replaced by their first-stage
            predictions.
        instr: passed through to ``instrument``.
        indvariv: passed through to ``instrument``.
        fixedeffect: list of fixed-effect column names; only used here to
            filter out non-finite rows.

    Returns:
        The filtered DataFrame with an added ``sim_price`` column.
    """
    # Instrumentation (first stage)
    data = instrument(instrumented, indvariv, data, instr, fixedeffect)

    # Make sure that there is no inf or nan in the RHS/LHS variables
    for varname in depvar + indvar + fixedeffect:
        data = data[np.isfinite(data[varname])]

    # Generate dummies for the categorical variable and remove one of them
    # (school district 8) to avoid multi-collinearity with the constant.
    x = pd.get_dummies(data['school_district_id'], prefix='sdis')
    del x['sdis_8']

    # Fill the right-hand side with the explanatory variables
    for varname in indvar:
        x[varname] = data[varname]

    # Replace each instrumented variable by its predictor from stage one
    for varname in instrumented:
        x[varname] = data[varname + '_iv']

    # Add a constant
    x['const'] = 1
    print(x)

    # Main regression: GLM estimation with a Poisson family.
    mod = sm.GLM(data[depvar], x, family=sm.families.Poisson())
    result = mod.fit()

    # Store the coefficients on the dataset and persist them to disk.
    dset.store_coeff("coeff_residential", result.params.values,
                     result.params.index)
    coeff_store_path = os.path.join(misc.data_dir(), 'coeffs_res.h5')
    coeff_store = pd.HDFStore(coeff_store_path)
    try:
        coeff_store['coeffs_res'] = dset.coeffs
    finally:
        # Always release the HDF5 file handle, even if the write fails.
        coeff_store.close()

    # Predicted prices
    data['sim_price'] = result.predict()
    print(result.summary())
    return data
def fetch_batshh(self, tenure=None):
    """Return the household survey table, optionally filtered by tenure.

    When the module flag ``USECHTS`` is set the table is read from
    ``bats2013MTC_household.csv`` in the data directory, otherwise it is
    taken from ``self.store['batshh']``.  Records with an out-of-range income
    code are dropped and an ``income_quartile`` column (integer codes 0-3)
    is added.

    Args:
        tenure: ``"sales"`` or ``"rent"`` to keep only the matching
            households; any other value returns all households.

    Returns:
        A new DataFrame; the stored/source table is not modified.
    """
    # NOTE(review): the two sources code tenure differently (OWN == 1 means
    # "sales" in the CSV, TENURE == 1 means "rent" in the store) -- confirm
    # this matches the respective survey codebooks.
    if USECHTS:
        batshh = pd.read_csv(
            os.path.join(misc.data_dir(), 'bats2013MTC_household.csv'))
        # remove bogus income records; copy so the new columns are added to
        # an independent frame rather than to a slice
        batshh = batshh[batshh['INCOM'] < 90].copy()
        # labels=False yields the integer bin codes directly and replaces
        # the ``.labels`` attribute that newer pandas no longer provides.
        batshh['income_quartile'] = pd.qcut(batshh['INCOM'], 4, labels=False)
        batshh['HHINCOME'] = batshh['INCOM']
        if tenure == "sales":
            batshh = batshh[batshh['OWN'] == 1]
        elif tenure == "rent":
            batshh = batshh[batshh['OWN'] == 2]
        return batshh

    batshh = self.store['batshh']
    # remove bogus income records
    batshh = batshh[batshh['HHINCOME'] < 16].copy()
    batshh['income_quartile'] = pd.qcut(batshh['HHINCOME'], 4, labels=False)
    if tenure == "rent":
        batshh = batshh[batshh['TENURE'] == 1]
    elif tenure == "sales":
        batshh = batshh[batshh['TENURE'] != 1]
    return batshh
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table=None,
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None,
             relocation_config=None):
    """Household location choice simulation for one year.

    Steps performed, in order:
      1. optional household transition (adds/removes households to match the
         control totals, new households get ``building_id`` -1);
      2. optional relocation (a share of the choosers is un-placed by setting
         ``depvar`` to -1);
      3. every un-placed household is assigned a county drawn from
         ``county_growth_share.csv`` for ``year``;
      4. vacant residential units are expanded into one alternative per unit
         and, per county and ``income_3_tenure`` segment, filtered so that
         the estimated mortgage payment is at most 33% of the segment's
         monthly income limit;
      5. households are placed by ``apply_func`` in a 4-process pool and the
         resulting building ids are written back to ``dset.households``.

    Args:
        dset: project dataset object (tables are fetched from and written
            back to it).
        year: simulation year; used for the transition and to select the row
            of the county growth-share table.
        depvar: chooser column that holds the chosen alternative.
        alternatives: buildings DataFrame; an ``est_mortgage_payment`` column
            is added to it in place.
        simulation_table: name of the chooser table to fetch from ``dset``.
        output_names: 4-tuple (csv name, title, coefficient name, output
            variable name); also forwarded to ``gen_probs``.
        agents_groupby: segmentation columns forwarded to ``gen_probs``.
        transition_config: mapping with ``Enabled`` and, when enabled,
            ``control_totals_table`` and ``scaling_factor``.
        relocation_config: mapping with ``Enabled`` and, when enabled,
            ``relocation_rates_table`` and ``scaling_factor``.
    """
    # NOTE(review): ``agents_groupby`` has a mutable list default; it is not
    # mutated in this function but is handed to gen_probs.
    # NOTE(review): these four unpacked names are not used below.
    output_csv, output_title, coeff_name, output_varname = output_names
    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        #import pdb; pdb.set_trace()
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        # Households created by the transition start out un-placed.
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']
    #calculate mortgage payment values
    # NOTE(review): temp_count, out_table and homeless are assigned but not
    # used in the visible part of this function.
    temp_count = 0
    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    # Monthly payment factor of a 30-year (360 month) loan at 5% per year.
    r = .05 / 12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r)**n) / ((1 + r)**n - 1))
    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)
    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are stored as percentages; convert to probabilities.
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        # NOTE(review): chained assignment; whether this writes through to
        # ``choosers`` depends on the pandas version -- consider
        # ``choosers.loc[movers, depvar] = -1``.
        choosers[depvar].ix[movers] = -1
    movers_all = choosers[choosers[depvar] == -1]
    #distribute county_ids based on demography projections
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),
                                                   'county_growth_share.csv'),
                                      index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties,
                                       movers_all.shape[0],
                                       replace=True,
                                       p=current_growth_shares)
    movers_all['county_id'] = movers_counties
    # Vacant units per building = residential units minus current occupants.
    empty_units = dset.buildings[(dset.buildings.residential_units >
                                  0)].residential_units.sub(
                                      choosers.groupby('building_id').size(),
                                      fill_value=0)
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    # One row per vacant unit (building ids repeated by their vacancy count).
    alts = alternatives.ix[np.repeat(empty_units.index.values,
                                     empty_units.values.astype('int'))]
    #create alternatives subset with mortage info
    r = .05 / 12
    n = 360
    # NOTE(review): the bare ``except`` hides every error, not only a missing
    # 'county_id' column.
    try:
        subset_alts = alts[['unit_price_residential', 'county_id']]
    except:
        subset_alts = alts[['unit_price_residential', 'county_id_y']]
        subset_alts.rename(columns={'county_id_y': 'county_id'}, inplace=True)
    subset_alts['payment'] = alts.unit_price_residential * (
        (r * (1 + r)**n) / ((1 + r)**n - 1))
    #generate probabilities
    pdf = gen_probs(dset, movers_all, agents_groupby, alts, output_names)
    #build data structures for loop
    #income_3_tenure limits
    # Monthly income ceiling of each income_3_tenure segment (1-5).
    income_limits = {
        1: 60000 / 12,
        2: 120000 / 12,
        3: dset.households.income.max() / 12,
        4: 40000 / 12,
        5: dset.households.income.max() / 12
    }
    # A unit is affordable for a segment when the payment is at most 33% of
    # the segment's monthly income ceiling.
    bool_price1 = (subset_alts.payment / income_limits[1]) <= 0.33
    bool_price2 = (subset_alts.payment / income_limits[2]) <= 0.33
    bool_price3 = (subset_alts.payment / income_limits[3]) <= 0.33
    bool_price4 = (subset_alts.payment / income_limits[4]) <= 0.33
    bool_price5 = (subset_alts.payment / income_limits[5]) <= 0.33
    # d maps county id -> list of (candidate ids, probabilities), one entry
    # per segment in segment order.
    d = {}
    for county in counties:
        data_list = []
        bool_counties = subset_alts.county_id == int(county)
        ids1 = subset_alts.loc[(bool_counties) & (bool_price1)].index.tolist()
        ids2 = subset_alts.loc[(bool_counties) & (bool_price2)].index.tolist()
        ids3 = subset_alts.loc[(bool_counties) & (bool_price3)].index.tolist()
        ids4 = subset_alts.loc[(bool_counties) & (bool_price4)].index.tolist()
        ids5 = subset_alts.loc[(bool_counties) & (bool_price5)].index.tolist()
        ##generate lists of probabilities
        prob1 = pdf.loc[set(ids1), 'segment1'].tolist()
        prob2 = pdf.loc[set(ids2), 'segment2'].tolist()
        prob3 = pdf.loc[set(ids3), 'segment3'].tolist()
        prob4 = pdf.loc[set(ids4), 'segment4'].tolist()
        prob5 = pdf.loc[set(ids5), 'segment5'].tolist()
        data_list.append((ids1, prob1))
        data_list.append((ids2, prob2))
        data_list.append((ids3, prob3))
        data_list.append((ids4, prob4))
        data_list.append((ids5, prob5))
        d[int(county)] = data_list
    #call placing method
    m_loop = movers_all[['income_3_tenure', 'county_id', 'building_id']]
    #m_loop = m_loop.head(5000)
    out_list = []
    from functools import partial
    mapfunc = partial(apply_func, d=d, out=out_list)
    # The movers are split into 4 chunks, one per worker process.
    p = mp.Pool(processes=4)
    split_dfs = np.array_split(m_loop, 4)
    pool_results = p.map(mapfunc, split_dfs)
    p.close()
    p.join()
    #m_loop.apply(place_households, axis=1, args=(d,out_list))
    master_list = pool_results[0] + pool_results[1] + pool_results[
        2] + pool_results[3]
    # Each result item is indexed as (building id, household id).
    building_ids = [i[0] for i in master_list]
    household_id = [i[1] for i in master_list]
    result_frame = pd.DataFrame(columns=['household_id', 'building_id'])
    result_frame['household_id'] = household_id
    result_frame['building_id'] = building_ids
    #
    dset.households.loc[result_frame.household_id,
                        'building_id'] = result_frame['building_id'].values
    #
    #result_frame.to_csv('c:/users/jmartinez/documents/test_results.csv')
    #print out_list
    # NOTE(review): the expression below has no effect as written; the
    # statement may have been cut short -- confirm against the original file.
    dset.households.loc[result_frame.household_id]
def fetch_factual(self):
    """Read ``factual_places.csv`` from the data directory.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    csv_path = os.path.join(misc.data_dir(), 'factual_places.csv')
    return pd.read_csv(csv_path)
import numpy as np, pandas as pd, os from synthicity.utils import misc from drcog.models import dataset dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5')) np.random.seed(1) import statsmodels.api as sm #import pygwr_kernel import random """ This program estimates a hedonic model for prices of residential and non-residential buildings. The benchmark method combines: 1/ A geographically weighted regression to account for spatial non-stationarity 2/ Poisson or Negative Binomial Generalized Linear Model to estimate a log-linear model with heteroskedastic error terms 3/ Zone employment (later on, when the data is fixed, zone average income or household characteristics) is instrumented with average building characteristics in neighboring zones. The program is organized in four parts: 1/ Create a dataset for estimation 2/ Run the first stage least squares (average zonal employment regressed on county fixed effects and neighboring zones characteristics). The predicted zonal employment is used as an instrument in all following regressions 3/ Run a GLM GWR method and obtain local hedonic parameters. 4/ Generate average coefficients for each zone """ ## Part 1: extract variables and build dataset for estimation def data_estimation(dset, buildings,parcels,fars,zones,establishments, bid): bp=buildings p=parcels f=fars
def estimate_elasticity(self, zones):
    """Estimate the demand elasticity from zone-level data.

    Fits ``py.spreg.twosls_sp.GM_Lag`` (spatial two-stage least squares with
    queen-contiguity weights read from ``shapefiles/zones.shp`` in the data
    directory).  The dependent variable is ``log1p`` of
    ``residential_sqft_zone``; ``ln_avg_unit_price_zone`` is the endogenous
    regressor, instrumented by ``ln_avg_nonres_unit_price_zone`` and the
    zone-average FAR taken from ``self.buildings_far``.  Missing values are
    replaced by 0 before estimation.

    Args:
        zones: zone DataFrame with a ``county`` column whose dummies include
            the ten county names used below, plus the regressor columns
            referenced in this function.  It is not modified.

    Returns:
        ``1 / abs(beta)`` where ``beta`` is the coefficient stored right
        after the constant and the exogenous regressors in ``betas``.
    """
    county_names = ['Arapahoe', 'Boulder', 'Broomfield', 'Clear Creek',
                    'Denver', 'Douglas', 'Elbert', 'Gilpin', 'Jefferson',
                    'Weld']
    logged_names = ['zonal_pop', 'mean_income']  # entered as log1p
    level_names = ['ln_jobs_within_30min', 'zone_contains_park'] + county_names
    x_names = logged_names + level_names

    dummies = pd.get_dummies(zones.county)
    zones = pd.concat([zones, dummies], axis=1)
    # use far_x because Xavier's code adds far to buildings
    zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean()

    # Path components are joined separately so the file is also found on
    # platforms where '\\' is not a path separator.
    wqueen = py.queen_from_shapefile(
        os.path.join(misc.data_dir(), 'shapefiles', 'zones.shp'))
    w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)

    # Exogenous regressors, in the same column order as x_names.
    x = zones[logged_names].apply(np.log1p)
    for name in level_names:
        x[name] = zones[name]
    # .values replaces the deprecated as_matrix() and works on old pandas too.
    x = x.fillna(0).values

    imat_names = ['non_res_price', 'avg_far']
    imat = zones[['ln_avg_nonres_unit_price_zone', 'avg_far']].fillna(0).values

    n_zones = zones.shape[0]
    yend_name = ['ln_avg_unit_price_zone']
    yend = np.reshape(zones['ln_avg_unit_price_zone'].fillna(0).values,
                      (n_zones, 1))

    y_name = 'residential_sqft_zone'
    y = zones['residential_sqft_zone'].fillna(0).apply(np.log1p).values
    y = np.reshape(y, (n_zones, 1))

    reg_2sls = py.spreg.twosls_sp.GM_Lag(
        y, x, yend=yend, q=imat, w=w, w_lags=2, robust='white',
        name_x=x_names, name_q=imat_names, name_y=y_name,
        name_yend=yend_name)

    # Position of the coefficient that follows the constant and the
    # exogenous regressors (the hard-coded 15 of the original code, now
    # derived from the regressor list so the two cannot drift apart).
    price_coeff_idx = 1 + len(x_names)
    demand_elasticity = np.absolute(reg_2sls.betas[price_coeff_idx])
    demand_elasticity = 1 / demand_elasticity[0]
    return demand_elasticity
def calculate_variables(dset):
    """Derive the model variables on all core tables of ``dset``.

    Adds computed columns to the parcel, building, household (both the
    estimation sample and the full table), establishment and zone tables,
    exports a zone employment table for Tableau, and finally stores the
    building/parcel/zone join as ``dset.d['buildings']`` and the zone table
    as ``dset.d['zones']``.

    Fixes with respect to the previous implementation:
      * the zone/county lookup is read once from ``misc.data_dir()`` (the
        parcel section used a hard-coded ``C:\\urbansim\\data`` path for the
        same ``TAZ_County_Table.csv`` that the zone section already read
        from the data directory);
      * ``income10xlt`` is ``income * 10`` (was ``* 5``, a copy of
        ``income5xlt``);
      * ``ln_emp_sector3_within_10min`` uses a 10 minute range (was 15, a
        copy of the 15 minute variable).
      Models estimated with the two mis-computed variables need to be
      re-estimated.

    Args:
        dset: project dataset object.  The module-level
            ``elcm_configuration`` mapping is also read (keys
            ``building_sqft_per_job_table`` and ``scaling_factor``).
    """
    # Zone -> county lookup shared by the parcel and the zone fix below.
    zone_county = pd.read_csv(
        os.path.join(misc.data_dir(), 'TAZ_County_Table.csv'))
    zone_county = zone_county.set_index('zone_id')

    ##PARCEL VARIABLES
    # XG: Fix the mismatch between zone and county
    p = dset.parcels
    del p['county_id']
    p = pd.merge(p, zone_county[['county_id']], left_on='zone_id',
                 right_index=True)
    pu = p
    #end of fix
    if p.index.name != 'parcel_id':
        p = p.set_index('parcel_id')
    # Debug output retained from the original code.
    print(p[p.zone_id == 1725].x)

    p['in_denver'] = (p.county_id == 8031).astype('int32')
    p['ln_dist_rail'] = p.dist_rail.apply(np.log1p)
    p['ln_dist_bus'] = p.dist_bus.apply(np.log1p)
    p['ln_land_value'] = p.land_value.apply(np.log1p)
    p['land_value_per_sqft'] = p.land_value * 1.0 / p.parcel_sqft
    p['rail_within_mile'] = (p.dist_rail < 5280).astype('int32')
    p['cherry_creek_school_district'] = (p.school_district == 8).astype('int32')
    p['acres'] = p.parcel_sqft / 43560.0
    p['ln_acres'] = (p.parcel_sqft / 43560.0).apply(np.log1p)

    #BUILDING VARIABLES
    b = dset.fetch(
        'buildings',
        building_sqft_per_job_table=elcm_configuration['building_sqft_per_job_table'],
        bsqft_job_scaling=elcm_configuration['scaling_factor'])
    b = b[['building_type_id', 'improvement_value', 'land_area',
           'non_residential_sqft', 'parcel_id', 'residential_units',
           'sqft_per_unit', 'stories', 'tax_exempt', 'year_built',
           'bldg_sq_ft', 'unit_price_non_residential',
           'unit_price_residential']]
    b.loc[:, 'zone_id'] = p.zone_id[b.parcel_id].values
    bsqft_job = dset.building_sqft_per_job
    b = pd.merge(b, bsqft_job, left_on=['zone_id', 'building_type_id'],
                 right_index=True, how='left')
    b["non_residential_units"] = b.non_residential_sqft / b.building_sqft_per_job
    b["base_year_jobs"] = dset.establishments.groupby('building_id').employees.sum()
    # things get all screwed up if you have overfull buildings
    b["non_residential_units"] = b[["non_residential_units",
                                    "base_year_jobs"]].max(axis=1)
    b["all_units"] = b.residential_units + b.non_residential_units
    b['county_id'] = p.county_id[b.parcel_id].values
    b['townhome'] = (b.building_type_id == 24).astype('int32')
    b['multifamily'] = (np.in1d(b.building_type_id, [2, 3])).astype('int32')
    b['office'] = (b.building_type_id == 5).astype('int32')
    b['retail_or_restaurant'] = (np.in1d(b.building_type_id, [17, 18])).astype('int32')
    b['industrial_building'] = (np.in1d(b.building_type_id, [9, 22])).astype('int32')
    b['residential_sqft'] = (b.bldg_sq_ft - b.non_residential_sqft)
    b['btype_hlcm'] = (1 * (b.building_type_id == 2) +
                       2 * (b.building_type_id == 3) +
                       3 * (b.building_type_id == 20) +
                       4 * np.invert(np.in1d(b.building_type_id, [2, 3, 20])))
    # One indicator column per county id.
    for county_fips in (8001, 8005, 8013, 8014, 8019, 8031, 8035, 8039, 8047,
                        8059, 8123):
        b['county%d' % county_fips] = (b.county_id == county_fips).astype('int32')
    has_units = b.residential_units > 0
    b['unit_price_res_sqft'] = (b[has_units].unit_price_residential /
                                b[has_units].bldg_sq_ft)
    p['nonres_far'] = (b.groupby('parcel_id').non_residential_sqft.sum() /
                       p.acres).apply(np.log1p)
    p['ln_units_per_acre'] = (b.groupby('parcel_id').residential_units.sum() /
                              p.acres).apply(np.log1p)

    #HOUSEHOLD VARIABLES
    hh_estim = dset.fetch('households_for_estimation')
    hh_estim['tenure'] = 1
    hh_estim.loc[hh_estim.own > 1, "tenure"] = 2
    # Income value assigned to each income group (groups not listed keep 0).
    hh_estim['income'] = 0
    income_by_group = ((1, 7500), (2, 17500), (3, 25000), (4, 35000),
                       (5, 45000), (6, 55000), (7, 67500), (8, 87500),
                       (9, 117500), (10, 142500), (11, 200000))
    for group, income in income_by_group:
        hh_estim.loc[hh_estim.income_group == group, "income"] = income

    hh = dset.fetch('households')
    for choosers in [hh_estim, hh]:
        choosers['zone_id'] = b.zone_id[choosers.building_id].values
        choosers['building_type_id'] = b.building_type_id[choosers.building_id].values
        choosers['county_id'] = b.county_id[choosers.building_id].values
        choosers['btype'] = (
            1 * (choosers.building_type_id == 2) +
            2 * (choosers.building_type_id == 3) +
            3 * (choosers.building_type_id == 20) +
            4 * np.invert(np.in1d(choosers.building_type_id, [2, 3, 20])))
        # Segments 1-3: tenure 1 by income band; segments 4-5: tenure 2.
        choosers['income_3_tenure'] = (
            1 * (choosers.income < 60000) * (choosers.tenure == 1) +
            2 * np.logical_and(choosers.income >= 60000,
                               choosers.income < 120000) * (choosers.tenure == 1) +
            3 * (choosers.income >= 120000) * (choosers.tenure == 1) +
            4 * (choosers.income < 40000) * (choosers.tenure == 2) +
            5 * (choosers.income >= 40000) * (choosers.tenure == 2))
        choosers['younghead'] = choosers.age_of_head < 30
        choosers['hh_with_child'] = choosers.children > 0
        choosers['ln_income'] = choosers.income.apply(np.log1p)
        choosers['income5xlt'] = choosers.income * 5.0
        choosers['income10xlt'] = choosers.income * 10.0  # was 5.0 (copy/paste)
        choosers['wkrs_hhs'] = choosers.workers * 1.0 / choosers.persons

    #ESTABLISHMENT VARIABLES
    e = dset.fetch('establishments')
    e['zone_id'] = b.zone_id[e.building_id].values
    e['county_id'] = b.county_id[e.building_id].values
    e['sector_id_six'] = (
        1 * (e.sector_id == 61) +
        2 * (e.sector_id == 71) +
        3 * np.in1d(e.sector_id, [11, 21, 22, 23, 31, 32, 33, 42, 48, 49]) +
        4 * np.in1d(e.sector_id, [7221, 7222, 7224]) +
        5 * np.in1d(e.sector_id, [44, 45, 7211, 7212, 7213, 7223]) +
        6 * np.in1d(e.sector_id, [51, 52, 53, 54, 55, 56, 62, 81, 92]))
    is_lodging = np.in1d(e.sector_id, [7211, 7212, 7213])
    e['sector_id_retail_agg'] = (e.sector_id * np.logical_not(is_lodging) +
                                 7211 * is_lodging)
    e['nonres_sqft'] = b.non_residential_sqft[e.building_id].values

    #ZONE VARIABLES
    #XG: fix the mismatch zone county
    z = dset.fetch('zones')
    del z['county']
    z['zone_id'] = z.index
    z = pd.merge(z, zone_county[['county']], left_on='zone_id',
                 right_index=True)
    del z['zone_id']
    zu = z
    #end of fix
    hh_by_zone = hh.groupby('zone_id')
    z['zonal_hh'] = hh_by_zone.size()
    z['zonal_emp'] = e.groupby('zone_id').employees.sum()
    # Debug output retained from the original code.
    z['zone_id'] = z.index
    print(z.columns)
    print(z[z['zone_id'] == 1722]['zonal_emp'])
    del z['zone_id']

    # Zone aggregates that feed several columns are computed once.
    b_by_zone = b.groupby('zone_id')
    res_units_by_zone = b_by_zone.residential_units.sum()
    nonres_sqft_by_zone = b_by_zone.non_residential_sqft.sum()
    median_year_built = b_by_zone.year_built.median()
    avg_res_price = b[(b.residential_units > 0) * (b.improvement_value > 0)] \
        .groupby('zone_id').unit_price_residential.mean()

    z['residential_sqft_zone'] = b_by_zone.residential_sqft.sum()
    z['zonal_pop'] = hh_by_zone.persons.sum()
    z['residential_units_zone'] = res_units_by_zone
    z['ln_residential_units_zone'] = res_units_by_zone.apply(np.log1p)
    z['ln_residential_unit_density_zone'] = (res_units_by_zone /
                                             z.acreage).apply(np.log1p)
    z['non_residential_sqft_zone'] = nonres_sqft_by_zone
    z['ln_non_residential_sqft_zone'] = nonres_sqft_by_zone.apply(np.log1p)
    z['percent_sf'] = (b[b.btype_hlcm == 3].groupby('zone_id')
                       .residential_units.sum() * 100.0 / res_units_by_zone)
    z['avg_unit_price_zone'] = avg_res_price
    z['ln_avg_unit_price_zone'] = avg_res_price.apply(np.log1p)
    z['ln_avg_nonres_unit_price_zone'] = \
        b[(b.non_residential_sqft > 0) * (b.improvement_value > 0)] \
        .groupby('zone_id').unit_price_non_residential.mean().apply(np.log1p)
    z['median_age_of_head'] = hh_by_zone.age_of_head.median()
    z['mean_income'] = hh_by_zone.income.mean()
    z['median_year_built'] = median_year_built.astype('int32')
    z['ln_avg_land_value_per_sqft_zone'] = \
        p.groupby('zone_id').land_value_per_sqft.mean().apply(np.log1p)
    z['median_yearbuilt_post_1990'] = (median_year_built > 1990).astype('int32')
    z['median_yearbuilt_pre_1950'] = (median_year_built < 1950).astype('int32')
    z['percent_hh_with_child'] = \
        hh[hh.children > 0].groupby('zone_id').size() * 100.0 / z.zonal_hh
    z['percent_renter_hh_in_zone'] = \
        hh[hh.tenure == 2].groupby('zone_id').size() * 100.0 / z.zonal_hh
    z['percent_younghead'] = \
        hh[hh.age_of_head < 30].groupby('zone_id').size() * 100.0 / z.zonal_hh
    z['average_resunit_size'] = b_by_zone.sqft_per_unit.mean()
    z['zone_contains_park'] = \
        (p[p.lu_type_id == 14].groupby('zone_id').size() > 0).astype('int32')
    # NOTE(review): sector_id == 1 does not appear among the sector ids used
    # above -- confirm which sector "emp_sector_agg" is meant to capture.
    z['emp_sector_agg'] = e[e.sector_id == 1].groupby('zone_id').employees.sum()
    for sector in range(1, 7):
        z['emp_sector%d' % sector] = \
            e[e.sector_id_six == sector].groupby('zone_id').employees.sum()

    z['jobs_within_45min'] = dset.compute_range(z.zonal_emp, 45.0)
    z['ln_jobs_within_45min'] = dset.compute_range(z.zonal_emp, 45.0).apply(np.log1p)
    z['jobs_within_30min'] = dset.compute_range(z.zonal_emp, 30.0)
    z['ln_jobs_within_30min'] = dset.compute_range(z.zonal_emp, 30.0).apply(np.log1p)
    z['jobs_within_20min'] = dset.compute_range(z.zonal_emp, 20.0)
    z['jobs_within_15min'] = dset.compute_range(z.zonal_emp, 15.0)
    z['ln_jobs_within_20min'] = dset.compute_range(z.zonal_emp, 20.0).apply(np.log1p)
    z['ln_pop_within_20min'] = dset.compute_range(z.zonal_pop, 20.0).apply(np.log1p)
    z['ln_emp_aggsector_within_5min'] = dset.compute_range(z.emp_sector_agg, 5.0).apply(np.log1p)
    z['ln_emp_sector1_within_15min'] = dset.compute_range(z.emp_sector1, 15.0).apply(np.log1p)
    z['ln_emp_sector2_within_15min'] = dset.compute_range(z.emp_sector2, 15.0).apply(np.log1p)
    # Range corrected to match the variable name (was 15.0).
    z['ln_emp_sector3_within_10min'] = dset.compute_range(z.emp_sector3, 10.0).apply(np.log1p)
    z['ln_emp_sector3_within_15min'] = dset.compute_range(z.emp_sector3, 15.0).apply(np.log1p)
    z['ln_emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3, 20.0).apply(np.log1p)
    z['ln_emp_sector4_within_15min'] = dset.compute_range(z.emp_sector4, 15.0).apply(np.log1p)
    z['ln_emp_sector5_within_15min'] = dset.compute_range(z.emp_sector5, 15.0).apply(np.log1p)
    z['ln_emp_sector6_within_15min'] = dset.compute_range(z.emp_sector6, 15.0).apply(np.log1p)
    z['allpurpose_agglosum_floor'] = (z.allpurpose_agglosum >= 0) * (z.allpurpose_agglosum)

    #Exports (for Tableau-Employment)
    export_minutes = (20, 30, 45)
    sector_range_names = []
    for minutes in export_minutes:
        for sector in range(1, 7):
            name = 'emp_sector%d_within_%dmin' % (sector, minutes)
            z[name] = dset.compute_range(z['emp_sector%d' % sector],
                                         float(minutes))
            sector_range_names.append(name)
    z['residential_unit_per_jobs_within_15_min'] = \
        z['residential_units_zone'] / z['jobs_within_15min']
    z['residential_sqft_per_jobs_within_15_min'] = \
        (b[np.in1d(b['building_type_id'], [2, 3, 20, 24])]
         .groupby('zone_id').bldg_sq_ft.sum()) / z['jobs_within_15min']
    ztableau = z[['zonal_emp'] +
                 ['emp_sector%d' % sector for sector in range(1, 7)] +
                 ['jobs_within_45min', 'jobs_within_30min',
                  'jobs_within_20min'] +
                 sector_range_names +
                 ['residential_unit_per_jobs_within_15_min',
                  'residential_sqft_per_jobs_within_15_min']]
    # Raw string: same path as before, without relying on '\u', '\o' and
    # '\e' not being escape sequences.
    ztableau.to_csv(r'C:\urbansim\output\emp_tableau.csv')

    ##JOINS
    #merge parcels with zones
    pz = pd.merge(p.reset_index(), z, left_on='zone_id', right_index=True)
    pz = pz.set_index('parcel_id')
    #merge buildings with parcels/zones
    del b['county_id']
    del b['zone_id']
    bpz = pd.merge(b, pz, left_on='parcel_id', right_index=True)
    bpz['residential_units_capacity'] = bpz.parcel_sqft / 1500 - bpz.residential_units
    bpz.loc[bpz.residential_units_capacity < 0, "residential_units_capacity"] = 0
    dset.d['buildings'] = bpz
    if dset.parcels.index.name != 'parcel_id':
        dset.parcels = pu
    # NOTE(review): stored unconditionally here; confirm this assignment was
    # not meant to sit inside the ``if`` above.
    dset.d['zones'] = zu
    #dset.d['parcels']=pu
# print '%d units were filled' %(new_building_count - old_building_count) #buildings = buildings.drop(new_homes) #temp_count += 1 if (temp_count > 50): break #out_table.to_csv('C:/users/jmartinez/documents/households_simulation_test.csv') dset.households.loc[out_table.index] = out_table #homeless.to_csv('C:/users/jmartinez/documents/homeless.csv') if __name__ == '__main__': from drcog.models import dataset from drcog.variables import variable_library import os import cProfile dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5')) #Load estimated coefficients coeff_store = pd.HDFStore(os.path.join(misc.data_dir(), 'coeffs.h5')) dset.coeffs = coeff_store.coeffs.copy() coeff_store.close() coeff_store = pd.HDFStore(os.path.join(misc.data_dir(), 'coeffs_res.h5')) dset.coeffs_res = coeff_store.coeffs_res.copy() coeff_store.close() variable_library.calculate_variables(dset) alternatives = dset.buildings[(dset.buildings.residential_units > 0)] sim_year = 2011 fnc = "simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'households',output_names = ('drcog-coeff-hlcm-%s.csv','DRCOG HOUSEHOLD LOCATION CHOICE MODELS (%s)','hh_location_%s','household_building_ids')," +\ "agents_groupby= ['income_3_tenure',],transition_config = {'Enabled':True,'control_totals_table':'annual_household_control_totals','scaling_factor':1.0}," +\
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table=None, output_names=None, agents_groupby=None,
             transition_config=None, relocation_config=None):
    """Place unplaced households into vacant residential units for one year.

    Steps performed, in order:

    1. Optionally run the household transition model against the control
       totals table; added households get ``building_id = -1``.
    2. Optionally mark relocating households as unplaced (``depvar = -1``).
    3. Draw a county for every unplaced household, weighted by the row of
       ``county_growth_share.csv`` (in ``misc.data_dir()``) labelled ``year``.
    4. For each county and each of the five ``income_3_tenure`` segments,
       collect the vacant units whose estimated mortgage payment is at most
       33% of the segment's monthly income limit, together with the
       probabilities returned by ``gen_probs``.
    5. Split the movers into 4 chunks and run ``apply_func`` on them in a
       4-process pool.

    Parameters
    ----------
    dset : dataset object
        Must provide ``fetch``, ``d``, ``store``, ``households``,
        ``buildings`` and ``relocation_rates`` as used below.
    year : int
        Simulation year.
    depvar : str
        Chooser column holding the chosen building; ``-1`` means unplaced.
    alternatives : pandas.DataFrame
        Candidate buildings. An ``est_mortgage_payment`` column is added to
        this frame in place.
    simulation_table : str
        Name of the chooser table fetched from ``dset``.
    output_names : sequence of 4 items
        Unpacked as (csv name, title, coefficient name, output variable
        name) and forwarded unchanged to ``gen_probs``.
    agents_groupby : list, optional
        Forwarded to ``gen_probs``; defaults to ``['income_3_tenure']``.
    transition_config, relocation_config : dict, optional
        Each is read with the keys ``'Enabled'`` and ``'scaling_factor'``
        plus ``'control_totals_table'`` / ``'relocation_rates_table'``.
        ``None`` is treated as "disabled".

    Returns
    -------
    None
    """
    from functools import partial

    # A list literal as a default argument is shared between calls; build a
    # fresh one per call instead.
    if agents_groupby is None:
        agents_groupby = ['income_3_tenure']

    # The individual names are not used below; the unpacking is kept because
    # it rejects an `output_names` that does not have exactly four items.
    output_csv, output_title, coeff_name, output_varname = output_names

    # ---- household transition -------------------------------------------
    if transition_config is not None and transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"]
            * transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        # Newly created households start out unplaced.
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    # ---- mortgage payment estimate ----------------------------------------
    # Standard amortisation factor: monthly rate r over n monthly payments
    # (5% per year, 360 payments).
    r = .05 / 12
    n = 360
    payment_factor = (r * (1 + r) ** n) / ((1 + r) ** n - 1)
    # Written onto the caller's frame so that rows selected from it below
    # carry the column as well.
    alternatives['est_mortgage_payment'] = (
        alternatives.unit_price_residential * payment_factor)

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    # ---- relocation -------------------------------------------------------
    if relocation_config is not None and relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are scaled by .01 and by the configured scaling factor.
        rate_table[rate_field] = (
            rate_table[rate_field] * .01
            * relocation_config['scaling_factor'])
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        # Single .loc assignment: the chained `choosers[depvar].ix[...] = -1`
        # form may write to a temporary copy instead of `choosers`.
        choosers.loc[movers, depvar] = -1

    # Explicit copy so the county column below is added to an independent
    # frame (avoids pandas' SettingWithCopy ambiguity).
    movers_all = choosers[choosers[depvar] == -1].copy()

    # ---- distribute county_ids based on demography projections ------------
    county_growth_share = pd.read_csv(
        os.path.join(misc.data_dir(), 'county_growth_share.csv'),
        index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_all['county_id'] = np.random.choice(
        counties, movers_all.shape[0], replace=True,
        p=current_growth_shares)

    # ---- vacant units ------------------------------------------------------
    # Units per residential building minus households already living there.
    empty_units = dset.buildings[
        (dset.buildings.residential_units > 0)
    ].residential_units.sub(
        choosers.groupby('building_id').size(), fill_value=0)
    # NOTE(review): Series.order and .ix only exist in old pandas releases;
    # left unchanged to stay compatible with the version this file targets.
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    # One row per vacant unit: each building id repeated by its vacancy count.
    alts = alternatives.ix[
        np.repeat(empty_units.index.values,
                  empty_units.values.astype('int'))]

    # ---- alternatives subset with mortgage info ----------------------------
    try:
        subset_alts = alts[['unit_price_residential', 'county_id']].copy()
    except KeyError:
        # The county column is named `county_id_y` in some inputs
        # (presumably a merge suffix; verify against the caller).
        subset_alts = alts[['unit_price_residential', 'county_id_y']].copy()
        subset_alts.rename(columns={'county_id_y': 'county_id'}, inplace=True)
    subset_alts['payment'] = alts.unit_price_residential * payment_factor

    # ---- generate probabilities --------------------------------------------
    pdf = gen_probs(dset, movers_all, agents_groupby, alts, output_names)

    # ---- per-county / per-segment candidate lists --------------------------
    # Monthly income ceiling for each income_3_tenure segment.
    income_limits = {1: 60000 / 12,
                     2: 120000 / 12,
                     3: dset.households.income.max() / 12,
                     4: 40000 / 12,
                     5: dset.households.income.max() / 12}
    segments = range(1, 6)
    # A unit is affordable for a segment when its payment is at most 33% of
    # that segment's monthly income ceiling.
    affordable = {}
    for seg in segments:
        affordable[seg] = (subset_alts.payment / income_limits[seg]) <= 0.33

    d = {}
    for county in counties:
        in_county = subset_alts.county_id == int(county)
        data_list = []
        for seg in segments:
            ids = subset_alts.loc[in_county & affordable[seg]].index.tolist()
            # NOTE(review): `ids` keeps one entry per vacant unit in frame
            # order, while the probabilities are looked up through a set of
            # the ids, so the two lists are not guaranteed to line up
            # element by element. Confirm what apply_func expects.
            probs = pdf.loc[set(ids), 'segment%d' % seg].tolist()
            data_list.append((ids, probs))
        d[int(county)] = data_list

    # ---- call placing method -----------------------------------------------
    m_loop = movers_all[['income_3_tenure', 'county_id', 'building_id']]
    out_list = []
    mapfunc = partial(apply_func, d=d, out=out_list)
    split_dfs = np.array_split(m_loop, 4)
    p = mp.Pool(processes=4)
    try:
        pool_results = p.map(mapfunc, split_dfs)
    finally:
        # Always release the worker processes, even if a worker raised.
        p.close()
        p.join()

    master_list = (pool_results[0] + pool_results[1]
                   + pool_results[2] + pool_results[3])
    building_ids = [i[0] for i in master_list]
    household_id = [i[1] for i in master_list]
    result_frame = pd.DataFrame(columns=['household_id', 'building_id'])
    result_frame['household_id'] = household_id
    result_frame['building_id'] = building_ids

    # NOTE(review): as written this statement only looks the placed
    # households up; nothing is assigned, so `result_frame` is never written
    # back to `dset.households`. A commented-out predecessor read
    #   dset.households.loc[result_frame.household_id, 'building_id'] = \
    #       result_frame['building_id'].values
    # Confirm the intended write-back before changing it.
    dset.households.loc[result_frame.household_id]
def add_rows(data, nrows, starting_index=None):
    """
    Add rows to a household table by copying existing households.

    The households to copy are not drawn uniformly. An age of head is first
    sampled for every new row, weighted by net migration per age read from
    ``NetMigrationByAge.csv`` in ``misc.data_dir()``; then, for each sampled
    age, existing households with that ``age_of_head`` are picked at random
    (with replacement) and copied.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``age_of_head`` column.
    nrows : int
        Number of rows to add.
    starting_index : int, optional
        The starting index from which to calculate indexes for
        the new rows. If not given the max + 1 of the index of `data`
        will be used.

    Returns
    -------
    updated : pandas.DataFrame
        Table with rows added. New rows are indexed
        ``starting_index .. starting_index + nrows - 1``.
    added : pandas.Index
        New indexes of the rows that were added.
    copied : pandas.Index
        Indexes of rows that were copied. A row copied multiple times
        will have multiple entries.

    Raises
    ------
    ValueError
        If an age is sampled for which `data` holds no household.

    """
    if nrows == 0:
        return data, _empty_index(), _empty_index()

    # `is None` rather than truthiness, so an explicit starting_index of 0
    # is honoured instead of being silently replaced by max + 1.
    if starting_index is None:
        starting_index = data.index.values.max() + 1

    # --- age distribution per State Demographer's data ---
    migration = pd.read_csv(
        os.path.join(misc.data_dir(), 'NetMigrationByAge.csv'))
    migration.columns = ['county', 'age', 'net_migration']
    # Rows 15..89 (positional) only: per the original author these are the
    # ages present in the households table. Copy so the new column below is
    # written to an independent frame rather than a slice.
    migration = migration.iloc[15:90].copy()
    # Normalise net migration into sampling weights.
    migration['prob_age'] = (
        migration['net_migration'] / migration.net_migration.sum())

    # One weighted age draw per new row.
    random_ages = np.random.choice(
        migration.age, nrows, p=migration.prob_age)

    # How many times each age was drawn, in ascending age order.
    frame = pd.DataFrame()
    frame['ages'] = random_ages
    grp = frame.groupby('ages').size()

    agg_list = []
    # zip(index, values) yields the same (age, count) pairs as
    # Series.iteritems(), which newer pandas releases no longer provide.
    for age_val, age_count in zip(grp.index, grp.values):
        candidates = data[data.age_of_head == age_val].index.values
        if len(candidates) == 0:
            # np.random.choice raises ValueError on an empty pool anyway;
            # raise the same type with a message that names the cause.
            raise ValueError(
                'no household with age_of_head == %s to copy' % age_val)
        agg_list.extend(np.random.choice(candidates, age_count))

    new_rows = data.loc[agg_list].copy()
    # Builtin `int` is what the removed `np.int` alias pointed to.
    added_index = pd.Index(
        np.arange(starting_index, starting_index + nrows, dtype=int))
    new_rows.index = added_index

    return pd.concat([data, new_rows]), added_index, pd.Index(agg_list)
def fetch_csv(self, name, **kwargs):
    """Return the table stored under ``name``, reading the CSV on first use.

    On a cache miss the file ``name`` inside ``misc.data_dir()`` is read
    with ``pd.read_csv(path, **kwargs)`` and stored in ``self.d`` under
    ``name``. On a hit the stored object is returned and ``kwargs`` are
    ignored.
    """
    if name not in self.d:
        csv_path = os.path.join(misc.data_dir(), name)
        self.d[name] = pd.read_csv(csv_path, **kwargs)
    return self.d[name]
def run(self, name=None, export_buildings_to_urbancanvas=False, base_year=2010, forecast_year=None, fixed_seed=True, random_seed=1, export_indicators=True, indicator_output_directory='C:/opus/data/drcog2/runs', core_components_to_run=None, household_transition=None,household_relocation=None,employment_transition=None, elcm_configuration=None, developer_configuration=None, calibration_configuration=None, hh_targets=None, ru_targets=None, emp_targets=None, nrsqft_targets=None):
    """Run repeated UrbanSim2 simulations and nudge calibration factors.

    For each of ``calibration_configuration['iterations']`` passes this
    method reloads the dataset and the estimated coefficients, simulates
    every year from ``base_year`` through ``forecast_year`` (inclusive),
    and then compares each county's share of simulated growth with its
    target share for four quantities: residential units, households,
    employment and non-residential sqft. Shares outside
    ``target +/- margin`` trigger an adjustment:

    * households / employment: rows of ``dset.coeffs`` whose ``fnames``
      equal ``'county<id>'`` are moved by ``+/- delta`` in every ``hh_*`` /
      ``emp_*`` submodel;
    * residential units / non-residential sqft: the county's demand and
      price factors in ``county_calib.csv`` are moved by ``+/- 0.01``.

    At the end of every pass ``dset.coeffs`` is written to ``coeffs.h5``
    and the county factors to ``county_calib.csv`` (both under
    ``misc.data_dir()``).

    Parameters
    ----------
    base_year, forecast_year : int
        First and last simulated year. ``forecast_year + 1`` is evaluated,
        so the ``None`` default cannot be used as is.
    fixed_seed, random_seed
        When ``fixed_seed`` is true, ``np.random.seed(random_seed)`` is
        called at the start of every pass.
    core_components_to_run : mapping
        Read with the keys ``'ELCM'``, ``'HLCM'``, ``'Price'`` and
        ``'Developer'``; a truthy value runs that model each year.
    developer_configuration
        Passed unchanged to ``proforma_developer_model.run``.
    calibration_configuration : mapping
        Keys read: ``'coefficient_step_size'`` (delta),
        ``'match_target_within'`` (margin) and ``'iterations'``.
    hh_targets, ru_targets, emp_targets, nrsqft_targets : mapping
        Indexed with ``'<prefix>_<county id>_target'`` for the eleven county
        ids listed in ``county_id`` below (prefixes ``hh``, ``ru``, ``emp``
        and ``nr``).
    name, export_buildings_to_urbancanvas, export_indicators,
    indicator_output_directory, household_transition, household_relocation,
    employment_transition, elcm_configuration
        Not referenced by the active code of this method.

    Returns
    -------
    None
    """
    ##Calibration targets
    # Earlier hard-coded targets, superseded by the *_targets arguments:
    #resunit_targets = np.array([.198,.205,.105,.032,.002,.165,.142,.014,.002,.099,.037])
    #hh_targets = np.array([.198,.205,.105,.032,.002,.165,.142,.014,.002,.099,.037])
    #emp_targets = np.array([0.1511,0.2232,0.0737,0.0473,0.0001,0.2435,0.1094,0.0139,0.0005,0.1178,0.0197])
    #nonres_targets = np.array([0.1511,0.2232,0.0737,0.0473,0.0001,0.2435,0.1094,0.0139,0.0005,0.1178,0.0197])
    # Flatten each target mapping into an array ordered like `county_id` below.
    hh_targets = np.array([hh_targets['hh_8001_target'],hh_targets['hh_8005_target'],hh_targets['hh_8013_target'],hh_targets['hh_8014_target'],hh_targets['hh_8019_target'],hh_targets['hh_8031_target'],hh_targets['hh_8035_target'],hh_targets['hh_8039_target'],hh_targets['hh_8047_target'],hh_targets['hh_8059_target'],hh_targets['hh_8123_target']])
    resunit_targets = np.array([ru_targets['ru_8001_target'],ru_targets['ru_8005_target'],ru_targets['ru_8013_target'],ru_targets['ru_8014_target'],ru_targets['ru_8019_target'],ru_targets['ru_8031_target'],ru_targets['ru_8035_target'],ru_targets['ru_8039_target'],ru_targets['ru_8047_target'],ru_targets['ru_8059_target'],ru_targets['ru_8123_target']])
    emp_targets = np.array([emp_targets['emp_8001_target'],emp_targets['emp_8005_target'],emp_targets['emp_8013_target'],emp_targets['emp_8014_target'],emp_targets['emp_8019_target'],emp_targets['emp_8031_target'],emp_targets['emp_8035_target'],emp_targets['emp_8039_target'],emp_targets['emp_8047_target'],emp_targets['emp_8059_target'],emp_targets['emp_8123_target']])
    nonres_targets = np.array([nrsqft_targets['nr_8001_target'],nrsqft_targets['nr_8005_target'],nrsqft_targets['nr_8013_target'],nrsqft_targets['nr_8014_target'],nrsqft_targets['nr_8019_target'],nrsqft_targets['nr_8031_target'],nrsqft_targets['nr_8035_target'],nrsqft_targets['nr_8039_target'],nrsqft_targets['nr_8047_target'],nrsqft_targets['nr_8059_target'],nrsqft_targets['nr_8123_target']])
    county_id = np.array([8001,8005,8013,8014,8019,8031,8035,8039,8047,8059,8123])
    # One row per county with its four target shares.
    targets = pd.DataFrame({'county_id':county_id,'resunit_target':resunit_targets,'hh_target':hh_targets,'emp_target':emp_targets,'nonres_target':nonres_targets})
    delta = calibration_configuration['coefficient_step_size']
    margin = calibration_configuration['match_target_within']
    iterations = calibration_configuration['iterations']
    for it in range(iterations):
        logger.log_status('Calibration iteration: ' + str(it))
        logger.log_status('Starting UrbanSim2 run.')
        # A fresh dataset is opened on every pass.
        dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
        seconds_start = time.time()
        if fixed_seed:
            logger.log_status('Running with fixed random seed.')
            np.random.seed(random_seed)
        #Load estimated coefficients
        # coeffs.h5 is rewritten at the end of every pass, so from the second
        # pass on this reads the previous pass's adjusted values.
        coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs.h5'))
        dset.coeffs = coeff_store.coeffs.copy()
        coeff_store.close()
        coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs_res.h5'))
        dset.coeffs_res = coeff_store.coeffs_res.copy()
        coeff_store.close()
        #Keep track of unplaced agents by year
        # (only appended to by the commented-out indicator code below)
        unplaced_hh = []
        unplaced_emp = []
        for sim_year in range(base_year,forecast_year+1):
            print 'Simulating year ' + str(sim_year)
            logger.log_status(sim_year)
            ##Variable Library calculations
            variable_library.calculate_variables(dset)
            #Record pre-demand model zone-level household/job totals
            # Passed to the developer model further down.
            hh_zone1 = dset.fetch('households').groupby('zone_id').size()
            emp_zone1 = dset.fetch('establishments').groupby('zone_id').employees.sum()
            ############ ELCM SIMULATION
            if core_components_to_run['ELCM']:
                logger.log_status('ELCM simulation.')
                # Alternatives: buildings with non-residential floor space.
                alternatives = dset.buildings[(dset.buildings.non_residential_sqft>0)]
                elcm_simulation.simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'establishments',output_names = ("drcog-coeff-elcm-%s.csv","DRCOG EMPLOYMENT LOCATION CHOICE MODELS (%s)","emp_location_%s","establishment_building_ids"), agents_groupby= ['sector_id_retail_agg',],transition_config = {'Enabled':True,'control_totals_table':'annual_employment_control_totals','scaling_factor':1.0})
            ################# HLCM simulation
            if core_components_to_run['HLCM']:
                logger.log_status('HLCM simulation.')
                # Alternatives: buildings with at least one residential unit.
                alternatives = dset.buildings[(dset.buildings.residential_units>0)]
                new_hlcm_simulation.simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'households',output_names = ("drcog-coeff-hlcm-%s.csv","DRCOG HOUSEHOLD LOCATION CHOICE MODELS (%s)","hh_location_%s","household_building_ids"), agents_groupby= ['income_3_tenure',],transition_config = {'Enabled':True,'control_totals_table':'annual_household_control_totals','scaling_factor':1.0}, relocation_config = {'Enabled':True,'relocation_rates_table':'annual_household_relocation_rates','scaling_factor':1.0},)
            ############ REPM SIMULATION
            if core_components_to_run['Price']:
                logger.log_status('REPM simulation.')
                #Residential
                census_model_simulation.simulate_residential(dset, 'unit_price_res_sqft', 'school_district_id', 10, sim_year)
                #Non-residential
                regression_model_simulation.simulate(dset, year=sim_year,output_varname='unit_price_non_residential', simulation_table='buildings', output_names = ["drcog-coeff-nrhedonic-%s.csv","DRCOG NRHEDONIC MODEL (%s)","nrprice_%s"], agents_groupby = 'building_type_id', segment_ids = [5,8,11,16,17,18,21,23,9,22])
            ############ DEVELOPER SIMULATION
            if core_components_to_run['Developer']:
                logger.log_status('Proforma simulation.')
                buildings, newbuildings = proforma_developer_model.run(dset,hh_zone1,emp_zone1,developer_configuration,sim_year)
                # Existing plus newly developed buildings become the building table.
                dset.d['buildings'] = pd.concat([buildings,newbuildings])
            ########### Indicators (disabled)
            # if export_indicators:
            #     unplaced_hh.append((dset.households.building_id==-1).sum())
            #     unplaced_emp.append(dset.establishments[dset.establishments.building_id==-1].employees.sum())
            #     if sim_year == forecast_year:
            #         logger.log_status('Exporting indicators')
            #         indicators.run(dset, indicator_output_directory, forecast_year)
            ########### TRAVEL MODEL (disabled)
            # if travel_model_configuration['export_to_tm']:
            #     if sim_year in travel_model_configuration['years_to_run']:
            #         logger.log_status('Exporting to TM')
            #         export_zonal_file.export_zonal_file_to_tm(dset,sim_year,tm_input_dir=travel_model_configuration['tm_input_dir'])
        elapsed = time.time() - seconds_start
        print "TOTAL elapsed time: " + str(elapsed) + " seconds."
        ########### Calibration
        logger.log_status('Calibration coefficient updating')
        import math
        # Submodel names are the first level of the coeffs columns whose
        # second level is 'fnames'.
        hh_submodels = []
        for col in dset.coeffs.columns:
            if col[0].startswith('hh_') and col[1]=='fnames':
                hh_submodels.append(col[0])
        emp_submodels = []
        for col in dset.coeffs.columns:
            if col[0].startswith('emp_') and col[1]=='fnames':
                emp_submodels.append(col[0])
        #Record base values for temporal comparison
        # Base tables come from dset.store, simulated ones (below) from
        # dset.fetch / dset.parcels.
        hh = dset.store.households
        e = dset.store.establishments
        b = dset.store.buildings
        p = dset.store.parcels.set_index('parcel_id')
        # NOTE(review): p was just indexed by 'parcel_id', so this branch
        # looks unreachable here.
        if p.index.name != 'parcel_id':
            p=p.set_index(p['parcel_id'])
        # Chain county_id down: parcel -> building -> household / establishment.
        b['county_id'] = p.county_id[b.parcel_id].values
        hh['county_id'] = b.county_id[hh.building_id].values
        e['county_id'] = b.county_id[e.building_id].values
        base_hh_county = hh.groupby('county_id').size()
        base_emp_county = e.groupby('county_id').employees.sum()
        base_ru_county = b.groupby('county_id').residential_units.sum()
        base_nr_county = b.groupby('county_id').non_residential_sqft.sum()
        #Calibration indicators
        # Same county totals, computed on the simulated tables.
        b = dset.fetch('buildings')
        e = dset.fetch('establishments')
        hh = dset.fetch('households')
        p = dset.parcels
        if p.index.name != 'parcel_id':
            p = p.set_index(p['parcel_id'])
        b['county_id'] = p.county_id[b.parcel_id].values
        hh['county_id'] = b.county_id[hh.building_id].values
        e['county_id'] = b.county_id[e.building_id].values
        sim_hh_county = hh.groupby('county_id').size()
        sim_emp_county = e.groupby('county_id').employees.sum()
        sim_ru_county = b.groupby('county_id').residential_units.sum()
        sim_nr_county = b.groupby('county_id').non_residential_sqft.sum()
        # Growth per county = simulated total minus base total.
        hh_diff_county = sim_hh_county - base_hh_county
        emp_diff_county = sim_emp_county - base_emp_county
        ru_diff_county = sim_ru_county - base_ru_county
        nr_diff_county = sim_nr_county - base_nr_county
        # Each county's share of total growth; `*1.0` forces float division.
        prop_growth_emp = emp_diff_county*1.0/emp_diff_county.sum()
        prop_growth_hh = hh_diff_county*1.0/hh_diff_county.sum()
        prop_growth_ru = ru_diff_county*1.0/ru_diff_county.sum()
        prop_growth_nr = nr_diff_county*1.0/nr_diff_county.sum()
        county_args = pd.read_csv(os.path.join(misc.data_dir(),'county_calib.csv')).set_index('county_id')
        # Counters of counties already within margin: i = residential units,
        # j = households, k = employment, m = non-residential sqft.
        i = 0;j = 0;k = 0;m = 0
        for x in targets.county_id.values:
            cid = int(x)
            print cid
            prop_ru = prop_growth_ru[cid]
            prop_hh = prop_growth_hh[cid]
            prop_emp = prop_growth_emp[cid]
            prop_nonres = prop_growth_nr[cid]
            print 'ru prop is ' + str(prop_ru)
            print 'nsqft prop is ' + str(prop_nonres)
            print 'hh prop is ' + str(prop_hh)
            print 'emp prop is ' + str(prop_emp)
            logger.log_status('ru prop is ' + str(prop_ru))
            logger.log_status('nsqft prop is ' + str(prop_nonres))
            logger.log_status('hh prop is ' + str(prop_hh))
            logger.log_status('emp prop is ' + str(prop_emp))
            target_ru = targets.resunit_target[targets.county_id==cid].values[0]
            target_hh = targets.hh_target[targets.county_id==cid].values[0]
            target_emp = targets.emp_target[targets.county_id==cid].values[0]
            target_nonres = targets.nonres_target[targets.county_id==cid].values[0]
            print 'ru target is ' + str(target_ru)
            print 'nsqft target is ' + str(target_nonres)
            print 'hh target is ' + str(target_hh)
            print 'emp target is ' + str(target_emp)
            logger.log_status('ru target is ' + str(target_ru))
            logger.log_status('nsqft target is ' + str(target_nonres))
            logger.log_status('hh target is ' + str(target_hh))
            logger.log_status('emp target is ' + str(target_emp))
            # Name matched against the 'fnames' entries of the coefficients.
            varname = 'county%s' % (cid)
            print varname
            # --- residential units: adjusts household demand and residential
            # price factors. A NaN share is treated like "below target".
            if (prop_ru > (target_ru - margin)) and (prop_ru < (target_ru + margin)):
                print 'NO ru action.'
                logger.log_status('NO ru action.')
                i = i + 1
            elif math.isnan(prop_ru) or (prop_ru < target_ru):
                county_args.chh_demand_factor[cid] = county_args.chh_demand_factor[cid].astype(float) + 0.01
                county_args.cres_price_factor[cid] = county_args.cres_price_factor[cid].astype(float) + 0.01
                print 'ru action is PLUS'
                logger.log_status('ru action is PLUS')
            elif prop_ru > target_ru:
                county_args.chh_demand_factor[cid] = county_args.chh_demand_factor[cid].astype(float) - 0.01
                county_args.cres_price_factor[cid] = county_args.cres_price_factor[cid].astype(float) - 0.01
                print 'ru action is MINUS'
                logger.log_status('ru action is MINUS')
            # --- households: shifts this county's coefficient by delta in
            # every hh_* submodel.
            if (prop_hh > (target_hh - margin)) and (prop_hh < (target_hh + margin)):
                print 'NO hh action.'
                logger.log_status('NO hh action.')
                j = j + 1
            elif math.isnan(prop_hh) or (prop_hh < target_hh):
                for submodel in hh_submodels:
                    dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] = dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] + delta
                print 'hh action is PLUS'
                logger.log_status('hh action is PLUS')
            elif prop_hh > target_hh:
                for submodel in hh_submodels:
                    dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] = dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] - delta
                print 'hh action is MINUS'
                logger.log_status('hh action is MINUS')
            # --- employment: same scheme over the emp_* submodels.
            if (prop_emp > (target_emp - margin)) and (prop_emp < (target_emp + margin)):
                print 'NO emp action.'
                logger.log_status('NO emp action.')
                k = k + 1
            elif math.isnan(prop_emp) or (prop_emp < target_emp):
                for submodel in emp_submodels:
                    dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] = dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] + delta
                print 'emp action is PLUS'
                logger.log_status('emp action is PLUS')
            elif prop_emp > target_emp:
                for submodel in emp_submodels:
                    dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] = dset.coeffs[(submodel, 'coeffs')][dset.coeffs[(submodel,'fnames')]==varname] - delta
                print 'emp action is MINUS'
                logger.log_status('emp action is MINUS')
            # --- non-residential sqft: adjusts employment demand and
            # non-residential price factors.
            if (prop_nonres > (target_nonres - margin)) and (prop_nonres < (target_nonres + margin)):
                print 'NO nonres action.'
                logger.log_status('NO nonres action.')
                m = m + 1
            elif math.isnan(prop_nonres) or (prop_nonres < target_nonres):
                county_args.cemp_demand_factor[cid] = county_args.cemp_demand_factor[cid].astype(float) + 0.01
                county_args.cnonres_price_factor[cid] = county_args.cnonres_price_factor[cid].astype(float) + 0.01
                print county_args.cnonres_price_factor[cid]
                print 'nonres action is PLUS'
                logger.log_status('nonres action is PLUS')
            elif prop_nonres > target_nonres:
                county_args.cemp_demand_factor[cid] = county_args.cemp_demand_factor[cid].astype(float) - 0.01
                county_args.cnonres_price_factor[cid] = county_args.cnonres_price_factor[cid].astype(float) - 0.01
                print 'nonres action is MINUS'
                print county_args.cnonres_price_factor[cid]
                logger.log_status('nonres action is MINUS')
        print i,j,k,m
        logger.log_status('Number of hh county targets met: %s' % j)
        logger.log_status('Number of emp county targets met: %s' % k)
        logger.log_status('Number of ru county targets met: %s' % i)
        logger.log_status('Number of nr county targets met: %s' % m)
        ###Save calibrated coefficients at the end of each iteration
        # Only dset.coeffs is written back; dset.coeffs_res is not saved here.
        coeff_store_path = os.path.join(misc.data_dir(),'coeffs.h5')
        coeff_store = pd.HDFStore(coeff_store_path)
        coeff_store['coeffs'] = dset.coeffs
        coeff_store.close()
        county_args.to_csv(os.path.join(misc.data_dir(),'county_calib.csv'))
def run(dset,hh_zone1,emp_zone1,developer_configuration,sim_year): #Record post-demand-model change in zone-level household/job totals hh = dset.fetch('households') e = dset.fetch('establishments') buildings = dset.fetch('buildings') parcels = dset.parcels if parcels.index.name != 'parcel_id': parcels = parcels.set_index(parcels['parcel_id']) buildings['zone_id'] = parcels.zone_id[buildings.parcel_id].values #e['zone_id'] = buildings.zone_id[e.building_id].values hh['zone_id'] = buildings.zone_id[hh.building_id].values hh_zone2 = hh.groupby('zone_id').size() emp_zone2 = e.groupby('zone_id').employees.sum() zdiff = pd.DataFrame(index=dset.zones.index) ####### zdiff['hh_zone1'] = hh_zone1 zdiff['hh_zone2'] = hh_zone2 zdiff['emp_zone1'] = emp_zone1 zdiff['emp_zone2'] = emp_zone2 zdiff = zdiff.fillna(0) zdiff.hh_zone2 = zdiff.hh_zone2+5 zdiff.emp_zone2 = zdiff.emp_zone2+5 hh_zone_diff = (zdiff.hh_zone2 - zdiff.hh_zone1) emp_zone_diff = (zdiff.emp_zone2 - zdiff.emp_zone1) #####Get the user inputted zone args if developer_configuration['zonal_levers']: zone_args = pd.read_csv(os.path.join(misc.data_dir(),'devmodal_zone_args.csv')).set_index('zone_id') ##Getting county_id into zone_args. 
Eventually, lets move the dset.zones operations to the varlib dset.zones['county_id'] = 0 dset.zones.loc[dset.zones.county == 'Adams', "county_id"] = 8001 # corrected chained index error dset.zones.loc[dset.zones.county == 'Arapahoe', "county_id"] = 8005 # corrected chained index error dset.zones.loc[dset.zones.county == 'Boulder', "county_id"] = 8013 # corrected chained index error dset.zones.loc[dset.zones.county == 'Broomfield', "county_id"] = 8014 # corrected chained index error dset.zones.loc[dset.zones.county == 'Clear Creek', "county_id"] = 8019 # corrected chained index error dset.zones.loc[dset.zones.county == 'Denver', "county_id"] = 8031 # corrected chained index error dset.zones.loc[dset.zones.county == 'Douglas', "county_id"] = 8035 # corrected chained index error dset.zones.loc[dset.zones.county == 'Elbert', "county_id"] = 8039 # corrected chained index error dset.zones.loc[dset.zones.county == 'Gilpin', "county_id"] = 8047 # corrected chained index error dset.zones.loc[dset.zones.county == 'Jefferson', "county_id"] = 8059 # corrected chained index error dset.zones.loc[dset.zones.county == 'Weld', "county_id"] = 8123 # corrected chained index error zone_args['cid'] = dset.zones.county_id pd.set_option('display.max_rows', 1000) ##Loading/applying county calib factors to scale the zone args county_args = pd.read_csv(os.path.join(misc.data_dir(),'county_calib.csv')).set_index('county_id') zone_args['county_id']=zone_args['cid'] zone_args = pd.merge(zone_args,county_args,left_on='county_id',right_index=True) zone_args.res_price_factor = zone_args.res_price_factor*zone_args.cres_price_factor zone_args.nonres_price_factor = zone_args.nonres_price_factor*zone_args.cnonres_price_factor zone_args.cost_factor = zone_args.cost_factor*zone_args.ccost_factor emp_zone_diff = emp_zone_diff*zone_args.cemp_demand_factor hh_zone_diff = hh_zone_diff*zone_args.chh_demand_factor else: zone_args = None # ########################################## # #### Getting possible 
rents by use here ## # ########################################## buildings = buildings[['building_type_id','improvement_value','land_area','non_residential_sqft','parcel_id','residential_units','sqft_per_unit','stories','tax_exempt','year_built','bldg_sq_ft','unit_price_non_residential','unit_price_residential','building_sqft_per_job','non_residential_units','base_year_jobs','all_units', 'unit_price_res_sqft']] buildings.loc[:, "zone_id"] = parcels.zone_id[buildings.parcel_id].values # corrected chain index error res_buildings = buildings[buildings.unit_price_residential>0] nonres_buildings = buildings[buildings.unit_price_non_residential>0] nonres_buildings_office = nonres_buildings[nonres_buildings.building_type_id==5] nonres_buildings_retail = nonres_buildings[np.in1d(nonres_buildings.building_type_id,[17,18])] nonres_buildings_industrial = nonres_buildings[np.in1d(nonres_buildings.building_type_id,[9,22])] # Price now are in price/sqft #### XG: define residential price only on types 2,3, 20, 24 and non-residential 5, 9, 17,18,22 zone_args['zone_id']=zone_args.index res_buildings.loc[:, "resprice_sqft"] = res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].unit_price_res_sqft # corrected chain index error zonal_resprice_sqft = pd.DataFrame(res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].groupby('zone_id').resprice_sqft.mean()) zonal_resprice_sqft.columns=['resrent'] zone_args=pd.merge(zone_args,zonal_resprice_sqft, left_on='zone_id', right_index=True, how='outer') zonal_nonresprice_office = pd.DataFrame(nonres_buildings_office[nonres_buildings_office.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()) zonal_nonresprice_office.columns=['nonresrent_office'] zone_args=pd.merge(zone_args,zonal_nonresprice_office, left_on='zone_id', right_index=True, how='outer') zonal_nonresprice_retail = pd.DataFrame(nonres_buildings_retail[ 
nonres_buildings_retail.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()) zonal_nonresprice_retail.columns=['nonresrent_retail'] zone_args=pd.merge(zone_args,zonal_nonresprice_retail, left_on='zone_id', right_index=True, how='outer') zonal_nonresprice_industrial = pd.DataFrame(nonres_buildings_industrial[ nonres_buildings_industrial.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()) zonal_nonresprice_industrial.columns=['nonresrent_industrial'] zone_args=pd.merge(zone_args,zonal_nonresprice_industrial, left_on='zone_id', right_index=True, how='outer') zone_args['resrent']=zone_args['resrent']*zone_args.res_price_factor zone_args['nonresrent_office']=zone_args['nonresrent_office']* zone_args.nonres_price_factor zone_args['nonresrent_retail']=zone_args['nonresrent_retail']* zone_args.nonres_price_factor zone_args['nonresrent_industrial']=zone_args['nonresrent_industrial']* zone_args.nonres_price_factor zonal_avg_rents= zone_args[['resrent', 'nonresrent_office', 'nonresrent_retail','nonresrent_industrial','cost_factor','allowable_density_factor']] zonal_avg_rents.loc[:, "zone_id"] = zonal_avg_rents.index # corrected chain index error zonal_avg_rents.loc[:, 'county_id'] = dset.zones.county_id[zonal_avg_rents['zone_id']].values # corrected chain index error pd.set_option('display.max_rows', len(dset.zones.index)) del zonal_avg_rents['county_id'] del zonal_avg_rents['zone_id'] """ res_buildings['resprice_sqft'] = res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].unit_price_res_sqft zonal_resprice_sqft = pd.DataFrame(res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].groupby('zone_id').resprice_sqft.mean()) zonal_nonresprice_office = pd.DataFrame(nonres_buildings_office[nonres_buildings_office.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()) zonal_avg_rents=pd.join(zonal_resprice_sqft, 
zonal_nonresprice_office, how='outer') print zonal_avg_rents sys.exit('beurk') zonal_nonresprice_retail = pd.DataFrame(nonres_buildings_retail[ nonres_buildings_retail.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()) zonal_avg_rents=pd.join( zonal_nonresprice_retail, zonal_avg_rents, how='outer') zonal_nonresprice_industrial = nonres_buildings_industrial[ nonres_buildings_industrial.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean() zonal_resrent = zonal_resprice_sqft zonal_nonresrent_office = zonal_nonresprice_office zonal_nonresrent_retail = zonal_nonresprice_retail zonal_nonresrent_industrial = zonal_nonresprice_industrial if zone_args is not None: #####Make sure no nulls in the prices either... zonal_resrent = zonal_resrent * zone_args.res_price_factor print zonal_resrent zonal_nonresrent_office = zonal_nonresprice_office * zone_args.nonres_price_factor zonal_nonresrent_retail = zonal_nonresprice_retail * zone_args.nonres_price_factor zonal_nonresrent_industrial = zonal_nonresprice_industrial * zone_args.nonres_price_factor zonal_avg_rents = pd.DataFrame({'resrent':zonal_resrent,'nonresrent_office':zonal_nonresrent_office,'nonresrent_retail':zonal_nonresrent_retail,'nonresrent_industrial':zonal_nonresrent_industrial,'cost_factor':zone_args.cost_factor,'allowable_density_factor':zone_args.allowable_density_factor}, index=zonal_resrent.index) else: zonal_avg_rents = pd.DataFrame({'resrent':zonal_resrent,'nonresrent_office':zonal_nonresrent_office,'nonresrent_retail':zonal_nonresrent_retail,'nonresrent_industrial':zonal_nonresrent_industrial}) zonal_avg_rents['zone_id']=zonal_avg_rents.index zonal_avg_rents['county_id']=dset.zones.county_id[zonal_avg_rents['zone_id']].values pd.set_option('display.max_rows', len(dset.zones.index)) print zonal_avg_rents[ zonal_avg_rents['county_id']==8123].zone_id del zonal_avg_rents['county_id'] del zonal_avg_rents['zone_id'] """ avgrents = 
pd.merge(parcels,zonal_avg_rents,left_on='zone_id',right_index=True,how='left') avgrents['residential'] = avgrents.resrent avgrents['office'] = avgrents.nonresrent_office avgrents['retail'] = avgrents.nonresrent_retail avgrents['industrial'] = avgrents.nonresrent_industrial if zone_args is not None: avgrents = avgrents[['residential','office','retail','industrial','cost_factor','allowable_density_factor', 'county_id']] else: avgrents = avgrents[['residential','office','retail','industrial']] avgrents = avgrents.fillna(.1) #avgrents.residential[np.isinf(avgrents.residential)] = .2 avgrents.loc[avgrents.residential < .2, "residential"] = .2 # corrected chain index error avgrents.loc[avgrents.office < 1, "office"] = 1 # corrected chain index error avgrents.loc[avgrents.retail < 1, "retail"] = 1 # corrected chain index error avgrents.loc[avgrents.industrial < 1, "industrial"] = 1 # corrected chain index error ####################GET PARCEL LEVEL ATTRIBUTES #### XG: retain old square footage as it is used to compute average buildings.loc[:, 'bldg_sq_ft2'] = buildings['bldg_sq_ft'] # corrected chain index error buildings.loc[:, 'bldg_sq_ft'] = buildings.non_residential_sqft + buildings.residential_units*buildings.sqft_per_unit # corrected chain index error #buildings['impval'] = buildings.non_residential_sqft*buildings.unit_price_non_residential + buildings.residential_units*buildings.unit_price_residential buildings.loc[:, 'impval'] = 0 # corrected chain index error buildings.loc[buildings.residential_units*buildings.unit_price_residential>0,'impval'] = buildings.residential_units*buildings.unit_price_residential buildings.loc[buildings.non_residential_sqft*buildings.unit_price_non_residential >0,'impval']=buildings['impval']+ buildings.non_residential_sqft*buildings.unit_price_non_residential far_predictions = pd.DataFrame(index=parcels.index) #far_predictions['current_yearly_rent_buildings'] = buildings.groupby('parcel_id').impval.sum()/17.9 
far_predictions['current_yearly_rent_buildings'] = buildings.groupby('parcel_id').impval.sum() far_predictions['current_yearly_rent_buildings'] = far_predictions.current_yearly_rent_buildings.fillna(0) far_predictions.current_yearly_rent_buildings = far_predictions.current_yearly_rent_buildings * developer_configuration['land_property_acquisition_cost_factor'] far_predictions['county_id']=parcels.county_id[far_predictions.index].values print far_predictions[far_predictions['current_yearly_rent_buildings']>0].groupby('county_id').current_yearly_rent_buildings.mean() if zone_args is not None: #far_predictions.current_yearly_rent_buildings = avgrents.cost_factor*far_predictions.current_yearly_rent_buildings ##Cost scaling happens here far_predictions.current_yearly_rent_buildings = far_predictions.current_yearly_rent_buildings far_predictions['parcelsize'] = parcels.parcel_sqft ###PROFORMA SURFACE CALCULATIONS AND LOOKUPS (TO ARRIVE AT UNCONSTRAINED FARS BY USE) # do the lookup in the developer model - this is where the profitability is computed dev = spotproforma.Developer(profit_factor=developer_configuration['profit_factor']) for form in spotproforma.forms.keys(): far_predictions[form+'_feasiblefar'], far_predictions[form+'_profit'] = \ dev.lookup(form,avgrents[spotproforma.uses].as_matrix(),far_predictions.current_yearly_rent_buildings,far_predictions.parcelsize) # we now have a far prediction per parcel by allowable building type! 
#################DEVCONSTRAINTS: Obtain zoning and other development constraints ##### zoning = dset.fetch('zoning') fars = dset.fetch('fars') max_parcel_sqft = 200000 max_far_field = developer_configuration['max_allowable_far_field_name'] if max_far_field not in parcels.columns: parcels = pd.merge(parcels,fars,left_on='far_id',right_index=True) if developer_configuration['enforce_environmental_constraints']: parcels[max_far_field] = parcels[max_far_field]*(1 - parcels.prop_constrained) #Adjust allowable FAR to account for undevelopable proportion of parcel land if developer_configuration['enforce_ugb']: parcels[max_far_field][parcels.in_ugb==0] = parcels[max_far_field][parcels.in_ugb==0] * developer_configuration['outside_ugb_allowable_density'] if developer_configuration['uga_policies']: parcels[max_far_field][parcels.in_uga==1] = parcels[max_far_field][parcels.in_ugb==1] * developer_configuration['inside_uga_allowable_density'] parcels.loc[parcels.parcel_sqft < developer_configuration['min_lot_sqft'], "max_far_field"] = 0 # fixed chained index error parcels.loc[parcels.parcel_sqft > max_parcel_sqft, "max_far_field"] = 0 # fixed chained indexing error if 'type1' not in parcels.columns: parcels = pd.merge(parcels,zoning,left_on='zoning_id',right_index=True) ##Scale allowable FARs here if needed if zone_args is not None: parcels[max_far_field] = parcels[max_far_field]*avgrents.allowable_density_factor ####### BUILDING TYPE DICTIONARY ##### type_d = { 'residential': [2,3,20,24], 'industrial': [9,22], 'retail': [17,18], 'office': [5], } ###MERGE ALLOWABLE DENSITY BY USE WITH FEASIBLE DENSITY BY USE (TAKE MINIMUM) TO ARRIVE AT A PARCEL PREDICTION # we have zoning by like 16+ building types and rents/far predictions by 4 more aggregate building types # so we have to convert one into the other parcel_predictions = pd.DataFrame(index=parcels.index) parcel_predictions['county_id']=parcels.county_id for typ, btypes in type_d.iteritems(): for btype in btypes: # three 
questions - 1) is type allowed 2) what FAR is allowed 3) is it supported by rents if developer_configuration['enforce_allowable_use_constraints']: tmp = parcels[parcels['type%d'%btype]==1][[max_far_field]] # is type allowed far_predictions['type%d_zonedfar'%btype] = tmp[max_far_field] # at what far else: far_predictions['type%d_zonedfar'%btype] = parcels[max_far_field] # merge zoning with feasibility tmp.index.name='parcel_id' tmp = pd.merge(tmp,far_predictions[[typ+'_feasiblefar']],left_index=True,right_index=True,how='left').set_index(tmp.index) # min of zoning and feasibility parcel_predictions[btype] = pd.Series(np.minimum(tmp[max_far_field],tmp[typ+'_feasiblefar']),index=tmp.index) #avgrents2=avgrents.ix[parcels['type%d'%btype]==1] #profit=dev.profit(typ,avgrent2s[spotproforma.uses].as_matrix(),far_predictions.current_yearly_rent_buildings,parcel_prediction[btype]) #print profit #parcel_predictions[btype+'_profit']=pd.Series(profit,index=tmp.index) parcel_predictions = parcel_predictions.dropna(how='all').sort_index(axis=1) for col in parcel_predictions.columns: print col, (parcel_predictions[col]*far_predictions.parcelsize).sum()/1000000.0 ###LIMITING PARCEL PREDICTIONS TO 1MILLION SQFT ####SELECTING SITES np.random.seed(1) p_sample_proportion = .5 parcel_predictions = parcel_predictions.ix[np.random.choice(parcel_predictions.index, int(len(parcel_predictions.index)*p_sample_proportion),replace=False)] parcel_predictions.index.name = 'parcel_id' parcel_predictions.to_csv(os.path.join(misc.data_dir(),'parcel_predictions.csv'),index_col='parcel_id',float_format="%.2f") # far_predictions.to_csv(os.path.join(misc.data_dir(),'far_predictions.csv'),index_col='parcel_id',float_format="%.2f") #####CALL TO THE DEVELOPER newbuildings, price_shifters = new_developer.run(dset,hh_zone_diff,emp_zone_diff,parcel_predictions,year=sim_year, min_building_sqft=developer_configuration['min_building_sqft'], min_lot_sqft=developer_configuration['min_lot_sqft'], 
max_lot_sqft=max_parcel_sqft,zone_args=zone_args, tot_sqft=dset.zones[['residential_sqft_zone','non_residential_sqft_zone']]) #####APPLY PRICE SHIFTS (PSEUDO-EQUILIBRATION) [MAKE THIS OPTIONAL] print 'Applying price shifts' pshift_btypes = [] pshift_zone = [] pshift_shift = [] for item in price_shifters.items(): pshift_btypes.append(item[0][0]) pshift_zone.append(item[0][1]) pshift_shift.append(item[1]) pshift = pd.DataFrame({'btype':pshift_btypes,'zone':pshift_zone,'shift_amount':pshift_shift}) buildings['zone_id'] = parcels.loc[buildings.parcel_id, "zone_id"].values buildings['bid'] = buildings.index.values buildings = pd.merge(buildings,pshift,left_on=['building_type_id','zone_id'],right_on=['btype','zone'],how='left') buildings.shift_amount = buildings.shift_amount.fillna(1.0) buildings.unit_price_residential = buildings.unit_price_residential*buildings.shift_amount #buildings.unit_price_non_residential = buildings.unit_price_non_residential*buildings.shift_amount buildings.index = buildings.bid ##When net residential units is less than 0, need to implement building demolition newbuildings = newbuildings[['zone_id','building_type_id', 'building_sqft','residential_units','lot_size']] #print newbuildings.building_sqft newbuildings = newbuildings.reset_index() newbuildings.columns = ['parcel_id','zone_id','building_type_id','bldg_sq_ft','residential_units','land_area'] newbuildings.parcel_id = newbuildings.parcel_id.astype('int32') #newbuildings['county_id']=parcel_predictions.county_id[newbuildings.parcel_id].values # why is this here? 
#print newbuildings[newbuildings.residential_units == 0].groupby('county_id').bldg_sq_ft.sum() newbuildings.residential_units = newbuildings.residential_units.astype('int32') newbuildings.land_area = newbuildings.land_area.astype('int32') newbuildings.building_type_id = newbuildings.building_type_id.astype('int32') newbuildings.parcel_id = newbuildings.parcel_id.astype('int32') newbuildings.bldg_sq_ft = np.round(newbuildings.bldg_sq_ft).astype('int32') newbuildings.bldg_sq_ft2 = np.round(newbuildings.bldg_sq_ft).astype('int32') newbuildings['non_residential_sqft'] = 0 newbuildings.loc[newbuildings.residential_units == 0, "non_residential_sqft"] = newbuildings.bldg_sq_ft newbuildings['improvement_value'] = (newbuildings.non_residential_sqft*100 + newbuildings.residential_units*100000).astype('int32') newbuildings['sqft_per_unit'] = 1400 newbuildings.loc[newbuildings.residential_units>0, "sqft_per_unit"] = 1000 newbuildings['stories'] = np.ceil(newbuildings.bldg_sq_ft*1.0/newbuildings.land_area).astype('int32') newbuildings['tax_exempt'] = 0 newbuildings['year_built'] = sim_year newbuildings['unit_price_residential'] = 0.0 newbuildings.loc[newbuildings.residential_units>0, "unit_price_residential"] = buildings[buildings.unit_price_residential>0].unit_price_residential.median() newbuildings['unit_price_res_sqft'] = 0.0 newbuildings.loc[newbuildings.residential_units>0, "unit_price_res_sqft"] = buildings[buildings.unit_price_res_sqft>0].unit_price_res_sqft.median() newbuildings['unit_price_non_residential'] = 0.0 newbuildings.loc[newbuildings.non_residential_sqft>0, "unit_price_non_residential"] = buildings[buildings.unit_price_non_residential>0].unit_price_non_residential.median() ##### XG: originally, impose exogenous prices for new buildings. 
Now impose average county price #newbuildings['county_id'] = dset.parcels.county_id[newbuildings.parcel_id].values # improper join - index incorrect newbuildings['county_id'] = parcels.loc[newbuildings.parcel_id, "county_id"].values #buildings['county_id'] = dset.parcels.county_id[buildings.parcel_id].values # improper join - index incorrect buildings['county_id'] = parcels.loc[buildings.parcel_id, "county_id"].values u=pd.DataFrame(buildings[(buildings.bldg_sq_ft2>0)*(np.in1d(buildings.building_type_id,[2,3,20,24]))].groupby('county_id').unit_price_res_sqft.mean()) u.columns=['res_price_county'] newbuildings=pd.merge(newbuildings, u, left_on='county_id', right_index=True) u=pd.DataFrame(buildings[(buildings.non_residential_sqft>0)*(np.in1d(buildings.building_type_id,[5,9,17,18,22]))].groupby('county_id').unit_price_non_residential.mean()) u.columns=['nres_price_county'] newbuildings=pd.merge(newbuildings, u, left_on='county_id', right_index=True) u=pd.DataFrame(buildings.groupby('county_id').unit_price_residential.mean()) u.columns=['unit_res_price_county'] newbuildings=pd.merge(newbuildings, u, left_on='county_id', right_index=True) newbuildings.loc[(newbuildings.bldg_sq_ft>0)*(np.in1d(newbuildings.building_type_id,[2,3,20,24])), "unit_price_residential"] = newbuildings.unit_res_price_county newbuildings.loc[(newbuildings.bldg_sq_ft>0)*(np.in1d(newbuildings.building_type_id,[2,3,20,24])), "unit_price_res_sqft"] = newbuildings.res_price_county newbuildings.loc[(newbuildings.non_residential_sqft>0)*(np.in1d(newbuildings.building_type_id,[5,9,17,18,22])), "unit_price_non_residential"] = newbuildings.nres_price_county #print newbuildings[(np.in1d(newbuildings.building_type_id,[2,3,20,24]))*(newbuildings['bldg_sq_ft']>0)].groupby('county_id').unit_price_res_sqft.mean() #### end XG newbuildings['building_sqft_per_job'] = 250.0 #####Need to replace with observed newbuildings['non_residential_units'] = 
(newbuildings.non_residential_sqft/newbuildings.building_sqft_per_job).fillna(0) newbuildings['base_year_jobs'] = 0.0 newbuildings['all_units'] = newbuildings.non_residential_units + newbuildings.residential_units newbuildings.non_residential_sqft = newbuildings.non_residential_sqft.astype('int32') newbuildings.tax_exempt = newbuildings.tax_exempt.astype('int32') newbuildings.year_built = newbuildings.year_built.astype('int32') newbuildings.sqft_per_unit = newbuildings.sqft_per_unit.astype('int32') newbuildings = newbuildings.set_index(np.arange(len(newbuildings.index))+np.amax(buildings.index.values)+1) buildings = buildings[['zone_id','building_type_id','improvement_value','land_area','non_residential_sqft','parcel_id','residential_units','sqft_per_unit','stories','tax_exempt','year_built','bldg_sq_ft','bldg_sq_ft2','unit_price_non_residential','unit_price_residential','building_sqft_per_job','non_residential_units','base_year_jobs','all_units', 'unit_price_res_sqft']] return buildings, newbuildings
def run(dset, current_year):
    """Refine zone-level demand results by relocating or unplacing agents.

    Reads ``zone_demand_refine_no_broomfield.csv`` from ``misc.data_dir()``
    and applies every row of it.  Each row supplies a ``zone_id`` together
    with an ``annual_hh_shift`` and an ``annual_emp_shift``:

    * a positive shift moves households / establishments that are currently
      outside the zone into the first building found in the zone;
    * a negative shift, honoured only while ``current_year < 2040``,
      unplaces agents currently in the zone by setting their
      ``building_id`` to -1.

    Parameters
    ----------
    dset
        Dataset object exposing the ``buildings``, ``households`` and
        ``establishments`` tables.  The two agent tables are modified in
        place; both must carry ``zone_id`` and ``building_id`` columns, and
        establishments an ``employees`` column.
    current_year : int
        Year being simulated; negative shifts are ignored from 2040 on.

    Raises
    ------
    IndexError
        If agents must be relocated into a zone that has no building.
    ValueError
        If more households are requested than exist outside the target
        zone (raised by ``random.sample``).
    """
    b = dset.buildings
    e = dset.establishments
    hh = dset.households
    zone_refine = pd.read_csv(
        os.path.join(misc.data_dir(), 'zone_demand_refine_no_broomfield.csv'))

    # NOTE: this function used to call np.random.shuffle() on
    # hh.index.values and e.index.values here.  shuffle() works in place and
    # returns None, so the stored results were never usable, and shuffling
    # the array behind a pandas Index scrambles the row labels of the agent
    # tables.  The calls were removed; sampling below does its own
    # randomisation.

    def first_building_in_zone(zone_id):
        # Raises IndexError when the zone has no building at all.
        return b[b.zone_id == zone_id].index.values[0]

    def relocate_agents(agents_joined, zone_id, number_of_agents):
        """Move `number_of_agents` random outside agents into the zone."""
        agent_pool = agents_joined[agents_joined.zone_id != zone_id]
        # random.sample needs a real sequence and an integer count; the
        # count arrives from a DataFrame row and may be a float.
        random_sample = random.sample(list(agent_pool.index),
                                      int(number_of_agents))
        new_building_id = first_building_in_zone(zone_id)
        agents_joined.loc[random_sample, "building_id"] = new_building_id

    def unplace_agents(agents_joined, zone_id, number_of_agents):
        """Unplace households of the zone (number_of_agents is negative)."""
        number_of_agents = -number_of_agents  # flip the sign
        # Equality here (agents inside the zone), unlike relocate_agents.
        agent_pool = agents_joined[agents_joined.zone_id == zone_id]
        if len(agent_pool) > number_of_agents:
            random_sample = random.sample(list(agent_pool.index),
                                          int(number_of_agents))
            # .loc instead of chained indexing so the write always lands in
            # the households table itself.
            dset.households.loc[random_sample, "building_id"] = -1  # unplace

    def relocate_estabs(agents_joined, zone_id, number_of_agents):
        """Move outside establishments into the zone, counted by employees."""
        agent_pool = agents_joined[agents_joined.zone_id != zone_id]
        e_sample = agent_pool.reindex(np.random.permutation(agent_pool.index))
        # Take establishments, in random order, while the running employee
        # total stays below the requested shift plus 10.
        e_to_move = e_sample[np.cumsum(e_sample['employees'].values) <
                             abs(number_of_agents + 10)]
        # Build the mask against the frame that is actually written to.
        idx_agents_to_relocate = np.in1d(agents_joined.index.values,
                                         e_to_move.index.values)
        new_building_id = first_building_in_zone(zone_id)
        agents_joined.loc[idx_agents_to_relocate,
                          "building_id"] = new_building_id

    def unplace_estabs(agents_joined, zone_id, number_of_agents):
        """Unplace establishments of the zone (number_of_agents is negative)."""
        number_of_agents = -number_of_agents  # flip the sign
        # Equality here (agents inside the zone), unlike relocate_estabs.
        agent_pool = agents_joined[agents_joined.zone_id == zone_id]
        if agent_pool.employees.sum() > number_of_agents:
            e_sample = agent_pool.reindex(
                np.random.permutation(agent_pool.index))
            e_to_move = e_sample[np.cumsum(e_sample['employees'].values) <
                                 abs(number_of_agents)]
            # Membership test only: the order of the ids is irrelevant, so
            # the former extra shuffle of them was dropped.
            idx_agents_to_relocate = np.in1d(
                dset.establishments.index.values, e_to_move.index.values)
            dset.establishments.loc[idx_agents_to_relocate,
                                    "building_id"] = -1  # unplace

    def refine(series):
        """Apply one row of the refinement table."""
        hh_shift = series.annual_hh_shift
        emp_shift = series.annual_emp_shift
        zone = series.zone_id
        if hh_shift > 0:
            relocate_agents(hh, zone, hh_shift)
        if emp_shift > 0:
            relocate_estabs(e, zone, emp_shift)
        if current_year < 2040:
            if hh_shift < 0:
                unplace_agents(hh, zone, hh_shift)
            if emp_shift < 0:
                unplace_estabs(e, zone, emp_shift)

    zone_refine.apply(refine, axis=1)
for btype in btypes: # three questions - 1) is type allowed 2) what FAR is allowed 3) is it supported by rents tmp = parcels[parcels['type%d'%btype]=='t'][['max_far']] # is type allowed far_predictions['type%d_zonedfar'%btype] = tmp['max_far'] # at what far # merge zoning with feasibility tmp = pd.merge(tmp,far_predictions[[typ+'_feasiblefar']],left_index=True,right_index=True,how='left').set_index(tmp.index) # min of zoning and feasibility parcel_predictions[btype] = pd.Series(np.minimum(tmp['max_far'],tmp[typ+'_feasiblefar']),index=tmp.index) parcel_predictions = parcel_predictions.dropna(how='all').sort_index(axis=1) print "Average rents\n", avgrents.describe() print "Feasibility\n", far_predictions.describe() print "Restricted to zoning\n", parcel_predictions.describe() print "Feasible square footage (in millions)" for col in parcel_predictions.columns: print col, (parcel_predictions[col]*far_predictions.parcelsize).sum()/1000000.0 parcel_predictions.to_csv('parcel_predictions.csv',index_col='parcel_id',float_format="%.2f") far_predictions.to_csv('far_predictions.csv',index_col='parcel_id',float_format="%.2f") print "Finished developer", time.ctime() if __name__ == '__main__': print "Running spotproforma" dev = spotproforma.Developer() print "Done running spotproforma" dset = baydataset.BayAreaDataset(os.path.join(misc.data_dir(),'bayarea.h5')) run(dset,2010,dev=dev)
def run(self, name=None, export_buildings_to_urbancanvas=False,
        base_year=2010, forecast_year=None, fixed_seed=True, random_seed=1,
        indicator_configuration=None, core_components_to_run=None,
        household_transition=None, household_relocation=None,
        employment_transition=None, elcm_configuration=None,
        developer_configuration=None, table_swapping=None,
        travel_model_configuration1=None, travel_model_configuration2=None,
        travel_model_configuration3=None, travel_model_configuration4=None,
        travel_model_configuration5=None, travel_model_configuration6=None):
    """Runs an UrbanSim2 scenario

    Loads ``drcog.h5`` and the estimated coefficients from
    ``misc.data_dir()``, adds the refiner's residential and non-residential
    buildings, then simulates every year from ``base_year`` to
    ``forecast_year`` inclusive.  Per year, in order: variable library,
    ELCM, HLCM, price models (REPM), proforma developer, indicators,
    travel-model export, table swapping and the UrbanCanvas export.

    Parameters
    ----------
    export_buildings_to_urbancanvas : bool
        Export each year's new buildings to UrbanCanvas.  Skipped (with a
        log message) in years where the developer model produced nothing.
    base_year, forecast_year : int
        First and last simulated year.  ``forecast_year`` is required.
    fixed_seed, random_seed
        When ``fixed_seed`` is true numpy's RNG is seeded with
        ``random_seed``.
    indicator_configuration : dict or None
        Reads ``export_indicators``, ``years_to_run`` and
        ``indicator_output_directory``.  ``None`` disables indicators.
    core_components_to_run : dict or None
        Flags ``ELCM``, ``HLCM``, ``Price`` and ``Developer``.  ``None`` or
        a missing key means the component is not run.
    developer_configuration
        Passed through to ``proforma_developer_model.run``.
    table_swapping : dict or None
        Reads ``year``, ``swap_skims``, ``new_skim_file``,
        ``swap_dist_rail`` and ``new_dist_rail_file``.  ``None`` disables
        swapping.
    travel_model_configuration1 .. travel_model_configuration6
        Passed as a list to ``export_zonal_file.export_zonal_file_to_tm``.
    name, household_transition, household_relocation,
    employment_transition, elcm_configuration
        Accepted for interface compatibility; not read by this method.

    Raises
    ------
    ValueError
        If ``forecast_year`` is None.
    """
    # Fail fast: previously a missing forecast_year only surfaced as a
    # TypeError at ``forecast_year+1`` after all the data had been loaded.
    if forecast_year is None:
        raise ValueError('forecast_year is required '
                         '(last year to simulate, inclusive).')

    # The signature defaults these to None; treat None as "nothing enabled"
    # instead of failing on the first subscript.
    core_components_to_run = core_components_to_run or {}
    indicator_configuration = indicator_configuration or {}

    logger.log_status('Starting UrbanSim2 run.')
    dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5'))
    seconds_start = time.time()
    if fixed_seed:
        logger.log_status('Running with fixed random seed.')
        np.random.seed(random_seed)

    # Load estimated coefficients; close each store even if the read fails.
    coeff_store = pd.HDFStore(os.path.join(misc.data_dir(), 'coeffs.h5'))
    try:
        dset.coeffs = coeff_store.coeffs.copy()
    finally:
        coeff_store.close()

    coeff_store = pd.HDFStore(os.path.join(misc.data_dir(), 'coeffs_res.h5'))
    try:
        dset.coeffs_res = coeff_store.coeffs_res.copy()
    finally:
        coeff_store.close()

    # Keep track of unplaced agents by year
    unplaced_hh = []
    unplaced_emp = []

    # UrbanCanvas scenario id, replaced by db-retrieved value during export step
    urbancanvas_scenario_id = 0

    ##### Residential Buildings #####
    new_refiner.add_res_buildings(dset)

    ##### Non-Residential Buildings #####
    new_refiner.add_non_res_buildings(dset)

    for sim_year in range(base_year, forecast_year + 1):
        # Parenthesised single-argument print behaves the same whether
        # print is a statement or a function.
        print('Simulating year ' + str(sim_year))
        logger.log_status(sim_year)

        # Reset every year so the UrbanCanvas export below can tell whether
        # the developer model ran (it used to hit an unbound name).
        newbuildings = None

        ## Variable Library calculations
        variable_library.calculate_variables(dset)

        # Record pre-demand model zone-level household/job totals
        hh_zone1 = dset.fetch('households').groupby('zone_id').size()
        emp_zone1 = dset.fetch('establishments').groupby(
            'zone_id').employees.sum()

        ############     ELCM SIMULATION
        if core_components_to_run.get('ELCM'):
            logger.log_status('ELCM simulation.')
            alternatives = dset.buildings[
                (dset.buildings.non_residential_sqft > 0)]
            new_elcm_model.simulate(
                dset, year=sim_year, depvar='building_id',
                alternatives=alternatives,
                simulation_table='establishments',
                output_names=("drcog-coeff-elcm-%s.csv",
                              "DRCOG EMPLOYMENT LOCATION CHOICE MODELS (%s)",
                              "emp_location_%s",
                              "establishment_building_ids"),
                agents_groupby=['sector_id_retail_agg', ],
                transition_config={
                    'Enabled': True,
                    'control_totals_table':
                        'annual_employment_control_totals',
                    'scaling_factor': 1.0})

        #################     HLCM SIMULATION
        if core_components_to_run.get('HLCM'):
            logger.log_status('HLCM simulation.')
            alternatives = dset.buildings[
                (dset.buildings.residential_units > 0)]
            new_hlcm_simulation.simulate(
                dset, year=sim_year, depvar='building_id',
                alternatives=alternatives,
                simulation_table='households',
                output_names=("drcog-coeff-hlcm-%s.csv",
                              "DRCOG HOUSEHOLD LOCATION CHOICE MODELS (%s)",
                              "hh_location_%s",
                              "household_building_ids"),
                agents_groupby=['income_3_tenure', ],
                transition_config={
                    'Enabled': True,
                    'control_totals_table':
                        'annual_household_control_totals',
                    'scaling_factor': 1.0},
                relocation_config={
                    'Enabled': True,
                    'relocation_rates_table':
                        'annual_household_relocation_rates',
                    'scaling_factor': 1.0},)

        ############     DEMAND-SIDE REFINEMENT
        # Currently disabled:
        # refiner.run(dset, sim_year)

        ############     REPM SIMULATION
        if core_components_to_run.get('Price'):
            logger.log_status('REPM simulation.')
            # Residential
            census_model_simulation.simulate_residential(
                dset, 'unit_price_res_sqft', 'school_district_id', 10,
                sim_year)
            # Non-residential
            regression_model_simulation.simulate(
                dset, year=sim_year,
                output_varname='unit_price_non_residential',
                simulation_table='buildings',
                output_names=["drcog-coeff-nrhedonic-%s.csv",
                              "DRCOG NRHEDONIC MODEL (%s)",
                              "nrprice_%s"],
                agents_groupby='building_type_id',
                segment_ids=[5, 8, 11, 16, 17, 18, 21, 23, 9, 22])

        ############     DEVELOPER SIMULATION
        if core_components_to_run.get('Developer'):
            logger.log_status('Proforma simulation.')
            buildings, newbuildings = proforma_developer_model.run(
                dset, hh_zone1, emp_zone1, developer_configuration, sim_year)
            dset.d['buildings'] = pd.concat([buildings, newbuildings])
            dset.buildings.index.name = 'building_id'

        ############   INDICATORS
        if indicator_configuration.get('export_indicators'):
            unplaced_hh.append((dset.households.building_id == -1).sum())
            unplaced_emp.append(
                dset.establishments[
                    dset.establishments.building_id == -1].employees.sum())
            if sim_year in indicator_configuration['years_to_run']:
                logger.log_status('Exporting indicators')
                indicators.run(
                    dset,
                    indicator_configuration['indicator_output_directory'],
                    sim_year)
                # NOTE(review): these four log calls are assumed to belong
                # to the indicator-year branch - confirm the intended
                # nesting against version control.
                logger.log_status('unplaced hh')
                logger.log_status(unplaced_hh)
                logger.log_status('unplaced emp')
                logger.log_status(unplaced_emp)

        ############     TRAVEL MODEL
        export_zonal_file.export_zonal_file_to_tm(
            dset, sim_year, logger,
            tm_config=[travel_model_configuration1,
                       travel_model_configuration2,
                       travel_model_configuration3,
                       travel_model_configuration4,
                       travel_model_configuration5,
                       travel_model_configuration6])

        ############     SWAPPER
        if table_swapping and sim_year == table_swapping['year']:
            if table_swapping['swap_skims']:
                logger.log_status('Swapping skims')
                td2 = pd.read_csv(table_swapping['new_skim_file'],
                                  index_col=['from_zone_id', 'to_zone_id'])
                dset.d['travel_data'] = td2
            if table_swapping['swap_dist_rail']:
                logger.log_status('Swapping parcel distance to rail')
                p2 = pd.read_csv(table_swapping['new_dist_rail_file'],
                                 index_col=['parcel_id'])
                dset.d['parcels']['dist_rail'] = p2.dist_rail

        ############     URBANCANVAS
        if export_buildings_to_urbancanvas:
            if newbuildings is None:
                # The developer component did not run this year, so there
                # is nothing to export.
                logger.log_status(
                    'No new buildings for year %s; skipping Urbancanvas '
                    'export.' % sim_year)
            else:
                logger.log_status('Exporting %s buildings to Urbancanvas database for project %s and year %s.' % (newbuildings.index.size,urbancanvas_scenario_id,sim_year))
                urbancanvas_scenario_id = \
                    urbancanvas_export.export_to_urbancanvas(
                        newbuildings, sim_year, urbancanvas_scenario_id)

    elapsed = time.time() - seconds_start
    print("TOTAL elapsed time: " + str(elapsed) + " seconds.")
newbuildings.building_sqft / RESUNITSIZE) newbuildings = newbuildings[newbuildings.lot_size < MAXLOTSIZE] newbuildings = newbuildings[newbuildings.lot_size > MINLOTSIZE] return newbuildings, price_shifters_d #price_shifters.to_csv('c:/users/jmartinez/documents/test_results.csv') sqft = pd.DataFrame() if __name__ == '__main__': import dataset import cProfile dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5')) #add variables for test sim emp_zone_diff = pd.read_csv( 'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/emp_zone_diff.csv', index_col=0) hh_zone_diff = pd.read_csv( 'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/hh_zone_diff.csv', index_col=0) parcel_predictions = pd.read_csv( 'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/parcel_predictions2.csv', index_col=0) zone_args = pd.read_csv( 'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/zone_args.csv', index_col=0) tot_sqft = pd.read_csv(
def _zonal_mean_price(frame, sqft_column, price_column, out_name):
    """Return a one-column frame holding the mean of ``price_column`` per zone.

    Only rows of ``frame`` whose ``sqft_column`` is positive contribute. The
    result is indexed by ``zone_id`` and its single column is ``out_name``.
    """
    priced = frame[frame[sqft_column] > 0]
    zonal = pd.DataFrame(priced.groupby("zone_id")[price_column].mean())
    zonal.columns = [out_name]
    return zonal


def run(dset, hh_zone1, emp_zone1, developer_configuration, sim_year):
    """Run the pro forma developer model for one simulation year.

    Steps, in order:

    1. Difference the zone-level household/employment totals against the
       ``hh_zone1`` / ``emp_zone1`` totals passed in by the caller.
    2. Optionally load zone and county "lever" factors from CSV files in
       ``misc.data_dir()`` and scale prices and demand with them.
    3. Compute zonal average prices by use and attach them to parcels.
    4. Look up the feasible FAR and profit per building form through
       ``spotproforma.Developer``.
    5. Apply zoning / FAR constraints and take, per building type, the
       minimum of the zoned and the feasible FAR.
    6. Sample half of the parcels (fixed seed), write them to
       ``parcel_predictions.csv`` and hand them to ``new_developer.run``.
    7. Apply the returned price shifters to existing buildings and format
       the new buildings like the buildings table.

    Parameters
    ----------
    dset : dataset object
        Must provide ``fetch(name)``, ``parcels`` and ``zones``.
    hh_zone1, emp_zone1 : per-zone totals
        Baseline household and employment totals to difference against.
    developer_configuration : dict
        Keys read here: ``zonal_levers``, ``profit_factor``,
        ``land_property_acquisition_cost_factor``,
        ``max_allowable_far_field_name``,
        ``enforce_environmental_constraints``, ``enforce_ugb``,
        ``outside_ugb_allowable_density``, ``uga_policies``,
        ``inside_uga_allowable_density``, ``min_lot_sqft``,
        ``min_building_sqft``, ``enforce_allowable_use_constraints``.
    sim_year : int
        Passed to the developer as ``year`` and stamped as ``year_built``.

    Returns
    -------
    (buildings, newbuildings) : tuple of DataFrame
        Existing buildings with shifted residential prices, and the newly
        developed buildings indexed after the largest existing building id.

    Side effects
    ------------
    Adds ``zone_id`` to the fetched buildings and households tables, may add
    ``county_id`` to ``dset.zones``, changes the pandas ``display.max_rows``
    option, reseeds ``np.random`` with 1, writes ``parcel_predictions.csv``
    and prints progress to stdout.
    """
    residential_types = [2, 3, 20, 24]
    nonresidential_types = [5, 9, 17, 18, 22]
    rent_columns = ["resrent", "nonresrent_office", "nonresrent_retail", "nonresrent_industrial"]

    # Record post-demand-model change in zone-level household/job totals
    hh = dset.fetch("households")
    e = dset.fetch("establishments")
    buildings = dset.fetch("buildings")
    parcels = dset.parcels
    if parcels.index.name != "parcel_id":
        parcels = parcels.set_index(parcels["parcel_id"])
    buildings["zone_id"] = parcels.zone_id[buildings.parcel_id].values
    hh["zone_id"] = buildings.zone_id[hh.building_id].values
    hh_zone2 = hh.groupby("zone_id").size()
    emp_zone2 = e.groupby("zone_id").employees.sum()

    zdiff = pd.DataFrame(index=dset.zones.index)
    zdiff["hh_zone1"] = hh_zone1
    zdiff["hh_zone2"] = hh_zone2
    zdiff["emp_zone1"] = emp_zone1
    zdiff["emp_zone2"] = emp_zone2
    zdiff = zdiff.fillna(0)
    # The post-model totals are padded by 5 per zone before differencing.
    zdiff["hh_zone2"] = zdiff["hh_zone2"] + 5
    zdiff["emp_zone2"] = zdiff["emp_zone2"] + 5
    hh_zone_diff = zdiff.hh_zone2 - zdiff.hh_zone1
    emp_zone_diff = zdiff.emp_zone2 - zdiff.emp_zone1

    # Get the user inputted zone args
    if developer_configuration["zonal_levers"]:
        zone_args = pd.read_csv(os.path.join(misc.data_dir(), "devmodal_zone_args.csv")).set_index("zone_id")
        # Getting county_id into zone_args. Eventually, let's move the
        # dset.zones operations to the varlib.
        county_fips = (
            ("Adams", 8001),
            ("Arapahoe", 8005),
            ("Boulder", 8013),
            ("Broomfield", 8014),
            ("Clear Creek", 8019),
            ("Denver", 8031),
            ("Douglas", 8035),
            ("Elbert", 8039),
            ("Gilpin", 8047),
            ("Jefferson", 8059),
            ("Weld", 8123),
        )
        dset.zones["county_id"] = 0
        for county_name, fips in county_fips:
            dset.zones.loc[dset.zones.county == county_name, "county_id"] = fips
        zone_args["cid"] = dset.zones.county_id
        pd.set_option("display.max_rows", 1000)
        # Load county calibration factors and use them to scale the zone args
        county_args = pd.read_csv(os.path.join(misc.data_dir(), "county_calib.csv")).set_index("county_id")
        zone_args["county_id"] = zone_args["cid"]
        zone_args = pd.merge(zone_args, county_args, left_on="county_id", right_index=True)
        zone_args["res_price_factor"] = zone_args.res_price_factor * zone_args.cres_price_factor
        zone_args["nonres_price_factor"] = zone_args.nonres_price_factor * zone_args.cnonres_price_factor
        zone_args["cost_factor"] = zone_args.cost_factor * zone_args.ccost_factor
        emp_zone_diff = emp_zone_diff * zone_args.cemp_demand_factor
        hh_zone_diff = hh_zone_diff * zone_args.chh_demand_factor
    else:
        zone_args = None

    # ---- Possible rents by use -------------------------------------------
    buildings = buildings[
        [
            "building_type_id",
            "improvement_value",
            "land_area",
            "non_residential_sqft",
            "parcel_id",
            "residential_units",
            "sqft_per_unit",
            "stories",
            "tax_exempt",
            "year_built",
            "bldg_sq_ft",
            "unit_price_non_residential",
            "unit_price_residential",
            "building_sqft_per_job",
            "non_residential_units",
            "base_year_jobs",
            "all_units",
            "unit_price_res_sqft",
        ]
    ].copy()
    buildings["zone_id"] = parcels.zone_id[buildings.parcel_id].values
    res_buildings = buildings[buildings.unit_price_residential > 0]
    nonres_buildings = buildings[buildings.unit_price_non_residential > 0]
    nonres_buildings_office = nonres_buildings[nonres_buildings.building_type_id == 5]
    nonres_buildings_retail = nonres_buildings[np.in1d(nonres_buildings.building_type_id, [17, 18])]
    nonres_buildings_industrial = nonres_buildings[np.in1d(nonres_buildings.building_type_id, [9, 22])]

    # Prices are in price/sqft.
    # XG: residential price is defined only on types 2, 3, 20, 24 and
    # non-residential price on types 5, 9, 17, 18, 22.
    zonal_prices = [
        _zonal_mean_price(
            res_buildings[np.in1d(res_buildings.building_type_id, residential_types)],
            "bldg_sq_ft",
            "unit_price_res_sqft",
            "resrent",
        ),
        _zonal_mean_price(
            nonres_buildings_office, "non_residential_sqft", "unit_price_non_residential", "nonresrent_office"
        ),
        _zonal_mean_price(
            nonres_buildings_retail, "non_residential_sqft", "unit_price_non_residential", "nonresrent_retail"
        ),
        _zonal_mean_price(
            nonres_buildings_industrial,
            "non_residential_sqft",
            "unit_price_non_residential",
            "nonresrent_industrial",
        ),
    ]

    # Without zonal levers there is no zone_args table to merge into; the
    # original dereferenced None here. Fall back to a bare per-zone table so
    # the "zone_args is None" branches further down are actually reachable.
    if zone_args is not None:
        rent_table = zone_args
    else:
        rent_table = pd.DataFrame(index=dset.zones.index)
    rent_table["zone_id"] = rent_table.index
    for zonal in zonal_prices:
        rent_table = pd.merge(rent_table, zonal, left_on="zone_id", right_index=True, how="outer")

    if zone_args is not None:
        rent_table["resrent"] = rent_table["resrent"] * rent_table.res_price_factor
        for rent_column in rent_columns[1:]:
            rent_table[rent_column] = rent_table[rent_column] * rent_table.nonres_price_factor
        # The merged and scaled table is what the developer receives below.
        zone_args = rent_table
        zonal_avg_rents = rent_table[rent_columns + ["cost_factor", "allowable_density_factor"]]
    else:
        zonal_avg_rents = rent_table[rent_columns]
    pd.set_option("display.max_rows", len(dset.zones.index))

    avgrents = pd.merge(parcels, zonal_avg_rents, left_on="zone_id", right_index=True, how="left")
    avgrents["residential"] = avgrents.resrent
    avgrents["office"] = avgrents.nonresrent_office
    avgrents["retail"] = avgrents.nonresrent_retail
    avgrents["industrial"] = avgrents.nonresrent_industrial
    if zone_args is not None:
        avgrents = avgrents[
            ["residential", "office", "retail", "industrial", "cost_factor", "allowable_density_factor", "county_id"]
        ]
    else:
        avgrents = avgrents[["residential", "office", "retail", "industrial"]]
    avgrents = avgrents.fillna(0.1)
    # Floor the rents so the pro forma never sees near-zero prices.
    avgrents.loc[avgrents.residential < 0.2, "residential"] = 0.2
    avgrents.loc[avgrents.office < 1, "office"] = 1
    avgrents.loc[avgrents.retail < 1, "retail"] = 1
    avgrents.loc[avgrents.industrial < 1, "industrial"] = 1

    # ---- Parcel level attributes -----------------------------------------
    # XG: retain old square footage as it is used to compute averages
    buildings["bldg_sq_ft2"] = buildings["bldg_sq_ft"]
    buildings["bldg_sq_ft"] = buildings.non_residential_sqft + buildings.residential_units * buildings.sqft_per_unit
    res_value = buildings.residential_units * buildings.unit_price_residential
    nonres_value = buildings.non_residential_sqft * buildings.unit_price_non_residential
    buildings["impval"] = 0
    buildings.loc[res_value > 0, "impval"] = res_value
    buildings.loc[nonres_value > 0, "impval"] = buildings["impval"] + nonres_value

    far_predictions = pd.DataFrame(index=parcels.index)
    far_predictions["current_yearly_rent_buildings"] = buildings.groupby("parcel_id").impval.sum()
    far_predictions["current_yearly_rent_buildings"] = far_predictions.current_yearly_rent_buildings.fillna(0)
    far_predictions["current_yearly_rent_buildings"] = (
        far_predictions.current_yearly_rent_buildings
        * developer_configuration["land_property_acquisition_cost_factor"]
    )
    far_predictions["county_id"] = parcels.county_id[far_predictions.index].values
    print(
        far_predictions[far_predictions["current_yearly_rent_buildings"] > 0]
        .groupby("county_id")
        .current_yearly_rent_buildings.mean()
    )
    # NOTE: scaling the acquisition cost by avgrents.cost_factor is disabled;
    # the original statement here was a self-assignment with no effect.
    far_predictions["parcelsize"] = parcels.parcel_sqft

    # ---- Pro forma lookups (unconstrained FARs by use) --------------------
    # The profitability is computed inside the developer lookup.
    dev = spotproforma.Developer(profit_factor=developer_configuration["profit_factor"])
    for form in spotproforma.forms.keys():
        far_predictions[form + "_feasiblefar"], far_predictions[form + "_profit"] = dev.lookup(
            form,
            avgrents[spotproforma.uses].values,
            far_predictions.current_yearly_rent_buildings,
            far_predictions.parcelsize,
        )
    # We now have a FAR prediction per parcel by allowable building form.

    # ---- Zoning and other development constraints -------------------------
    zoning = dset.fetch("zoning")
    fars = dset.fetch("fars")
    max_parcel_sqft = 200000
    max_far_field = developer_configuration["max_allowable_far_field_name"]
    if max_far_field not in parcels.columns:
        parcels = pd.merge(parcels, fars, left_on="far_id", right_index=True)
        if developer_configuration["enforce_environmental_constraints"]:
            # Adjust allowable FAR for the undevelopable share of the parcel
            parcels[max_far_field] = parcels[max_far_field] * (1 - parcels.prop_constrained)
        if developer_configuration["enforce_ugb"]:
            outside_ugb = parcels.in_ugb == 0
            parcels.loc[outside_ugb, max_far_field] = (
                parcels.loc[outside_ugb, max_far_field] * developer_configuration["outside_ugb_allowable_density"]
            )
        if developer_configuration["uga_policies"]:
            # Fixed: the original read the values through an in_ugb mask but
            # wrote them through an in_uga mask, misaligning the two sides.
            inside_uga = parcels.in_uga == 1
            parcels.loc[inside_uga, max_far_field] = (
                parcels.loc[inside_uga, max_far_field] * developer_configuration["inside_uga_allowable_density"]
            )
        # Fixed: the original wrote to a literal "max_far_field" column, so
        # the lot-size limits never reached the configured FAR column.
        too_small = parcels.parcel_sqft < developer_configuration["min_lot_sqft"]
        too_large = parcels.parcel_sqft > max_parcel_sqft
        parcels.loc[too_small | too_large, max_far_field] = 0
    if "type1" not in parcels.columns:
        parcels = pd.merge(parcels, zoning, left_on="zoning_id", right_index=True)
    # Scale allowable FARs here if needed
    if zone_args is not None:
        parcels[max_far_field] = parcels[max_far_field] * avgrents.allowable_density_factor

    # ---- Building type dictionary -----------------------------------------
    type_d = {"residential": [2, 3, 20, 24], "industrial": [9, 22], "retail": [17, 18], "office": [5]}

    # Merge allowable density by use with feasible density by use (take the
    # minimum) to arrive at a parcel prediction. Zoning is by detailed
    # building type while rents / FAR predictions are by four aggregate uses,
    # so one has to be mapped onto the other.
    parcel_predictions = pd.DataFrame(index=parcels.index)
    parcel_predictions["county_id"] = parcels.county_id
    enforce_uses = developer_configuration["enforce_allowable_use_constraints"]
    for typ, btypes in type_d.items():
        for btype in btypes:
            # three questions - 1) is type allowed 2) what FAR is allowed
            # 3) is it supported by rents
            if enforce_uses:
                tmp = parcels[parcels["type%d" % btype] == 1][[max_far_field]]
            else:
                # Fixed: tmp was never assigned on this path in the original.
                tmp = parcels[[max_far_field]].copy()
            far_predictions["type%d_zonedfar" % btype] = tmp[max_far_field]
            # merge zoning with feasibility
            tmp.index.name = "parcel_id"
            tmp = pd.merge(
                tmp, far_predictions[[typ + "_feasiblefar"]], left_index=True, right_index=True, how="left"
            ).set_index(tmp.index)
            # min of zoning and feasibility
            parcel_predictions[btype] = pd.Series(
                np.minimum(tmp[max_far_field], tmp[typ + "_feasiblefar"]), index=tmp.index
            )
    parcel_predictions = parcel_predictions.dropna(how="all").sort_index(axis=1)
    for col in parcel_predictions.columns:
        total_millions = (parcel_predictions[col] * far_predictions.parcelsize).sum() / 1000000.0
        print("%s %s" % (col, total_millions))

    # ---- Selecting sites --------------------------------------------------
    np.random.seed(1)
    p_sample_proportion = 0.5
    sampled_ids = np.random.choice(
        parcel_predictions.index, int(len(parcel_predictions.index) * p_sample_proportion), replace=False
    )
    parcel_predictions = parcel_predictions.loc[sampled_ids]
    parcel_predictions.index.name = "parcel_id"
    # index_label (not index_col, which is a read_csv argument) names the
    # index column in the written file.
    parcel_predictions.to_csv(
        os.path.join(misc.data_dir(), "parcel_predictions.csv"), index_label="parcel_id", float_format="%.2f"
    )

    # ---- Call to the developer --------------------------------------------
    newbuildings, price_shifters = new_developer.run(
        dset,
        hh_zone_diff,
        emp_zone_diff,
        parcel_predictions,
        year=sim_year,
        min_building_sqft=developer_configuration["min_building_sqft"],
        min_lot_sqft=developer_configuration["min_lot_sqft"],
        max_lot_sqft=max_parcel_sqft,
        zone_args=zone_args,
        tot_sqft=dset.zones[["residential_sqft_zone", "non_residential_sqft_zone"]],
    )

    # ---- Apply price shifts (pseudo-equilibration) [make this optional] ---
    print("Applying price shifts")
    # price_shifters maps (building_type_id, zone_id) -> multiplier
    shift_items = list(price_shifters.items())
    pshift = pd.DataFrame(
        {
            "btype": [key[0] for key, _ in shift_items],
            "zone": [key[1] for key, _ in shift_items],
            "shift_amount": [amount for _, amount in shift_items],
        }
    )
    buildings["zone_id"] = parcels.loc[buildings.parcel_id, "zone_id"].values
    buildings["bid"] = buildings.index.values  # merge drops the index; keep ids
    buildings = pd.merge(
        buildings, pshift, left_on=["building_type_id", "zone_id"], right_on=["btype", "zone"], how="left"
    )
    buildings["shift_amount"] = buildings.shift_amount.fillna(1.0)
    # Only residential prices are shifted; the non-residential shift is off.
    buildings["unit_price_residential"] = buildings.unit_price_residential * buildings.shift_amount
    buildings.index = buildings.bid

    # ---- Format new buildings like the buildings table --------------------
    # When net residential units is less than 0, need to implement demolition
    newbuildings = newbuildings[["zone_id", "building_type_id", "building_sqft", "residential_units", "lot_size"]]
    newbuildings = newbuildings.reset_index()
    newbuildings.columns = ["parcel_id", "zone_id", "building_type_id", "bldg_sq_ft", "residential_units", "land_area"]
    for int_column in ("parcel_id", "residential_units", "land_area", "building_type_id"):
        newbuildings[int_column] = newbuildings[int_column].astype("int32")
    newbuildings["bldg_sq_ft"] = np.round(newbuildings.bldg_sq_ft).astype("int32")
    # Fixed: attribute assignment does not create a DataFrame column.
    newbuildings["bldg_sq_ft2"] = np.round(newbuildings.bldg_sq_ft).astype("int32")
    newbuildings["non_residential_sqft"] = 0
    newbuildings.loc[newbuildings.residential_units == 0, "non_residential_sqft"] = newbuildings.bldg_sq_ft
    newbuildings["improvement_value"] = (
        newbuildings.non_residential_sqft * 100 + newbuildings.residential_units * 100000
    ).astype("int32")
    newbuildings["sqft_per_unit"] = 1400
    newbuildings.loc[newbuildings.residential_units > 0, "sqft_per_unit"] = 1000
    newbuildings["stories"] = np.ceil(newbuildings.bldg_sq_ft * 1.0 / newbuildings.land_area).astype("int32")
    newbuildings["tax_exempt"] = 0
    newbuildings["year_built"] = sim_year

    # Default prices: region-wide medians of the positive existing prices.
    newbuildings["unit_price_residential"] = 0.0
    newbuildings.loc[newbuildings.residential_units > 0, "unit_price_residential"] = buildings[
        buildings.unit_price_residential > 0
    ].unit_price_residential.median()
    newbuildings["unit_price_res_sqft"] = 0.0
    newbuildings.loc[newbuildings.residential_units > 0, "unit_price_res_sqft"] = buildings[
        buildings.unit_price_res_sqft > 0
    ].unit_price_res_sqft.median()
    newbuildings["unit_price_non_residential"] = 0.0
    newbuildings.loc[newbuildings.non_residential_sqft > 0, "unit_price_non_residential"] = buildings[
        buildings.unit_price_non_residential > 0
    ].unit_price_non_residential.median()

    # XG: originally exogenous prices were imposed on new buildings; now the
    # average county price is imposed instead.
    newbuildings["county_id"] = parcels.loc[newbuildings.parcel_id, "county_id"].values
    buildings["county_id"] = parcels.loc[buildings.parcel_id, "county_id"].values
    county_res_sqft_price = pd.DataFrame(
        buildings[(buildings.bldg_sq_ft2 > 0) * (np.in1d(buildings.building_type_id, residential_types))]
        .groupby("county_id")
        .unit_price_res_sqft.mean()
    )
    county_res_sqft_price.columns = ["res_price_county"]
    county_nonres_price = pd.DataFrame(
        buildings[(buildings.non_residential_sqft > 0) * (np.in1d(buildings.building_type_id, nonresidential_types))]
        .groupby("county_id")
        .unit_price_non_residential.mean()
    )
    county_nonres_price.columns = ["nres_price_county"]
    county_res_unit_price = pd.DataFrame(buildings.groupby("county_id").unit_price_residential.mean())
    county_res_unit_price.columns = ["unit_res_price_county"]
    # These are inner merges: new buildings in a county with no price row
    # are dropped.
    for county_prices in (county_res_sqft_price, county_nonres_price, county_res_unit_price):
        newbuildings = pd.merge(newbuildings, county_prices, left_on="county_id", right_index=True)

    new_residential = (newbuildings.bldg_sq_ft > 0) * (np.in1d(newbuildings.building_type_id, residential_types))
    new_nonresidential = (newbuildings.non_residential_sqft > 0) * (
        np.in1d(newbuildings.building_type_id, nonresidential_types)
    )
    newbuildings.loc[new_residential, "unit_price_residential"] = newbuildings.unit_res_price_county
    newbuildings.loc[new_residential, "unit_price_res_sqft"] = newbuildings.res_price_county
    newbuildings.loc[new_nonresidential, "unit_price_non_residential"] = newbuildings.nres_price_county
    # end XG

    newbuildings["building_sqft_per_job"] = 250.0  # need to replace with observed
    newbuildings["non_residential_units"] = (
        newbuildings.non_residential_sqft / newbuildings.building_sqft_per_job
    ).fillna(0)
    newbuildings["base_year_jobs"] = 0.0
    newbuildings["all_units"] = newbuildings.non_residential_units + newbuildings.residential_units
    for int_column in ("non_residential_sqft", "tax_exempt", "year_built", "sqft_per_unit"):
        newbuildings[int_column] = newbuildings[int_column].astype("int32")
    # New building ids continue after the largest existing building id.
    newbuildings = newbuildings.set_index(np.arange(len(newbuildings.index)) + np.amax(buildings.index.values) + 1)

    buildings = buildings[
        [
            "zone_id",
            "building_type_id",
            "improvement_value",
            "land_area",
            "non_residential_sqft",
            "parcel_id",
            "residential_units",
            "sqft_per_unit",
            "stories",
            "tax_exempt",
            "year_built",
            "bldg_sq_ft",
            "bldg_sq_ft2",
            "unit_price_non_residential",
            "unit_price_residential",
            "building_sqft_per_job",
            "non_residential_units",
            "base_year_jobs",
            "all_units",
            "unit_price_res_sqft",
        ]
    ]
    return buildings, newbuildings
def _choose_units(p, mask, alternatives, segment, new_homes):
    """Draw one vacant unit per agent of ``segment`` and record it in ``new_homes``.

    ``p`` holds the choice probability of every row of ``alternatives``;
    entries already flagged in ``mask`` are zeroed on a copy before drawing.
    When numpy cannot draw enough distinct units (``ValueError``), a warning
    is printed and the fallback assignment below is used instead.

    Returns the updated ``(mask, new_homes)`` pair.
    """
    p = copy.copy(p)
    p.loc[mask[mask == True].index] = 0  # already chosen
    unit_ids = alternatives.index.values
    try:
        indexes = np.random.choice(unit_ids, len(segment.index), replace=False, p=p.values / p.values.sum())
    except ValueError:
        # Narrowed from a bare except: np.random.choice signals an
        # impossible draw (too few units, bad probabilities) with ValueError.
        print("WARNING: not enough options to fit agents, will result in unplaced agents")
        indexes = np.random.choice(unit_ids, len(unit_ids), replace=False, p=p.values / p.values.sum())
        tenure2_ids = segment[segment.tenure == 2].index.values[:len(unit_ids)]
        if new_homes.ix[tenure2_ids].shape[0] != 0:
            # agents with tenure == 2 are flagged -2 instead of being placed
            new_homes.ix[tenure2_ids] = -2
        else:
            new_homes.ix[segment.index.values[:len(unit_ids)]] = unit_ids
        mask.loc[indexes] = True
        return mask, new_homes
    # alternatives may hold repeated labels, so the lookup can return more
    # rows than agents; keep only as many as there are agents.
    new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[:len(new_homes.ix[segment.index])]
    mask.loc[indexes] = True
    return mask, new_homes


def simulate(dset, year, depvar='building_id', alternatives=None, simulation_table=None, output_names=None, agents_groupby=['income_3_tenure', ], transition_config=None, relocation_config=None):
    """Simulate household location choice for one year.

    Optionally grows the household table to the control totals
    (``transition_config``), optionally unplaces relocating households
    (``relocation_config``), then loops over county / income-group rows and
    assigns unplaced households to vacant residential units using
    multinomial-logit probabilities from ``interaction.mnl_simulate``.

    Parameters
    ----------
    dset : dataset object
        Provides ``fetch``, ``d``, ``store``, ``households``, ``coeffs``,
        ``load_coeff`` and ``relocation_rates``.
    year : int
        Simulation year; selects the row of ``county_growth_share.csv``.
    depvar : str
        Column holding the chosen building id (-1 means unplaced).
    alternatives : DataFrame
        Buildings table. It is modified in place: an
        ``est_mortgage_payment`` column is added.
    simulation_table : str
        Name of the chooser table to fetch from ``dset``.
    output_names : 4-tuple
        ``(output_csv, output_title, coeff_name, output_varname)``; only
        ``coeff_name`` (a ``%``-template filled with the segment name) is
        used here.
    agents_groupby : list of str
        Columns that define the agent segments. The default list is never
        mutated.
    transition_config, relocation_config : dict
        Both must provide ``'Enabled'``; see the code for the other keys.

    Returns
    -------
    None. Results are written back into ``dset.households``.

    Notes
    -----
    This function targets the legacy pandas API already used by the file
    (``.ix``, ``Series.order``).
    """
    # output_names must unpack to exactly four entries
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] * transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1  # added households start unplaced
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    # calculate mortgage payment values: 5% annual rate over 360 monthly payments
    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    monthly_rate = .05 / 12
    n_payments = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (monthly_rate * (1 + monthly_rate)**n_payments) / ((1 + monthly_rate)**n_payments - 1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        rate_table[rate_field] = rate_table[rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar] == -1]
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(), 'county_growth_share.csv'), index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties, movers_all.shape[0], replace=True, p=current_growth_shares)
    movers_all['county_id'] = movers_counties

    # Per income group: mean and size of the upper / lower income bounds.
    # Row positions used below: [0] upper mean, [1] upper size, [2] lower mean.
    income_segment = movers_all.groupby('income_grp')[
        'upper_income_grp_val', 'lower_income_grp_val'].agg([np.mean, np.size])
    # get county growth control data and merge with income segments:
    # one copy of the income table per county in the growth-share row
    year_shares = county_growth_share.loc[year]
    income_segment['county'] = year_shares.index.values[0]
    income_segment['growth_share'] = year_shares[0]
    copy_df = income_segment.copy()
    for county_share in year_shares[1:].iteritems():
        copy_df['county'] = county_share[0]
        copy_df['growth_share'] = county_share[1]
        income_segment = pd.concat([income_segment, copy_df])
    income_segment = income_segment.set_index(['county', income_segment.index])

    print("Total new agents and movers = %d" % len(movers_all.index))
    for seg in income_segment.iterrows():
        county, stats = seg[0][0], seg[1]
        upper_income, n_households, lower_income = stats[0], stats[1], stats[2]
        # NOTE(review): movers are selected by income range only; the
        # county_id drawn above is not used as a filter here. Confirm intended.
        movers = movers_all[(movers_all['income'] <= upper_income) & (movers_all['income'] >= lower_income)]
        print('County: %s. Placing %d households in the income range (%d, %d)' % (
            county, n_households, lower_income, upper_income))

        # vacant units = residential units minus households already placed
        empty_units = buildings.residential_units.sub(
            choosers[choosers['building_id'] != -1].groupby('building_id').size(), fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        # one row per vacant unit, restricted to the current county
        vacant_units = buildings.ix[np.repeat(empty_units.index.values, empty_units.values.astype('int'))]
        vacant_units = vacant_units[vacant_units.county_id == int(county)]

        # affordability screen on the lower income bound of the group
        monthly_income = lower_income / 12
        if monthly_income <= 0:
            alts = vacant_units[vacant_units['unit_price_residential'] < 186281]
        elif monthly_income >= 55000:
            alts = vacant_units[vacant_units['unit_price_residential'] > 1583400]
        else:
            alts = vacant_units[vacant_units['est_mortgage_payment'] / monthly_income <= 0.33]
        if alts.shape[0] == 0:
            print('Could not place %d households due to income restrictions' % n_households)
            continue

        pdf = pd.DataFrame(index=alts.index)
        segments = movers.groupby(agents_groupby)

        # simulation: choice probabilities over alts, one column per segment
        for name, segment in segments:
            # the first agent of the segment stands in for the whole segment
            segment = segment.head(1)
            name = str(name)
            tmp_coeffname = coeff_name % name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            numchoosers = segment.shape[0]
            # every chooser faces all alternatives; join_index links each
            # alternative row to the chooser (note: this adds a column to alts)
            alts['join_index'] = np.repeat(segment.index.values, SAMPLE_SIZE)
            alternative_sample = pd.merge(alts, segment, left_on='join_index', right_index=True,
                                          suffixes=('', '_r'))
            chosen = np.zeros((numchoosers, SAMPLE_SIZE))
            chosen[:, 0] = 1

            # Interaction variables named "<left>_x_<right>"
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
            for full_name, parts in interaction_vars:
                left, right = alternative_sample[parts[0]], alternative_sample[parts[1]]
                if parts[0].endswith('gt'):
                    alternative_sample[full_name] = (left > right).astype('int32')
                # Fixed: this was a second "if", so its "else" overwrote
                # every "gt" dummy with the product of the two columns.
                elif parts[0].endswith('lt'):
                    alternative_sample[full_name] = (left < right).astype('int32')
                else:
                    alternative_sample[full_name] = left * right

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            data = est_data.fillna(0).values
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
            pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

        # assignment: -1 marks an agent that is still unplaced
        new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s' % name]
            # the mask of taken units starts empty for every segment
            mask = pd.Series(np.zeros(len(alts.index), dtype='bool'), index=alts.index)
            print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
            mask, new_homes = _choose_units(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts()  # num households placed in each building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        table = dset.households  # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        # keep choosers current so the next vacancy count sees these placements
        choosers.loc[table.index] = table

    dset.households.loc[out_table.index] = out_table
def estimate_non_res_elasticity(self, zones):
    """Estimate the demand elasticity for non-residential floor space.

    Fits a spatial two-stage least squares lag model
    (``py.spreg.twosls_sp.GM_Lag``) with ``log1p(non_residential_sqft_zone)``
    as the dependent variable, ``ln_avg_nonres_unit_price_zone`` as the
    endogenous regressor and ``ln_avg_unit_price_zone`` / ``avg_far`` as
    instruments, using queen-contiguity weights read from the zones
    shapefile in the data directory.

    Parameters
    ----------
    zones : pandas.DataFrame
        Zone table. Must provide the columns ``county``, ``zonal_emp``,
        ``residential_units_zone``, ``percent_younghead``,
        ``ln_avg_unit_price_zone``, ``ln_avg_nonres_unit_price_zone`` and
        ``non_residential_sqft_zone``. The caller's frame is not modified
        (a concatenated copy is used).

    Returns
    -------
    The reciprocal of the absolute value of the estimated coefficient on
    the endogenous price term.

    Raises
    ------
    KeyError
        If one of the expected county dummy columns is absent, i.e. no
        zone belongs to that county.
    """
    county_dummies = ['Arapahoe', 'Boulder', 'Broomfield', 'Clear Creek',
                      'Denver', 'Douglas', 'Elbert', 'Gilpin', 'Jefferson',
                      'Weld']

    zones = pd.concat([zones, pd.get_dummies(zones.county)], axis=1)
    zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean()
    n_zones = zones.shape[0]

    # Spatial weights. Path components are passed separately so the join
    # also works on platforms whose separator is not a backslash.
    wqueen = py.queen_from_shapefile(
        os.path.join(misc.data_dir(), 'shapefiles', 'zones.shp'))
    w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)

    # Exogenous regressors: two logged size measures, one share, and the
    # county dummies, in exactly the order listed in x_names.
    x = zones[['zonal_emp', 'residential_units_zone']].apply(np.log1p)
    x['percent_younghead'] = zones['percent_younghead']
    for county in county_dummies:
        x[county] = zones[county]
    x = x.fillna(0).values

    # Instruments for the endogenous price.
    imat = zones[['ln_avg_unit_price_zone', 'avg_far']].fillna(0).values

    yend = zones['ln_avg_nonres_unit_price_zone'].fillna(0).values
    yend = np.reshape(yend, (n_zones, 1))

    y = zones['non_residential_sqft_zone'].fillna(0).apply(np.log1p).values
    y = np.reshape(y, (n_zones, 1))

    imat_names = ['res_price', 'avg_far']
    x_names = (['zonal_emp', 'residential_units_zone', 'percent_younghead']
               + county_dummies)
    yend_name = ['ln_avg_nonres_unit_price_zone']
    y_name = 'non_residential_sqft_zone'

    reg_2sls = py.spreg.twosls_sp.GM_Lag(
        y, x, yend=yend, q=imat, w=w, w_lags=2, robust='white',
        name_x=x_names, name_q=imat_names, name_y=y_name,
        name_yend=yend_name)

    # Position of the price coefficient in reg_2sls.betas. This evaluates
    # to 14, the index the code has always used.
    # NOTE(review): assumes spreg orders betas as constant, exogenous x,
    # then yend -- confirm against the spreg documentation.
    price_beta_pos = 1 + len(x_names)
    demand_elasticity = np.absolute(reg_2sls.betas[price_beta_pos])
    return 1 / demand_elasticity[0]
# # dset.establishments.loc[series.index, "building_id"] = selected_ids # dset.establishments.loc[series.index, "zone_id"] = zone_ids def test_fnc(df, testParam1, testParam2): print type(df) print type(testParam1) print type(testParam2) #print type(testParam3) if __name__ == '__main__': from drcog.models import dataset from drcog.variables import variable_library import os import cProfile dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5')) #Load estimated coefficients coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs.h5')) dset.coeffs = coeff_store.coeffs.copy() coeff_store.close() coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs_res.h5')) dset.coeffs_res = coeff_store.coeffs_res.copy() coeff_store.close() variable_library.calculate_variables(dset) sim_year = 2011 alternatives = dset.buildings[(dset.buildings.non_residential_sqft>0)] simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'establishments',output_names = ("drcog-coeff-elcm-%s.csv","DRCOG EMPLOYMENT LOCATION CHOICE MODELS (%s)","emp_location_%s","establishment_building_ids"),
def run(dset, current_year):
    """Refine zone-level model results using a per-zone adjustment table.

    Reads ``zone_demand_refine_no_broomfield.csv`` from the data directory.
    For every row (columns ``zone_id``, ``annual_hh_shift``,
    ``annual_emp_shift``):

    * a positive household shift moves that many randomly chosen households
      from other zones into the first building of the zone;
    * a positive employment shift moves randomly ordered establishments from
      other zones into that building while their cumulative employee count
      stays below ``shift + 10``;
    * before 2040 only, a negative shift unplaces households/establishments
      of the zone by setting their ``building_id`` to -1.

    The household and establishment tables of ``dset`` are modified in
    place; nothing is returned.

    Parameters
    ----------
    dset : dataset object exposing ``buildings``, ``establishments`` and
        ``households`` tables, each with a ``zone_id`` column.
    current_year : int
        Simulation year; unplacing is skipped from 2040 onwards.

    Raises
    ------
    IndexError
        If agents must be moved into a zone that contains no buildings.
    """
    b = dset.buildings
    e = dset.establishments
    hh = dset.households
    zone_refine = pd.read_csv(
        os.path.join(misc.data_dir(), 'zone_demand_refine_no_broomfield.csv'))

    # NOTE: the previous `np.random.shuffle(<table>.index.values)` calls were
    # removed. shuffle() works in place and returns None, so the results were
    # never used, and the calls could only scramble the tables' index arrays.

    def first_building_in_zone(zone_id):
        # Raises IndexError when the zone has no buildings.
        return b[b.zone_id == zone_id].index.values[0]

    def relocate_agents(agents_joined, zone_id, number_of_agents):
        """Move up to `number_of_agents` random outside agents into the zone."""
        new_building_id = first_building_in_zone(zone_id)
        candidates = list(agents_joined[agents_joined.zone_id != zone_id].index)
        # Rows of a mixed-dtype table arrive as floats; random.sample needs
        # an int that does not exceed the population size.
        count = min(int(number_of_agents), len(candidates))
        random_sample = random.sample(candidates, count)
        agents_joined.loc[random_sample, "building_id"] = new_building_id

    def unplace_agents(agents_joined, zone_id, number_of_agents):
        """Unplace `-number_of_agents` random agents currently in the zone."""
        number_of_agents = -int(number_of_agents)  # flip the sign
        # Equality here (agents inside the zone), unlike relocate_agents.
        agent_pool = agents_joined[agents_joined.zone_id == zone_id]
        if len(agent_pool) > number_of_agents:
            random_sample = random.sample(list(agent_pool.index),
                                          number_of_agents)
            # Single .loc write instead of chained indexing, which may
            # silently assign into a temporary copy.
            agents_joined.loc[random_sample, "building_id"] = -1  # unplace

    def pick_establishments(agent_pool, employee_limit):
        # Randomly order the pool and keep the leading establishments whose
        # running employee total stays below the limit.
        e_sample = agent_pool.reindex(np.random.permutation(agent_pool.index))
        running_total = np.cumsum(e_sample['employees'].values)
        return e_sample[running_total < employee_limit].index

    def relocate_estabs(agents_joined, zone_id, number_of_agents):
        """Move outside establishments into the zone's first building."""
        agent_pool = agents_joined[agents_joined.zone_id != zone_id]
        ids_to_move = pick_establishments(agent_pool,
                                          abs(number_of_agents + 10))
        new_building_id = first_building_in_zone(zone_id)
        # Label-based write on the table the pool came from, instead of a
        # boolean mask built against a different object's index.
        agents_joined.loc[ids_to_move, "building_id"] = new_building_id

    def unplace_estabs(agents_joined, zone_id, number_of_agents):
        """Unplace establishments of the zone up to the employee shift."""
        number_of_agents = -number_of_agents  # flip the sign
        agent_pool = agents_joined[agents_joined.zone_id == zone_id]
        if agent_pool.employees.sum() > number_of_agents:
            ids_to_unplace = pick_establishments(agent_pool,
                                                 abs(number_of_agents))
            agents_joined.loc[ids_to_unplace, "building_id"] = -1  # unplace

    def refine(series):
        hh_shift = series.annual_hh_shift
        emp_shift = series.annual_emp_shift
        zone = series.zone_id
        if hh_shift > 0:
            relocate_agents(hh, zone, hh_shift)
        if emp_shift > 0:
            relocate_estabs(e, zone, emp_shift)
        if current_year < 2040:
            if hh_shift < 0:
                unplace_agents(hh, zone, hh_shift)
            if emp_shift < 0:
                unplace_estabs(e, zone, emp_shift)

    # Explicit loop rather than DataFrame.apply: refine() works purely by
    # side effect, and apply() may invoke the function twice on the first
    # row in older pandas versions.
    for _, zone_row in zone_refine.iterrows():
        refine(zone_row)
def estimate_non_res_elasticity(self, zones):
    """Estimate the demand elasticity for non-residential floor space.

    Fits a spatial two-stage least squares lag model
    (``py.spreg.twosls_sp.GM_Lag``) with ``log1p(non_residential_sqft_zone)``
    as the dependent variable, ``ln_avg_nonres_unit_price_zone`` as the
    endogenous regressor and ``ln_avg_unit_price_zone`` / ``avg_far`` as
    instruments, using queen-contiguity weights read from the zones
    shapefile in the data directory.

    Parameters
    ----------
    zones : pandas.DataFrame
        Zone table. Must provide the columns ``county``, ``zonal_emp``,
        ``residential_units_zone``, ``percent_younghead``,
        ``ln_avg_unit_price_zone``, ``ln_avg_nonres_unit_price_zone`` and
        ``non_residential_sqft_zone``. The caller's frame is not modified
        (a concatenated copy is used).

    Returns
    -------
    The reciprocal of the absolute value of the estimated coefficient on
    the endogenous price term.

    Raises
    ------
    KeyError
        If one of the expected county dummy columns is absent, i.e. no
        zone belongs to that county.
    """
    county_dummies = ['Arapahoe', 'Boulder', 'Broomfield', 'Clear Creek',
                      'Denver', 'Douglas', 'Elbert', 'Gilpin', 'Jefferson',
                      'Weld']

    zones = pd.concat([zones, pd.get_dummies(zones.county)], axis=1)
    zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean()
    n_zones = zones.shape[0]

    # Spatial weights. Path components are passed separately so the join
    # also works on platforms whose separator is not a backslash.
    wqueen = py.queen_from_shapefile(
        os.path.join(misc.data_dir(), 'shapefiles', 'zones.shp'))
    w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)

    # Exogenous regressors: two logged size measures, one share, and the
    # county dummies, in exactly the order listed in x_names.
    x = zones[['zonal_emp', 'residential_units_zone']].apply(np.log1p)
    x['percent_younghead'] = zones['percent_younghead']
    for county in county_dummies:
        x[county] = zones[county]
    x = x.fillna(0).values

    # Instruments for the endogenous price.
    imat = zones[['ln_avg_unit_price_zone', 'avg_far']].fillna(0).values

    yend = zones['ln_avg_nonres_unit_price_zone'].fillna(0).values
    yend = np.reshape(yend, (n_zones, 1))

    y = zones['non_residential_sqft_zone'].fillna(0).apply(np.log1p).values
    y = np.reshape(y, (n_zones, 1))

    imat_names = ['res_price', 'avg_far']
    x_names = (['zonal_emp', 'residential_units_zone', 'percent_younghead']
               + county_dummies)
    yend_name = ['ln_avg_nonres_unit_price_zone']
    y_name = 'non_residential_sqft_zone'

    reg_2sls = py.spreg.twosls_sp.GM_Lag(
        y, x, yend=yend, q=imat, w=w, w_lags=2, robust='white',
        name_x=x_names, name_q=imat_names, name_y=y_name,
        name_yend=yend_name)

    # Position of the price coefficient in reg_2sls.betas. This evaluates
    # to 14, the index the code has always used.
    # NOTE(review): assumes spreg orders betas as constant, exogenous x,
    # then yend -- confirm against the spreg documentation.
    price_beta_pos = 1 + len(x_names)
    demand_elasticity = np.absolute(reg_2sls.betas[price_beta_pos])
    return 1 / demand_elasticity[0]
def simulate(dset,year,depvar = 'building_id',alternatives=None,simulation_table = None,
             output_names=None,agents_groupby = ['income_3_tenure',],transition_config=None,relocation_config=None):
    """Assign unplaced households (``depvar == -1``) to residential buildings.

    Steps, in order:

    1. Optionally run the household transition model and store the resulting
       ``households`` / ``persons`` tables back on ``dset.d``.
    2. Add an ``est_mortgage_payment`` column to ``alternatives`` (modified in
       place) using a fixed 5% annual rate over 360 monthly payments.
    3. Optionally mark relocating choosers as unplaced.
    4. Draw a county for every unplaced chooser from the ``year`` row of
       ``county_growth_share.csv`` and build a (county, income_grp) table.
    5. For each (county, income_grp) row: build the vacant-unit alternatives
       in that county, filter them by price/affordability, compute MNL
       probabilities per ``agents_groupby`` segment, and draw buildings
       without replacement.
    6. Write the chosen building ids back to ``dset.households``.

    Parameters
    ----------
    dset : dataset object (``fetch``, ``d``, ``store``, ``coeffs``,
        ``load_coeff``, ``relocation_rates``, ``households`` are used).
    year : row label looked up in the county growth share table and passed
        to the transition model.
    depvar : column holding the chosen building id; -1 means unplaced.
    alternatives : pandas.DataFrame of buildings; must be provided.
    simulation_table : name of the chooser table fetched from ``dset``.
    output_names : 4-tuple ``(output_csv, output_title, coeff_name,
        output_varname)``; the first three are %-formatted with the segment
        name, but only the coefficient name is actually used.
    agents_groupby : column(s) used to split movers into segments.
    transition_config, relocation_config : dicts; each must contain the key
        ``'Enabled'`` (so the ``None`` defaults cannot be used as-is).

    Returns nothing; results are written into ``dset``.
    """
    # NOTE(review): `agents_groupby` has a mutable list default. It is not
    # mutated in this body, so it is harmless today, but fragile.

    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        # Scale the household control totals and truncate to int32.
        ct["total_number_of_households"] = (ct["total_number_of_households"]*transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        #import pdb; pdb.set_trace()
        new, added, new_linked = model.transition(
                hh, year, linked_tables={'linked': (persons, 'household_id')})
        # Households added by the transition start out unplaced.
        new.loc[added,'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    #calculate mortgage payment values

    # NOTE(review): temp_count is never incremented (the increment below is
    # commented out), so the `temp_count > 50` break can never trigger.
    temp_count = 0

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    # Monthly rate and number of payments for the annuity formula below.
    r = .05/12
    n = 360
    buildings['est_mortgage_payment']=buildings.unit_price_residential*((r*(1+r)**n)/((1+r)**n-1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are multiplied by .01 and by the configured scaling factor.
        rate_table[rate_field] = rate_table[rate_field]*.01*relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers,rate_table,rate_field)
        # NOTE(review): chained indexing; may assign into a temporary copy.
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar]==-1]
    # Columns of this file are used as county identifiers; the row labelled
    # `year` supplies the draw probabilities.
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),'county_growth_share.csv'), index_col=0 )
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties, movers_all.shape[0], replace=True, p=current_growth_shares)

    # NOTE(review): movers_all is a filtered selection of choosers, so this
    # may trigger pandas' SettingWithCopy warning.
    movers_all['county_id'] = movers_counties

    # One row per income group. After agg the columns are, positionally:
    # 0 = mean upper bound, 1 = size, 2 = mean lower bound, 3 = size.
    income_segment = movers_all.groupby('income_grp')['upper_income_grp_val','lower_income_grp_val'].agg([np.mean, np.size])
    # get county growth control data and merge with income_segements

    # Replicate the income-group rows once per county: the first county is
    # written onto the original rows, the rest onto appended copies.
    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():

        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])

    income_segment = income_segment.set_index(['county', income_segment.index])

    print "Total new agents and movers = %d" % len(movers_all.index)

    # seg[0] is the (county, income_grp) index tuple; seg[1] is the row,
    # accessed positionally (see the column layout above).
    for seg in income_segment.iterrows():

        # NOTE(review): movers are selected by income bounds only; the
        # 'county_id' drawn above is not used in this filter, so the same
        # households appear to be eligible under every county row.
        movers = movers_all[(movers_all['income']<= seg[1][0]) & (movers_all['income']>= seg[1][2])]
        print 'County: %s. Placing %d households in the income range (%d, %d)' % (seg[0][0],seg[1][1],seg[1][2], seg[1][0])

        # Vacant units per building = residential units minus the number of
        # currently placed choosers; one alternative row per vacant unit.
        empty_units = buildings.residential_units.sub(choosers[choosers['building_id']!=-1].groupby('building_id').size(),fill_value=0)
        empty_units = empty_units[empty_units>0].order(ascending=False)
        print 'number of empty units is %d' %empty_units.sum()
        alternatives = buildings.ix[np.repeat(empty_units.index.values,empty_units.values.astype('int'))]
        alternatives = alternatives[alternatives.county_id == int(seg[0][0])]

        # Affordability filter driven by the segment's lower income bound:
        # a price cap at the bottom, a price floor at the top, otherwise
        # mortgage payment <= 33% of the bound divided by 12.
        if((seg[1][2]/12) <= 0):
            alts = alternatives[alternatives['unit_price_residential'] < 186281]
        elif((seg[1][2]/12) >= 55000):
            alts = alternatives[alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[alternatives['est_mortgage_payment'] / (seg[1][2]/12) <= 0.33]
        if(alts.shape[0] == 0):
            # NOTE(review): this concatenates the entire choosers table, not
            # just this segment's movers; `homeless` is never read afterwards.
            homeless = pd.concat([choosers, homeless])
            print 'Could not place %d households due to income restrictions' % seg[1][1]
            continue

        # One probability column per agent segment, indexed like alts.
        pdf = pd.DataFrame(index=alts.index)

        segments = movers.groupby(agents_groupby)

        ##simulation
        for name, segment in segments:
            # Only the first household of the segment is used to compute the
            # segment's choice probabilities.
            segment = segment.head(1)
            name = str(name)
            # tmp_outcsv and tmp_outtitle are not used below.
            tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
            # Names of the non-null explanatory variables for this segment.
            ind_vars = dset.coeffs[(tmp_coeffname, 'fnames')][np.invert(dset.coeffs[(tmp_coeffname, 'fnames')].isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            numchoosers = segment.shape[0]
            numalts = alts.shape[0]
            sample = np.tile(alts.index.values,numchoosers)
            # NOTE(review): this is an alias, not a copy, so the 'join_index'
            # column is also added to `alts` itself.
            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index.values,SAMPLE_SIZE)
            # Attach the chooser's attributes to every alternative row.
            alts_sample = pd.merge(alts_sample,segment,left_on='join_index',right_index=True,suffixes=('','_r'))
            chosen = np.zeros((numchoosers,SAMPLE_SIZE))
            chosen[:,0] = 1
            sample, alternative_sample, est_params = sample, alts_sample, ('mnl',chosen)
            ##Interaction variables
            # Variables named '<a>_x_<b>' are built from columns <a> and <b>.
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
            for ivar in interaction_vars:
                # NOTE(review): the second test is `if`, not `elif`, so for a
                # '...gt' variable the else branch below runs as well and
                # overwrites the comparison with the product. Check whether
                # the coefficients were estimated with the same construction
                # before changing this.
                if ivar[1][0].endswith('gt'):
                    alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])>alternative_sample[ivar[1][1]]).astype('int32')
                if ivar[1][0].endswith('lt'):
                    alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])<alternative_sample[ivar[1][1]]).astype('int32')
                else:
                    alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])*alternative_sample[ivar[1][1]])

            # Design matrix: one column per explanatory variable, NaN -> 0.
            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data
            data = data.as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,coeff,numalts=SAMPLE_SIZE,returnprobs=1)
            pdf['segment%s'%name] = pd.Series(probs.flatten(),index=alts.index)

        # Every mover starts as unplaced (-1).
        new_homes = pd.Series(np.ones(len(movers.index))*-1,index=movers.index)
        for name, segment in segments:
            name_coeff = str(name)
            name = str(name)
            p=pdf['segment%s'%name]
            # NOTE(review): the mask is recreated for every segment, so units
            # taken by one segment are not excluded for the next.
            mask = np.zeros(len(alts.index),dtype='bool')
            mask = pd.Series(mask, index=alts.index)

            print "Assigning units to %d agents of segment %s" % (len(segment.index),name)

            def choose(p,mask,alternatives,segment,new_homes,minsize=None):
                """Draw one alternative per agent of `segment` without
                replacement, weighted by `p`; returns (mask, new_homes).
                `minsize` is unused."""
                p = copy.copy(p)
                p.loc[mask[mask==True].index] = 0 # already chosen

                try:
                    indexes = np.random.choice(alternatives.index.values,len(segment.index),replace=False,p=p.values/p.values.sum())
                # NOTE(review): bare except; any error raised by the draw
                # above is treated as "not enough alternatives".
                except:
                    print "WARNING: not enough options to fit agents, will result in unplaced agents"
                    indexes = np.random.choice(alternatives.index.values,len(alternatives.index.values),replace=False,p=p.values/p.values.sum())

                    # If any of the leading agents have tenure == 2 they are
                    # marked -2; otherwise the leading agents are given the
                    # alternatives in index order.
                    if(new_homes.ix[segment[segment.tenure==2].index.values[:len(alternatives.index.values)]].shape[0] != 0):
                        new_homes.ix[segment[segment.tenure==2].index.values[:len(alternatives.index.values)]] = -2
                    else:
                        new_homes.ix[segment.index.values[:len(alternatives.index.values)]] = alternatives.index.values

                    mask.loc[indexes] = True

                    return mask,new_homes

                new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[:len(new_homes.ix[segment.index])]
                mask.loc[indexes] = True

                return mask,new_homes

            mask,new_homes = choose(p,mask,alts,segment,new_homes)

        build_cnts = new_homes.value_counts()  #num households place in each building
        print "Assigned %d agents to %d locations with %d unplaced" % (new_homes.size,build_cnts.size,build_cnts.get(-1,0))

        table = dset.households # need to go back to the whole dataset
        # NOTE(review): chained indexing; may assign into a temporary copy.
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        #table.to_sql('tmp_out', engine, if_exists='append')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        # Feed the placements back so the next iteration's vacancy count
        # sees them.
        choosers.loc[table.index] = table
        #dset.store_attr(output_varname,year,copy.deepcopy(table[depvar]))
        # old_building_count = buildings.shape[0]
        # buildings = buildings.drop(new_homes.index)
        # new_building_count = buildings.shape[0]
        # print '%d units were filled' %(new_building_count - old_building_count)
        #buildings = buildings.drop(new_homes)
        #temp_count += 1
        if(temp_count > 50):
            break
    #out_table.to_csv('C:/users/jmartinez/documents/households_simulation_test.csv')
    dset.households.loc[out_table.index] = out_table
def add_rows(data, nrows, starting_index=None):
    """
    Add rows to data table according to a given nrows.

    New rows are copies of existing rows. The age of head for each new row
    is drawn from the net-migration-by-age distribution in
    ``NetMigrationByAge.csv`` (read from the data directory), and a row with
    that ``age_of_head`` is then sampled, with replacement, from `data`.
    New rows receive consecutive integer index values.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``age_of_head`` column.
    nrows : int
        Number of rows to add.
    starting_index : int, optional
        The starting index from which to calculate indexes for
        the new rows. If not given the max + 1 of the index of `data`
        will be used. An explicit ``0`` is honoured.

    Returns
    -------
    updated : pandas.DataFrame
        Table with rows added.
    added : pandas.Index
        New indexes of the rows that were added.
    copied : pandas.Index
        Indexes of rows that were copied. A row copied multiple times
        will have multiple entries.

    """
    if nrows == 0:
        return data, _empty_index(), _empty_index()

    # `is None` rather than truthiness, so starting_index=0 is not mistaken
    # for "not given".
    if starting_index is None:
        starting_index = data.index.values.max() + 1

    # Age distribution of net migration (State Demographer's data).
    migration = pd.read_csv(
        os.path.join(misc.data_dir(), "NetMigrationByAge.csv"))
    migration.columns = ["county", "age", "net_migration"]
    # Only use ages that are in the households table. copy() so the new
    # column is added to an independent frame, not a slice of the original.
    migration = migration[15:90].copy()
    migration["prob_age"] = (migration["net_migration"]
                             / migration.net_migration.sum())

    # Draw an age for every new row, then count the draws per age
    # (groupby returns the ages in sorted order).
    random_ages = np.random.choice(migration.age, nrows, p=migration.prob_age)
    age_counts = pd.DataFrame({"ages": random_ages}).groupby("ages").size()

    agg_list = []
    for age_val, age_count in zip(age_counts.index, age_counts.values):
        candidates = data[data.age_of_head == age_val].index.values
        if len(candidates) == 0:
            # No existing row has this age of head. np.random.choice would
            # raise on an empty array, so fall back to sampling from the
            # whole table.
            candidates = data.index.values
        agg_list.extend(np.random.choice(candidates, age_count))

    new_rows = data.loc[agg_list].copy()
    # Builtin int: the `np.int` alias no longer exists in current NumPy.
    added_index = pd.Index(
        np.arange(starting_index, starting_index + nrows, dtype=int))
    new_rows.index = added_index

    return pd.concat([data, new_rows]), added_index, pd.Index(agg_list)
def calculate_variables(dset): ##PARCEL VARIABLES # XG: Fix the mismatch between zone and county p = dset.parcels del p['county_id'] zone_county = pd.read_csv('C:\urbansim\data/TAZ_County_Table.csv') zone_county = zone_county.set_index('zone_id') zone_county = zone_county[['county_id']] p = pd.merge(p, zone_county, left_on='zone_id', right_index=True) pu = p #end of fix if p.index.name != 'parcel_id': p = p.set_index('parcel_id') print p[p.zone_id == 1725].x p['in_denver'] = (p.county_id == 8031).astype('int32') p['ln_dist_rail'] = p.dist_rail.apply(np.log1p) p['ln_dist_bus'] = p.dist_bus.apply(np.log1p) p['ln_land_value'] = p.land_value.apply(np.log1p) p['land_value_per_sqft'] = p.land_value * 1.0 / p.parcel_sqft p['rail_within_mile'] = (p.dist_rail < 5280).astype('int32') p['cherry_creek_school_district'] = ( p.school_district == 8).astype('int32') p['acres'] = p.parcel_sqft / 43560.0 p['ln_acres'] = (p.parcel_sqft / 43560.0).apply(np.log1p) #BUILDING VARIABLES b = dset.fetch('buildings', building_sqft_per_job_table=elcm_configuration[ 'building_sqft_per_job_table'], bsqft_job_scaling=elcm_configuration['scaling_factor']) b = b[[ 'building_type_id', 'improvement_value', 'land_area', 'non_residential_sqft', 'parcel_id', 'residential_units', 'sqft_per_unit', 'stories', 'tax_exempt', 'year_built', 'bldg_sq_ft', 'unit_price_non_residential', 'unit_price_residential' ]] b.loc[:, 'zone_id'] = p.zone_id[b.parcel_id].values bsqft_job = dset.building_sqft_per_job #bsqft_job.building_sqft_per_job = bsqft_job.building_sqft_per_job b = pd.merge(b, bsqft_job, left_on=['zone_id', 'building_type_id'], right_index=True, how='left') b["non_residential_units"] = b.non_residential_sqft / b.building_sqft_per_job ##### b["base_year_jobs"] = dset.establishments.groupby( 'building_id').employees.sum() # things get all screwed up if you have overfull buildings b["non_residential_units"] = b[["non_residential_units", "base_year_jobs"]].max(axis=1) b["all_units"] = b.residential_units + 
b.non_residential_units b['county_id'] = p.county_id[b.parcel_id].values b['townhome'] = (b.building_type_id == 24).astype('int32') b['multifamily'] = (np.in1d(b.building_type_id, [2, 3])).astype('int32') b['office'] = (b.building_type_id == 5).astype('int32') b['retail_or_restaurant'] = (np.in1d(b.building_type_id, [17, 18])).astype('int32') b['industrial_building'] = (np.in1d(b.building_type_id, [9, 22])).astype('int32') b['residential_sqft'] = (b.bldg_sq_ft - b.non_residential_sqft) b['btype_hlcm'] = 1 * (b.building_type_id == 2) + 2 * (b.building_type_id == 3) + 3 * ( b.building_type_id == 20) + 4 * np.invert( np.in1d(b.building_type_id, [2, 3, 20])) b['county8001'] = (b.county_id == 8001).astype('int32') b['county8005'] = (b.county_id == 8005).astype('int32') b['county8013'] = (b.county_id == 8013).astype('int32') b['county8014'] = (b.county_id == 8014).astype('int32') b['county8019'] = (b.county_id == 8019).astype('int32') b['county8031'] = (b.county_id == 8031).astype('int32') b['county8035'] = (b.county_id == 8035).astype('int32') b['county8039'] = (b.county_id == 8039).astype('int32') b['county8047'] = (b.county_id == 8047).astype('int32') b['county8059'] = (b.county_id == 8059).astype('int32') b['county8123'] = (b.county_id == 8123).astype('int32') b['unit_price_res_sqft'] = b[ b.residential_units > 0].unit_price_residential / b[ b.residential_units > 0].bldg_sq_ft p['nonres_far'] = (b.groupby('parcel_id').non_residential_sqft.sum() / p.acres).apply(np.log1p) p['ln_units_per_acre'] = (b.groupby('parcel_id').residential_units.sum() / p.acres).apply(np.log1p) #HOUSEHOLD VARIABLES hh_estim = dset.fetch('households_for_estimation') hh_estim['tenure'] = 1 hh_estim.loc[hh_estim.own > 1, "tenure"] = 2 # corrected chained index error hh_estim['income'] = 0 hh_estim.loc[hh_estim.income_group == 1, "income"] = 7500 # corrected chained index error hh_estim.loc[hh_estim.income_group == 2, "income"] = 17500 # corrected chained index error 
hh_estim.loc[hh_estim.income_group == 3, "income"] = 25000 # corrected chained index error hh_estim.loc[hh_estim.income_group == 4, "income"] = 35000 # corrected chained index error hh_estim.loc[hh_estim.income_group == 5, "income"] = 45000 # corrected chained index error hh_estim.loc[hh_estim.income_group == 6, "income"] = 55000 # corrected chained index error hh_estim.loc[hh_estim.income_group == 7, "income"] = 67500 # corrected chained index error hh_estim.loc[hh_estim.income_group == 8, "income"] = 87500 # corrected chained index error hh_estim.loc[hh_estim.income_group == 9, "income"] = 117500 # corrected chained index error hh_estim.loc[hh_estim.income_group == 10, "income"] = 142500 # corrected chained index error hh_estim.loc[hh_estim.income_group == 11, "income"] = 200000 # corrected chained index error hh = dset.fetch('households') for table in [hh_estim, hh]: choosers = table choosers['zone_id'] = b.zone_id[choosers.building_id].values choosers['building_type_id'] = b.building_type_id[ choosers.building_id].values choosers['county_id'] = b.county_id[choosers.building_id].values choosers['btype'] = 1 * (choosers.building_type_id == 2) + 2 * ( choosers.building_type_id == 3) + 3 * (choosers.building_type_id == 20) + 4 * np.invert( np.in1d(choosers.building_type_id, [2, 3, 20])) choosers['income_3_tenure'] = 1 * (choosers.income < 60000) * ( choosers.tenure == 1) + 2 * np.logical_and( choosers.income >= 60000, choosers.income < 120000 ) * (choosers.tenure == 1) + 3 * (choosers.income >= 120000) * ( choosers.tenure == 1) + 4 * (choosers.income < 40000) * ( choosers.tenure == 2) + 5 * (choosers.income >= 40000) * ( choosers.tenure == 2) choosers['younghead'] = choosers.age_of_head < 30 choosers['hh_with_child'] = choosers.children > 0 choosers['ln_income'] = choosers.income.apply(np.log1p) choosers['income5xlt'] = choosers.income * 5.0 choosers['income10xlt'] = choosers.income * 5.0 choosers['wkrs_hhs'] = choosers.workers * 1.0 / choosers.persons 
#ESTABLISHMENT VARIABLES e = dset.fetch('establishments') e['zone_id'] = b.zone_id[e.building_id].values e['county_id'] = b.county_id[e.building_id].values e['sector_id_six'] = 1 * (e.sector_id == 61) + 2 * ( e.sector_id == 71) + 3 * np.in1d(e.sector_id, [ 11, 21, 22, 23, 31, 32, 33, 42, 48, 49 ]) + 4 * np.in1d(e.sector_id, [7221, 7222, 7224]) + 5 * np.in1d( e.sector_id, [44, 45, 7211, 7212, 7213, 7223]) + 6 * np.in1d( e.sector_id, [51, 52, 53, 54, 55, 56, 62, 81, 92]) e['sector_id_retail_agg'] = e.sector_id * np.logical_not( np.in1d(e.sector_id, [7211, 7212, 7213])) + 7211 * np.in1d( e.sector_id, [7211, 7212, 7213]) e['nonres_sqft'] = b.non_residential_sqft[e.building_id].values #ZONE VARIABLES #XG: fix the mismatch zone county z = dset.fetch('zones') del z['county'] z['zone_id'] = z.index zone_county = pd.read_csv( os.path.join(misc.data_dir(), 'TAZ_County_Table.csv')) zone_county = zone_county.set_index('zone_id') zone_county = zone_county[['county']] z = pd.merge(z, zone_county, left_on='zone_id', right_index=True) del z['zone_id'] zu = z #end of fix z['zonal_hh'] = hh.groupby('zone_id').size() z['zonal_emp'] = e.groupby('zone_id').employees.sum() z['zone_id'] = z.index print z.columns print z[z['zone_id'] == 1722]['zonal_emp'] del z['zone_id'] z['residential_sqft_zone'] = b.groupby('zone_id').residential_sqft.sum() z['zonal_pop'] = hh.groupby('zone_id').persons.sum() z['residential_units_zone'] = b.groupby('zone_id').residential_units.sum() z['ln_residential_units_zone'] = b.groupby( 'zone_id').residential_units.sum().apply(np.log1p) z['ln_residential_unit_density_zone'] = ( b.groupby('zone_id').residential_units.sum() / z.acreage).apply( np.log1p) z['non_residential_sqft_zone'] = b.groupby( 'zone_id').non_residential_sqft.sum() z['ln_non_residential_sqft_zone'] = b.groupby( 'zone_id').non_residential_sqft.sum().apply(np.log1p) z['percent_sf'] = b[b.btype_hlcm == 3].groupby( 'zone_id').residential_units.sum() * 100.0 / ( 
b.groupby('zone_id').residential_units.sum()) z['avg_unit_price_zone'] = b[(b.residential_units > 0) * (b.improvement_value > 0)].groupby( 'zone_id').unit_price_residential.mean() z['ln_avg_unit_price_zone'] = b[ (b.residential_units > 0) * (b.improvement_value > 0)].groupby( 'zone_id').unit_price_residential.mean().apply(np.log1p) z['ln_avg_nonres_unit_price_zone'] = b[ (b.non_residential_sqft > 0) * (b.improvement_value > 0)].groupby( 'zone_id').unit_price_non_residential.mean().apply(np.log1p) z['median_age_of_head'] = hh.groupby('zone_id').age_of_head.median() z['mean_income'] = hh.groupby('zone_id').income.mean() z['median_year_built'] = b.groupby('zone_id').year_built.median().astype( 'int32') z['ln_avg_land_value_per_sqft_zone'] = p.groupby( 'zone_id').land_value_per_sqft.mean().apply(np.log1p) z['median_yearbuilt_post_1990'] = ( b.groupby('zone_id').year_built.median() > 1990).astype('int32') z['median_yearbuilt_pre_1950'] = (b.groupby('zone_id').year_built.median() < 1950).astype('int32') z['percent_hh_with_child'] = hh[hh.children > 0].groupby( 'zone_id').size() * 100.0 / z.zonal_hh z['percent_renter_hh_in_zone'] = hh[hh.tenure == 2].groupby( 'zone_id').size() * 100.0 / z.zonal_hh z['percent_younghead'] = hh[hh.age_of_head < 30].groupby( 'zone_id').size() * 100.0 / z.zonal_hh z['average_resunit_size'] = b.groupby('zone_id').sqft_per_unit.mean() z['zone_contains_park'] = (p[p.lu_type_id == 14].groupby('zone_id').size() > 0).astype('int32') z['emp_sector_agg'] = e[e.sector_id == 1].groupby( 'zone_id').employees.sum() z['emp_sector1'] = e[e.sector_id_six == 1].groupby( 'zone_id').employees.sum() z['emp_sector2'] = e[e.sector_id_six == 2].groupby( 'zone_id').employees.sum() z['emp_sector3'] = e[e.sector_id_six == 3].groupby( 'zone_id').employees.sum() z['emp_sector4'] = e[e.sector_id_six == 4].groupby( 'zone_id').employees.sum() z['emp_sector5'] = e[e.sector_id_six == 5].groupby( 'zone_id').employees.sum() z['emp_sector6'] = e[e.sector_id_six == 6].groupby( 
'zone_id').employees.sum() z['jobs_within_45min'] = dset.compute_range(z.zonal_emp, 45.0) z['ln_jobs_within_45min'] = dset.compute_range(z.zonal_emp, 45.0).apply(np.log1p) z['jobs_within_30min'] = dset.compute_range(z.zonal_emp, 30.0) z['ln_jobs_within_30min'] = dset.compute_range(z.zonal_emp, 30.0).apply(np.log1p) z['jobs_within_20min'] = dset.compute_range(z.zonal_emp, 20.0) z['jobs_within_15min'] = dset.compute_range(z.zonal_emp, 15.0) z['ln_jobs_within_20min'] = dset.compute_range(z.zonal_emp, 20.0).apply(np.log1p) z['ln_pop_within_20min'] = dset.compute_range(z.zonal_pop, 20.0).apply(np.log1p) z['ln_emp_aggsector_within_5min'] = dset.compute_range( z.emp_sector_agg, 5.0).apply(np.log1p) z['ln_emp_sector1_within_15min'] = dset.compute_range(z.emp_sector1, 15.0).apply(np.log1p) z['ln_emp_sector2_within_15min'] = dset.compute_range(z.emp_sector2, 15.0).apply(np.log1p) z['ln_emp_sector3_within_10min'] = dset.compute_range(z.emp_sector3, 15.0).apply(np.log1p) z['ln_emp_sector3_within_15min'] = dset.compute_range(z.emp_sector3, 15.0).apply(np.log1p) z['ln_emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3, 20.0).apply(np.log1p) z['ln_emp_sector4_within_15min'] = dset.compute_range(z.emp_sector4, 15.0).apply(np.log1p) z['ln_emp_sector5_within_15min'] = dset.compute_range(z.emp_sector5, 15.0).apply(np.log1p) z['ln_emp_sector6_within_15min'] = dset.compute_range(z.emp_sector6, 15.0).apply(np.log1p) z['allpurpose_agglosum_floor'] = (z.allpurpose_agglosum >= 0) * (z.allpurpose_agglosum) #Exports (for Tableau-Employment) z['emp_sector1_within_20min'] = dset.compute_range(z.emp_sector1, 20.0) z['emp_sector2_within_20min'] = dset.compute_range(z.emp_sector2, 20.0) z['emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3, 20.0) z['emp_sector4_within_20min'] = dset.compute_range(z.emp_sector4, 20.0) z['emp_sector5_within_20min'] = dset.compute_range(z.emp_sector5, 20.0) z['emp_sector6_within_20min'] = dset.compute_range(z.emp_sector6, 20.0) 
z['emp_sector1_within_30min'] = dset.compute_range(z.emp_sector1, 30.0) z['emp_sector2_within_30min'] = dset.compute_range(z.emp_sector2, 30.0) z['emp_sector3_within_30min'] = dset.compute_range(z.emp_sector3, 30.0) z['emp_sector4_within_30min'] = dset.compute_range(z.emp_sector4, 30.0) z['emp_sector5_within_30min'] = dset.compute_range(z.emp_sector5, 30.0) z['emp_sector6_within_30min'] = dset.compute_range(z.emp_sector6, 30.0) z['emp_sector1_within_45min'] = dset.compute_range(z.emp_sector1, 45.0) z['emp_sector2_within_45min'] = dset.compute_range(z.emp_sector2, 45.0) z['emp_sector3_within_45min'] = dset.compute_range(z.emp_sector3, 45.0) z['emp_sector4_within_45min'] = dset.compute_range(z.emp_sector4, 45.0) z['emp_sector5_within_45min'] = dset.compute_range(z.emp_sector5, 45.0) z['emp_sector6_within_45min'] = dset.compute_range(z.emp_sector6, 45.0) z['residential_unit_per_jobs_within_15_min'] = z[ 'residential_units_zone'] / z['jobs_within_15min'] z['residential_sqft_per_jobs_within_15_min'] = (b[np.in1d( b['building_type_id'], [2, 3, 20, 24 ])].groupby('zone_id').bldg_sq_ft.sum()) / z['jobs_within_15min'] ztableau = z[[ 'zonal_emp', 'emp_sector1', 'emp_sector2', 'emp_sector3', 'emp_sector4', 'emp_sector5', 'emp_sector6', 'jobs_within_45min', 'jobs_within_30min', 'jobs_within_20min', 'emp_sector1_within_20min', 'emp_sector2_within_20min', 'emp_sector3_within_20min', 'emp_sector4_within_20min', 'emp_sector5_within_20min', 'emp_sector6_within_20min', 'emp_sector1_within_30min', 'emp_sector2_within_30min', 'emp_sector3_within_30min', 'emp_sector4_within_30min', 'emp_sector5_within_30min', 'emp_sector6_within_30min', 'emp_sector1_within_45min', 'emp_sector2_within_45min', 'emp_sector3_within_45min', 'emp_sector4_within_45min', 'emp_sector5_within_45min', 'emp_sector6_within_45min', 'residential_unit_per_jobs_within_15_min', 'residential_sqft_per_jobs_within_15_min' ]] ztableau.to_csv('C:\urbansim\output\emp_tableau.csv') ##JOINS #merge parcels with zones pz = 
pd.merge(p.reset_index(), z, left_on='zone_id', right_index=True) pz = pz.set_index('parcel_id') #merge buildings with parcels/zones del b['county_id'] del b['zone_id'] bpz = pd.merge(b, pz, left_on='parcel_id', right_index=True) bpz['residential_units_capacity'] = bpz.parcel_sqft / 1500 - bpz.residential_units bpz.loc[bpz.residential_units_capacity < 0, "residential_units_capacity"] = 0 # corrected chained index error dset.d['buildings'] = bpz if dset.parcels.index.name != 'parcel_id': dset.parcels = pu dset.d['zones'] = zu