def gen_probs(dset, movers, agents_groupby, alts, output_names):
    """Build a table of MNL choice probabilities, one column per agent segment.

    ``movers`` is grouped by ``agents_groupby``.  For every group the first
    row is used as the representative chooser, paired with every row of
    ``alts``, and handed to ``interaction.mnl_simulate`` together with the
    coefficients stored under ``coeff_name % str(group_key)``.

    Parameters
    ----------
    dset : object
        Must expose ``coeffs`` (indexable by ``(coeff_name, 'fnames')``) and
        ``load_coeff(coeff_name)``.
    movers : pandas.DataFrame
        Agents to segment.
    agents_groupby : str or list of str
        Column(s) of ``movers`` that define the segments.
    alts : pandas.DataFrame
        Alternatives.  A copy is used internally, so the caller's frame is
        left untouched (the previous implementation added a ``join_index``
        column to it).
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``; only the
        ``coeff_name`` template is used here.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``alts`` with one ``'segment<name>'`` column per segment.
    """
    # Unpacking keeps the original requirement that four names are supplied.
    output_csv, output_title, coeff_name, output_varname = output_names
    pdf = pd.DataFrame(index=alts.index)
    sample_size = alts.index.size

    for name, segment in movers.groupby(agents_groupby):
        representative = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name

        # Variable names of the specification; null entries are dropped.
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()

        # Attach the representative chooser's attributes to every alternative.
        alts_sample = alts.copy()
        alts_sample['join_index'] = np.repeat(
            representative.index.values, sample_size)
        alternative_sample = pd.merge(
            alts_sample, representative, left_on='join_index',
            right_index=True, suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.
        # NOTE(review): the original used `if ... if ... else`, so a
        # '...gt' comparison was always overwritten by the product.  The
        # chain is now exclusive; confirm the estimation code builds these
        # variables the same way.
        for var in ind_vars:
            if '_x_' not in var:
                continue
            parts = var.split('_x_')
            left = alternative_sample[parts[0]]
            right = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[var] = (left > right).astype('int32')
            elif parts[0].endswith('lt'):
                alternative_sample[var] = (left < right).astype('int32')
            else:
                alternative_sample[var] = left * right

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        # `.values` replaces `as_matrix()`, which newer pandas removed.
        data = est_data.fillna(0).values

        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=sample_size, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    return pdf
def gen_probs(dset, movers, agents_groupby, alts, output_names):
    """Build a table of MNL choice probabilities, one column per agent segment.

    ``movers`` is grouped by ``agents_groupby``.  For every group the first
    row is used as the representative chooser, paired with every row of
    ``alts``, and handed to ``interaction.mnl_simulate`` together with the
    coefficients stored under ``coeff_name % str(group_key)``.

    Parameters
    ----------
    dset : object
        Must expose ``coeffs`` (indexable by ``(coeff_name, 'fnames')``) and
        ``load_coeff(coeff_name)``.
    movers : pandas.DataFrame
        Agents to segment.
    agents_groupby : str or list of str
        Column(s) of ``movers`` that define the segments.
    alts : pandas.DataFrame
        Alternatives.  A copy is used internally, so the caller's frame is
        left untouched (the previous implementation added a ``join_index``
        column to it).
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``; only the
        ``coeff_name`` template is used here.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``alts`` with one ``'segment<name>'`` column per segment.
    """
    # Unpacking keeps the original requirement that four names are supplied.
    output_csv, output_title, coeff_name, output_varname = output_names
    pdf = pd.DataFrame(index=alts.index)
    sample_size = alts.index.size

    for name, segment in movers.groupby(agents_groupby):
        representative = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name

        # Variable names of the specification; null entries are dropped.
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()

        # Attach the representative chooser's attributes to every alternative.
        alts_sample = alts.copy()
        alts_sample['join_index'] = np.repeat(
            representative.index.values, sample_size)
        alternative_sample = pd.merge(
            alts_sample, representative, left_on='join_index',
            right_index=True, suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.
        # NOTE(review): the original used `if ... if ... else`, so a
        # '...gt' comparison was always overwritten by the product.  The
        # chain is now exclusive; confirm the estimation code builds these
        # variables the same way.
        for var in ind_vars:
            if '_x_' not in var:
                continue
            parts = var.split('_x_')
            left = alternative_sample[parts[0]]
            right = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[var] = (left > right).astype('int32')
            elif parts[0].endswith('lt'):
                alternative_sample[var] = (left < right).astype('int32')
            else:
                alternative_sample[var] = left * right

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        # `.values` replaces `as_matrix()`, which newer pandas removed.
        data = est_data.fillna(0).values

        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=sample_size, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    return pdf
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table=None, output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None, relocation_config=None):
    """Place new and relocating households into vacant residential units.

    Steps, in order:

    1. If ``transition_config['Enabled']``, scale the household control
       totals, run the tabular-totals transition and store the resulting
       households/persons on ``dset.d``; added households get
       ``building_id = -1``.
    2. Add an ``est_mortgage_payment`` column to ``alternatives`` (5% annual
       rate, 360 monthly payments, applied to ``unit_price_residential``).
    3. If ``relocation_config['Enabled']``, mark relocating choosers with
       ``depvar = -1``.
    4. Draw a county for every mover from ``county_growth_share.csv`` and
       build one (county, income group) row per combination.
    5. For each such row, compute MNL probabilities per agent segment over
       the affordable vacant units of that county and draw units without
       replacement.
    6. Write the chosen ``depvar`` values back to ``dset.households``.

    Parameters
    ----------
    dset : dataset object (``fetch``, ``d``, ``store``, ``coeffs``,
        ``load_coeff``, ``relocation_rates``, ``households``).
    year : label of the row to read from the county growth share table;
        also passed to the transition model.
    depvar : name of the household column holding the chosen building.
    alternatives : pandas.DataFrame of buildings; modified in place (the
        mortgage column is added).
    simulation_table : table name passed to ``dset.fetch`` for the choosers.
    output_names : ``(output_csv, output_title, coeff_name, output_varname)``;
        only the ``coeff_name`` template is used.
    agents_groupby : column(s) defining the agent segments.
    transition_config, relocation_config : dicts with at least ``'Enabled'``
        plus the keys read below.

    Returns
    -------
    None.  Results are written to ``dset.households``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one unit per agent of ``segment`` with probabilities ``p``.

        Returns the updated ``(mask, new_homes)`` pair.  ``minsize`` is
        accepted for signature compatibility and is not used.
        """
        p = copy.copy(p)
        p.loc[mask[mask == True].index] = 0  # already chosen
        try:
            indexes = np.random.choice(
                alternatives.index.values, len(segment.index),
                replace=False, p=p.values / p.values.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that
            # many distinct units; only that case is handled (the original
            # bare `except:` also hid KeyboardInterrupt and real bugs).
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            indexes = np.random.choice(
                alternatives.index.values, len(alternatives.index.values),
                replace=False, p=p.values / p.values.sum())
            # Agents with tenure == 2 (presumably renters - TODO confirm)
            # are flagged with -2 instead of receiving a unit.
            tenure2_ids = segment[segment.tenure == 2].index.values[
                :len(alternatives.index.values)]
            if new_homes.ix[tenure2_ids].shape[0] != 0:
                new_homes.ix[tenure2_ids] = -2
            else:
                new_homes.ix[segment.index.values[
                    :len(alternatives.index.values)]] = alternatives.index.values
            mask.loc[indexes] = True
            return mask, new_homes
        # `alternatives` may hold repeated index labels, so `.loc[indexes]`
        # can return extra rows; truncate to the number of agents.
        new_homes.ix[segment.index] = alternatives.loc[
            indexes].index.values[:len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True
        return mask, new_homes

    # --- household transition -------------------------------------------
    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    # --- mortgage payment estimate ----------------------------------------
    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    r = .05 / 12  # monthly interest rate
    n = 360       # number of monthly payments
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r) ** n) / ((1 + r) ** n - 1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    # --- relocation -------------------------------------------------------
    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are scaled by .01 and the configured factor.
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar] == -1]

    # --- county allocation --------------------------------------------------
    county_growth_share = pd.read_csv(
        os.path.join(misc.data_dir(), 'county_growth_share.csv'),
        index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(
        counties, movers_all.shape[0], replace=True, p=current_growth_shares)
    movers_all['county_id'] = movers_counties

    income_segment = movers_all.groupby('income_grp')[
        'upper_income_grp_val', 'lower_income_grp_val'].agg([np.mean, np.size])
    # Replicate the income-group rows once per county of the growth table.
    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():
        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])
    income_segment = income_segment.set_index(['county', income_segment.index])

    print("Total new agents and movers = %d" % len(movers_all.index))

    for seg_key, seg_row in income_segment.iterrows():
        county = seg_key[0]
        # Positional columns: mean/size of upper bound, then mean of lower.
        upper_income = seg_row[0]
        num_households = seg_row[1]
        lower_income = seg_row[2]

        # NOTE(review): movers are selected by income only, not by the
        # county drawn above - confirm this is intended.
        movers = movers_all[(movers_all['income'] <= upper_income) &
                            (movers_all['income'] >= lower_income)]
        print('County: %s. Placing %d households in the income range (%d, %d)' % (
            county, num_households, lower_income, upper_income))

        empty_units = buildings.residential_units.sub(
            choosers[choosers['building_id'] != -1].groupby(
                'building_id').size(), fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())

        # One row per vacant unit, restricted to the current county.
        alternatives = buildings.ix[np.repeat(
            empty_units.index.values, empty_units.values.astype('int'))]
        alternatives = alternatives[alternatives.county_id == int(county)]

        # Affordability filter based on the lower income bound per month.
        monthly_income = lower_income / 12
        if monthly_income <= 0:
            alts = alternatives[
                alternatives['unit_price_residential'] < 186281]
        elif monthly_income >= 55000:
            alts = alternatives[
                alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[
                alternatives['est_mortgage_payment'] / monthly_income <= 0.33]

        if alts.shape[0] == 0:
            # Record the households of this segment; the original
            # concatenated the complete `choosers` table here.
            homeless = pd.concat([movers, homeless])
            print('Could not place %d households due to income restrictions' % num_households)
            continue

        pdf = pd.DataFrame(index=alts.index)
        segments = movers.groupby(agents_groupby)

        # --- choice probabilities per agent segment ---------------------
        for name, segment in segments:
            segment = segment.head(1)  # representative chooser
            name = str(name)
            tmp_coeffname = coeff_name % name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size

            alts_sample = alts
            alts_sample['join_index'] = np.repeat(
                segment.index.values, SAMPLE_SIZE)
            alternative_sample = pd.merge(
                alts_sample, segment, left_on='join_index',
                right_index=True, suffixes=('', '_r'))

            # Interaction variables are named '<left>_x_<right>'.
            # NOTE(review): the original `if ... if ... else` overwrote the
            # '...gt' comparison with the product; confirm estimation
            # builds these variables the same way.
            for var in ind_vars:
                if '_x_' not in var:
                    continue
                parts = var.split('_x_')
                left = alternative_sample[parts[0]]
                right = alternative_sample[parts[1]]
                if parts[0].endswith('gt'):
                    alternative_sample[var] = (left > right).astype('int32')
                elif parts[0].endswith('lt'):
                    alternative_sample[var] = (left < right).astype('int32')
                else:
                    alternative_sample[var] = left * right

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            # `.values` replaces `as_matrix()`, which newer pandas removed.
            data = est_data.fillna(0).values
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(
                data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
            pdf['segment%s' % name] = pd.Series(
                probs.flatten(), index=alts.index)

        # --- unit assignment ----------------------------------------------
        new_homes = pd.Series(np.ones(len(movers.index)) * -1,
                              index=movers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s' % name]
            # The mask is rebuilt for every segment.
            mask = pd.Series(np.zeros(len(alts.index), dtype='bool'),
                             index=alts.index)
            print("Assigning units to %d agents of segment %s" % (
                len(segment.index), name))
            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts()  # households per building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        table = dset.households  # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        # Keep `choosers` current so later vacancy counts see placements.
        choosers.loc[table.index] = table

    dset.households.loc[out_table.index] = out_table
def simulate(
    dset,
    year,
    depvar="building_id",
    alternatives=None,
    simulation_table="establishments",
    output_names=None,
    agents_groupby=["income_3_tenure"],
    transition_config=None,
):
    """Place unlocated establishments into buildings with free job spaces.

    Steps, in order:

    1. If ``transition_config["Enabled"]``, scale the job control totals and
       run ``transitionmodel.simulate`` to create new establishments.
    2. Compute job ``spaces`` per building (``non_residential_sqft`` divided
       by ``building_sqft_per_job``, missing values filled with 1000) and
       the remaining supply after subtracting placed employees.
    3. Compute MNL probabilities per agent segment over buildings with
       supply, then draw buildings for establishments, largest employee
       counts first, reducing supply as establishments are placed.
    4. Write ``depvar`` and ``zone_id`` back to ``dset.establishments`` and
       store ``depvar`` through ``dset.store_attr``.

    The function also emits diagnostic prints for the hard-coded zone 1834
    and id 472137.
    NOTE(review): ``.loc[1834]`` on the per-zone sum raises ``KeyError``
    when that zone has no establishments - confirm these diagnostics are
    still wanted.

    Parameters
    ----------
    dset : dataset object (``fetch``, ``establishments``, ``buildings``,
        ``parcels``, ``coeffs``, ``load_coeff``, ``store_attr``).
    year : passed to the transition model and to ``store_attr``.
    depvar : establishment column holding the chosen building.
    alternatives : pandas.DataFrame of buildings; modified in place
        (``building_sqft_per_job`` filled, ``spaces`` added).
    simulation_table : table name passed to ``dset.fetch``.
    output_names : ``(output_csv, output_title, coeff_name, output_varname)``.
    agents_groupby : column(s) defining the agent segments.
    transition_config : dict with ``"Enabled"``, ``"control_totals_table"``
        and ``"scaling_factor"``.

    Returns
    -------
    None.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw a building for every establishment in ``segment``.

        Buildings whose ``supply`` is below ``minsize`` get probability 0.
        On success the chosen buildings' supply is reduced by ``minsize``.
        ``mask`` is returned unchanged.
        """
        p = copy.copy(p)
        pu = pd.DataFrame(p, index=alternatives.index)
        pu.columns = ["pro"]
        pu.loc[alternatives.supply < minsize, "pro"] = 0
        pp = np.array(pu).flatten()
        try:
            # Positions (not labels) are drawn here.
            indexes = np.random.choice(
                len(alternatives.index), len(segment.index),
                replace=False, p=pp / pp.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that
            # many distinct buildings or the probabilities are invalid;
            # the original bare `except:` also hid unrelated errors.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.index.values[indexes]
        alternatives["supply"].ix[alternatives.index.values[indexes]] -= minsize
        return mask, new_homes

    # --- establishment transition ---------------------------------------
    if transition_config["Enabled"]:
        ct = dset.fetch(transition_config["control_totals_table"])
        ct["total_number_of_jobs"] = (
            ct["total_number_of_jobs"] * transition_config["scaling_factor"]
        ).astype("int32")
        new_jobs = {
            "table": "dset.establishments",
            "writetotmp": "establishments",
            "model": "transitionmodel",
            "first_year": 2010,
            "control_totals": "dset.%s" % transition_config["control_totals_table"],
            "geography_field": "building_id",
            "amount_field": "total_number_of_jobs",
            "size_field": "employees",
        }
        import synthicity.urbansim.transitionmodel as transitionmodel

        transitionmodel.simulate(dset, new_jobs, year=year, show=True)

    dset.establishments.index.name = "establishment_id"
    choosers = dset.fetch(simulation_table)
    placed_choosers = choosers[choosers[depvar] > 0]
    movers = choosers[choosers[depvar] == -1]
    movers["zone_id"] = -1
    print("Total new agents and movers = %d" % len(movers.index))
    dset.establishments.loc[movers.index, "zone_id"] = -1
    print(dset.establishments[dset.establishments["zone_id"] == 1834].employees.sum())

    # --- supply of job spaces ---------------------------------------------
    alternatives.building_sqft_per_job = alternatives.building_sqft_per_job.fillna(1000)
    alternatives.loc[:, "spaces"] = (
        alternatives.non_residential_sqft / alternatives.building_sqft_per_job
    )
    empty_units = alternatives.spaces.sub(
        placed_choosers.groupby("building_id").employees.sum(), fill_value=0
    ).astype("int")
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    print(empty_units[empty_units.index == 472137])
    alts = alternatives.ix[empty_units.index]
    alts["supply"] = empty_units
    print(movers[movers.employees > 4000])

    u = pd.DataFrame(empty_units)
    u.columns = ["empty"]
    u["building_id"] = u.index
    empty_units_test = pd.merge(
        u, alternatives[["zone_id"]], left_on="building_id", right_index=True)
    print(empty_units_test[empty_units_test.zone_id == 1834]["empty"].sum())

    # --- choice probabilities per agent segment ---------------------------
    pdf = pd.DataFrame(index=alts.index)
    segments = movers.groupby(agents_groupby)
    for name, segment in segments:
        segment = segment.head(1)  # representative chooser
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, "fnames")]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size

        alts_sample = alts
        alts_sample["join_index"] = np.repeat(segment.index.values, SAMPLE_SIZE)
        alternative_sample = pd.merge(
            alts_sample, segment, left_on="join_index",
            right_index=True, suffixes=("", "_r"))

        # Interaction variables are named '<left>_x_<right>'.
        # NOTE(review): the original `if ... if ... else` overwrote the
        # '...gt' comparison with the product; confirm estimation builds
        # these variables the same way.
        for var in ind_vars:
            if "_x_" not in var:
                continue
            parts = var.split("_x_")
            left = alternative_sample[parts[0]]
            right = alternative_sample[parts[1]]
            if parts[0].endswith("gt"):
                alternative_sample[var] = (left > right).astype("int32")
            elif parts[0].endswith("lt"):
                alternative_sample[var] = (left < right).astype("int32")
            else:
                alternative_sample[var] = left * right

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        # `.values` replaces `as_matrix()`, which newer pandas removed.
        data = est_data.fillna(0).values
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
        pdf["segment%s" % name] = pd.Series(probs.flatten(), index=alts.index)

    # --- building assignment ----------------------------------------------
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    mask = np.zeros(len(alts.index), dtype="bool")
    for name, segment in segments:
        name = str(name)
        print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
        p = pdf["segment%s" % name].values
        tmp = segment["employees"]
        # Largest establishments first, so they see the most supply.
        for name, subsegment in reversed(list(segment.groupby(tmp.astype("int")))):
            mask, new_homes = choose(
                p, mask, alts, subsegment, new_homes, minsize=int(name))

    build_cnts = new_homes.value_counts()  # num estabs place in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size,
        build_cnts.size,
        build_cnts.get(-1, 0),
    ))

    # Kept from the original although the result is unused: attribute
    # access on dset may have side effects that cannot be seen from here.
    p = dset.parcels
    p = p.set_index("parcel_id")

    # --- write results back -------------------------------------------------
    del dset.establishments["zone_id"]
    dset.establishments["zone_id"] = pd.merge(
        dset.establishments, dset.buildings[["zone_id"]],
        left_on="building_id", right_index=True
    )["zone_id"]
    print(dset.establishments[dset.establishments["zone_id"] == 1834].employees.sum())

    table = dset.establishments  # need to go back to the whole dataset
    table[depvar].ix[new_homes.index] = new_homes.values.astype("int32")
    # Refresh zone_id now that building ids have changed.
    del table["zone_id"]
    table["zone_id"] = pd.merge(
        dset.establishments, dset.buildings[["zone_id"]],
        left_on="building_id", right_index=True
    )["zone_id"]
    print(table.groupby("zone_id").employees.sum().loc[1834])
    table["space"] = 0

    # Diagnostics: remaining supply after placement.
    alternatives.loc[:, "spaces"] = (
        alternatives.non_residential_sqft / alternatives.building_sqft_per_job
    )
    empty_units = alternatives.spaces.sub(
        table[table["building_id"] > 0].groupby("building_id").employees.sum(),
        fill_value=0,
    ).astype("int")
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    u = pd.DataFrame(empty_units)
    u.columns = ["empty"]
    u["building_id"] = u.index
    empty_units_test = pd.merge(
        u, alternatives[["zone_id"]], left_on="building_id", right_index=True)
    print(empty_units_test[empty_units_test.zone_id == 1834]["empty"].sum())
    print(table[table.index == 472137])

    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table='establishments', output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None):
    """Place unlocated establishments into buildings with free job spaces.

    Steps, in order:

    1. If ``transition_config['Enabled']``, scale the job control totals and
       run ``transitionmodel.simulate`` to create new establishments.
    2. Compute job ``spaces`` per building (``non_residential_sqft`` divided
       by ``building_sqft_per_job``, missing values filled with 1000) and
       the remaining supply after subtracting placed employees.
    3. Compute MNL probabilities per agent segment over buildings with
       supply, then draw buildings for establishments, largest employee
       counts first, reducing supply as establishments are placed.
    4. Write ``building_id`` and ``zone_id`` of the movers back to
       ``dset.establishments`` and store ``depvar`` via ``dset.store_attr``.

    Parameters
    ----------
    dset : dataset object (``fetch``, ``establishments``, ``buildings``,
        ``coeffs``, ``load_coeff``, ``store_attr``).
    year : passed to the transition model and to ``store_attr``.
    depvar : establishment column holding the chosen building.
    alternatives : pandas.DataFrame of buildings; modified in place
        (``building_sqft_per_job`` filled, ``spaces`` added).
    simulation_table : table name passed to ``dset.fetch``.
    output_names : ``(output_csv, output_title, coeff_name, output_varname)``.
    agents_groupby : column(s) defining the agent segments.
    transition_config : dict with ``'Enabled'``, ``'control_totals_table'``
        and ``'scaling_factor'``.

    Returns
    -------
    None.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw a building for every establishment in ``segment``.

        Only buildings with ``supply >= minsize`` are eligible.  On success
        the chosen buildings' supply is reduced by ``minsize``.  ``mask`` is
        returned unchanged.
        """
        choiceset = alternatives.loc[alternatives.supply >= minsize]
        p = copy.copy(p)
        p = p[alternatives.supply >= minsize]
        p_arr = p.values
        try:
            # Building labels (not positions) are drawn here.
            indexes = np.random.choice(
                choiceset.index, len(segment.index),
                replace=False, p=p_arr / p_arr.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that
            # many distinct buildings or the probabilities are invalid;
            # the original bare `except:` also hid unrelated errors.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = indexes
        alternatives.loc[indexes, "supply"] -= minsize
        return mask, new_homes

    # --- establishment transition ---------------------------------------
    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        ct["total_number_of_jobs"] = (
            ct["total_number_of_jobs"] *
            transition_config['scaling_factor']).astype('int32')
        new_jobs = {
            "table": "dset.establishments",
            "writetotmp": "establishments",
            "model": "transitionmodel",
            "first_year": 2010,
            "control_totals": "dset.%s" % transition_config['control_totals_table'],
            "geography_field": "building_id",
            "amount_field": "total_number_of_jobs",
            "size_field": "employees"
        }
        import synthicity.urbansim.transitionmodel as transitionmodel
        transitionmodel.simulate(dset, new_jobs, year=year, show=True)

    dset.establishments.index.name = 'establishment_id'
    choosers = dset.fetch(simulation_table)
    placed_choosers = choosers[choosers[depvar] > 0]
    movers = choosers[choosers[depvar] == -1]
    movers.loc[:, "zone_id"] = -1
    print("Total new agents and movers = %d" % len(movers.index))

    # --- supply of job spaces ---------------------------------------------
    alternatives.building_sqft_per_job = \
        alternatives.building_sqft_per_job.fillna(1000)
    alternatives.loc[:, 'spaces'] = (
        alternatives.non_residential_sqft / alternatives.building_sqft_per_job)
    empty_units = alternatives.spaces.sub(
        placed_choosers.groupby('building_id').employees.sum(),
        fill_value=0).astype('int')
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    alts = alternatives.ix[empty_units.index]
    alts["supply"] = empty_units

    # --- choice probabilities per agent segment ---------------------------
    pdf = pd.DataFrame(index=alts.index)
    segments = movers.groupby(agents_groupby)
    for name, segment in segments:
        segment = segment.head(1)  # representative chooser
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size

        alts_sample = alts
        alts_sample['join_index'] = np.repeat(
            segment.index.values, SAMPLE_SIZE)
        alternative_sample = pd.merge(
            alts_sample, segment, left_on='join_index',
            right_index=True, suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.
        # NOTE(review): the original `if ... if ... else` overwrote the
        # '...gt' comparison with the product; confirm estimation builds
        # these variables the same way.
        for var in ind_vars:
            if '_x_' not in var:
                continue
            parts = var.split('_x_')
            left = alternative_sample[parts[0]]
            right = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[var] = (left > right).astype('int32')
            elif parts[0].endswith('lt'):
                alternative_sample[var] = (left < right).astype('int32')
            else:
                alternative_sample[var] = left * right

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        # `.values` replaces `as_matrix()`, which newer pandas removed.
        data = est_data.fillna(0).values
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    # --- building assignment ----------------------------------------------
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    mask = np.zeros(len(alts.index), dtype='bool')
    for name, segment in segments:
        name = str(name)
        print("Assigning units to %d agents of segment %s" % (
            len(segment.index), name))
        p = pdf['segment%s' % name]
        tmp = segment['employees']
        # Largest establishments first, so they see the most supply.
        for name, subsegment in reversed(
                list(segment.groupby(tmp.astype('int')))):
            mask, new_homes = choose(
                p, mask, alts, subsegment, new_homes, minsize=int(name))

    build_cnts = new_homes.value_counts()  # num estabs place in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

    # --- write results back -------------------------------------------------
    new_homes_frame = pd.DataFrame(new_homes, columns=['building_id'])
    result_set = pd.merge(
        new_homes_frame, dset.buildings, left_on='building_id',
        right_index=True, how='left')[['building_id', 'zone_id']]
    result_set["employees"] = pd.merge(
        result_set, dset.establishments,
        left_index=True, right_index=True)['employees'].values

    table = dset.establishments  # need to go back to the whole dataset
    # Reset first, then write the new building and zone of every mover.
    table.loc[result_set.index, "building_id"] = -1
    table.loc[result_set.index, "zone_id"] = -1
    table.loc[result_set.index, "building_id"] = result_set.building_id.values
    table.loc[result_set.index, "zone_id"] = result_set.zone_id

    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table=None, output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None, relocation_config=None):
    """Place new and relocating households into vacant residential units.

    Steps, in order:

    1. If ``transition_config['Enabled']``, scale the household control
       totals, run the tabular-totals transition and store the resulting
       households/persons on ``dset.d``; added households get
       ``building_id = -1``.
    2. Add an ``est_mortgage_payment`` column to ``alternatives`` (5% annual
       rate, 360 monthly payments, applied to ``unit_price_residential``).
    3. If ``relocation_config['Enabled']``, mark relocating choosers with
       ``depvar = -1``.
    4. Draw a county for every mover from ``county_growth_share.csv`` and
       build one (county, income group) row per combination.
    5. For each such row, compute MNL probabilities per agent segment over
       the affordable vacant units of that county and draw units without
       replacement.
    6. Write the chosen ``depvar`` values back to ``dset.households``.

    Parameters
    ----------
    dset : dataset object (``fetch``, ``d``, ``store``, ``coeffs``,
        ``load_coeff``, ``relocation_rates``, ``households``).
    year : label of the row to read from the county growth share table;
        also passed to the transition model.
    depvar : name of the household column holding the chosen building.
    alternatives : pandas.DataFrame of buildings; modified in place (the
        mortgage column is added).
    simulation_table : table name passed to ``dset.fetch`` for the choosers.
    output_names : ``(output_csv, output_title, coeff_name, output_varname)``;
        only the ``coeff_name`` template is used.
    agents_groupby : column(s) defining the agent segments.
    transition_config, relocation_config : dicts with at least ``'Enabled'``
        plus the keys read below.

    Returns
    -------
    None.  Results are written to ``dset.households``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one unit per agent of ``segment`` with probabilities ``p``.

        Returns the updated ``(mask, new_homes)`` pair.  ``minsize`` is
        accepted for signature compatibility and is not used.
        """
        p = copy.copy(p)
        p.loc[mask[mask == True].index] = 0  # already chosen
        try:
            indexes = np.random.choice(
                alternatives.index.values, len(segment.index),
                replace=False, p=p.values / p.values.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that
            # many distinct units; only that case is handled (the original
            # bare `except:` also hid KeyboardInterrupt and real bugs).
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            indexes = np.random.choice(
                alternatives.index.values, len(alternatives.index.values),
                replace=False, p=p.values / p.values.sum())
            # Agents with tenure == 2 (presumably renters - TODO confirm)
            # are flagged with -2 instead of receiving a unit.
            tenure2_ids = segment[segment.tenure == 2].index.values[
                :len(alternatives.index.values)]
            if new_homes.ix[tenure2_ids].shape[0] != 0:
                new_homes.ix[tenure2_ids] = -2
            else:
                new_homes.ix[segment.index.values[
                    :len(alternatives.index.values)]] = alternatives.index.values
            mask.loc[indexes] = True
            return mask, new_homes
        # `alternatives` may hold repeated index labels, so `.loc[indexes]`
        # can return extra rows; truncate to the number of agents.
        new_homes.ix[segment.index] = alternatives.loc[
            indexes].index.values[:len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True
        return mask, new_homes

    # --- household transition -------------------------------------------
    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    # --- mortgage payment estimate ----------------------------------------
    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    r = .05 / 12  # monthly interest rate
    n = 360       # number of monthly payments
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r) ** n) / ((1 + r) ** n - 1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    # --- relocation -------------------------------------------------------
    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are scaled by .01 and the configured factor.
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar] == -1]

    # --- county allocation --------------------------------------------------
    county_growth_share = pd.read_csv(
        os.path.join(misc.data_dir(), 'county_growth_share.csv'),
        index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(
        counties, movers_all.shape[0], replace=True, p=current_growth_shares)
    movers_all['county_id'] = movers_counties

    income_segment = movers_all.groupby('income_grp')[
        'upper_income_grp_val', 'lower_income_grp_val'].agg([np.mean, np.size])
    # Replicate the income-group rows once per county of the growth table.
    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():
        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])
    income_segment = income_segment.set_index(['county', income_segment.index])

    print("Total new agents and movers = %d" % len(movers_all.index))

    for seg_key, seg_row in income_segment.iterrows():
        county = seg_key[0]
        # Positional columns: mean/size of upper bound, then mean of lower.
        upper_income = seg_row[0]
        num_households = seg_row[1]
        lower_income = seg_row[2]

        # NOTE(review): movers are selected by income only, not by the
        # county drawn above - confirm this is intended.
        movers = movers_all[(movers_all['income'] <= upper_income) &
                            (movers_all['income'] >= lower_income)]
        print('County: %s. Placing %d households in the income range (%d, %d)' % (
            county, num_households, lower_income, upper_income))

        empty_units = buildings.residential_units.sub(
            choosers[choosers['building_id'] != -1].groupby(
                'building_id').size(), fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())

        # One row per vacant unit, restricted to the current county.
        alternatives = buildings.ix[np.repeat(
            empty_units.index.values, empty_units.values.astype('int'))]
        alternatives = alternatives[alternatives.county_id == int(county)]

        # Affordability filter based on the lower income bound per month.
        monthly_income = lower_income / 12
        if monthly_income <= 0:
            alts = alternatives[
                alternatives['unit_price_residential'] < 186281]
        elif monthly_income >= 55000:
            alts = alternatives[
                alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[
                alternatives['est_mortgage_payment'] / monthly_income <= 0.33]

        if alts.shape[0] == 0:
            # Record the households of this segment; the original
            # concatenated the complete `choosers` table here.
            homeless = pd.concat([movers, homeless])
            print('Could not place %d households due to income restrictions' % num_households)
            continue

        pdf = pd.DataFrame(index=alts.index)
        segments = movers.groupby(agents_groupby)

        # --- choice probabilities per agent segment ---------------------
        for name, segment in segments:
            segment = segment.head(1)  # representative chooser
            name = str(name)
            tmp_coeffname = coeff_name % name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size

            alts_sample = alts
            alts_sample['join_index'] = np.repeat(
                segment.index.values, SAMPLE_SIZE)
            alternative_sample = pd.merge(
                alts_sample, segment, left_on='join_index',
                right_index=True, suffixes=('', '_r'))

            # Interaction variables are named '<left>_x_<right>'.
            # NOTE(review): the original `if ... if ... else` overwrote the
            # '...gt' comparison with the product; confirm estimation
            # builds these variables the same way.
            for var in ind_vars:
                if '_x_' not in var:
                    continue
                parts = var.split('_x_')
                left = alternative_sample[parts[0]]
                right = alternative_sample[parts[1]]
                if parts[0].endswith('gt'):
                    alternative_sample[var] = (left > right).astype('int32')
                elif parts[0].endswith('lt'):
                    alternative_sample[var] = (left < right).astype('int32')
                else:
                    alternative_sample[var] = left * right

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            # `.values` replaces `as_matrix()`, which newer pandas removed.
            data = est_data.fillna(0).values
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(
                data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
            pdf['segment%s' % name] = pd.Series(
                probs.flatten(), index=alts.index)

        # --- unit assignment ----------------------------------------------
        new_homes = pd.Series(np.ones(len(movers.index)) * -1,
                              index=movers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s' % name]
            # The mask is rebuilt for every segment.
            mask = pd.Series(np.zeros(len(alts.index), dtype='bool'),
                             index=alts.index)
            print("Assigning units to %d agents of segment %s" % (
                len(segment.index), name))
            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts()  # households per building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        table = dset.households  # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        # Keep `choosers` current so later vacancy counts see placements.
        choosers.loc[table.index] = table

    dset.households.loc[out_table.index] = out_table
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table=None, output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None, relocation_config=None):
    """Assign households to vacant residential units, one income level at a time.

    For every distinct ``income`` value found in ``dset.households`` the
    matching rows of ``simulation_table`` are offered the vacant units that
    pass a price filter, choice probabilities are computed per
    ``agents_groupby`` segment with ``interaction.mnl_simulate`` and units are
    drawn at random with those probabilities.

    Parameters
    ----------
    dset : dataset object
        Must expose ``households``, ``coeffs`` and ``load_coeff``.
    year : unused by this variant.
    depvar : str
        Column of ``simulation_table`` that receives the chosen building id.
    alternatives : pandas.DataFrame
        Buildings. ``unit_price_residential`` and ``residential_units`` are
        read; an ``est_mortgage_payment`` column is added in place.
    simulation_table : pandas.DataFrame
        Households with ``income``, ``building_id`` and ``tenure`` columns
        (plus the ``agents_groupby`` columns); updated in place.
    output_names : sequence of four str
        ``(output_csv, output_title, coeff_name, output_varname)``. Only
        ``coeff_name`` is used, as a ``%`` template filled with the segment
        name.
    agents_groupby : list of str
        Columns that define the choice segments.
    transition_config, relocation_config : unused by this variant.

    Returns
    -------
    None
        Results are written to ``simulation_table`` and to a CSV file.
    """
    import synthicity.urbansim.interaction as interaction
    import pandas as pd
    import numpy as np
    import copy
    from synthicity.utils import misc
    from drcog.models import transition

    output_csv, output_title, coeff_name, output_varname = output_names
    buildings = alternatives
    income_segment = dset.households.groupby('income').size()
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)

    # Level-payment factor for a loan at 5% per year over 360 monthly payments.
    r = .05 / 12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r) ** n) / ((1 + r) ** n - 1))

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one alternative per agent of ``segment`` into ``new_homes``."""
        p = copy.copy(p)
        p.loc[mask[mask].index] = 0  # already chosen
        try:
            indexes = np.random.choice(
                alternatives.index.values, len(segment.index),
                replace=False, p=p.values / p.values.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that many
            # distinct alternatives; other exceptions now propagate.
            # NOTE(review): the nesting of this fallback was reconstructed
            # from a whitespace-collapsed source -- confirm against history.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            indexes = np.random.choice(
                alternatives.index.values, len(alternatives.index.values),
                replace=False, p=p.values / p.values.sum())
            limit = len(alternatives.index.values)
            tenure2_ids = segment[segment.tenure == 2].index.values[:limit]
            if new_homes.ix[tenure2_ids].shape[0] != 0:
                new_homes.ix[tenure2_ids] = -2
            else:
                new_homes.ix[segment.index.values[:limit]] = \
                    alternatives.index.values
            mask.loc[indexes] = True
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[
            :len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True
        return mask, new_homes

    for income, household_count in income_segment.iteritems():
        choosers = simulation_table[simulation_table['income'] == income]
        print('Placing %d households with an income of % d' % (household_count, income))

        # Vacant units = residential units minus households already housed.
        housed = simulation_table[
            simulation_table['building_id'] != -1].groupby('building_id').size()
        empty_units = buildings.residential_units.sub(housed, fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        # One row per vacant unit (building ids are repeated).
        alternatives = buildings.ix[
            np.repeat(empty_units.index, empty_units.values.astype('int'))]

        # Price filter keyed on income / 12.
        monthly_income = income / 12
        if monthly_income <= 0:
            alts = alternatives[alternatives['unit_price_residential'] < 186281]
        elif monthly_income >= 55000:
            alts = alternatives[alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[
                alternatives['est_mortgage_payment'] / monthly_income <= 0.33]

        if alts.shape[0] == 0:
            homeless = pd.concat([choosers, homeless])
            print('Could not place %d households due to income restrictions' % household_count)
            continue

        pdf = pd.DataFrame(index=alts.index)
        segments = choosers.groupby(agents_groupby)

        # Choice probabilities, computed from the first agent of each segment.
        for name, segment in segments:
            segment = segment.head(1)
            name = str(name)
            tmp_coeffname = coeff_name % name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size

            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index, SAMPLE_SIZE)
            alternative_sample = pd.merge(
                alts_sample, segment, left_on='join_index', right_index=True,
                suffixes=('', '_r'))

            # Interaction variables are named '<left>_x_<right>'.
            interaction_vars = [(var, var.split('_x_'))
                                for var in ind_vars if '_x_' in var]
            for var, (left, right) in ((v, parts[:2]) for v, parts in interaction_vars):
                # elif chain: a '...gt' indicator must not be overwritten by
                # the product fallback.
                # NOTE(review): coefficients estimated with code that
                # multiplied '...gt' terms may need re-estimation -- confirm.
                if left.endswith('gt'):
                    alternative_sample[var] = (
                        alternative_sample[left] > alternative_sample[right]
                    ).astype('int32')
                elif left.endswith('lt'):
                    alternative_sample[var] = (
                        alternative_sample[left] < alternative_sample[right]
                    ).astype('int32')
                else:
                    alternative_sample[var] = (
                        alternative_sample[left] * alternative_sample[right])

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            data = est_data.fillna(0).as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(
                data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
            pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

        # -1 marks a household that has not been placed.
        new_homes = pd.Series(np.ones(len(choosers.index)) * -1,
                              index=choosers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s' % name]
            mask = pd.Series(np.zeros(len(alts.index), dtype='bool'),
                             index=alts.index)
            print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts()  # households placed per building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        table = simulation_table  # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        simulation_table.loc[table.index] = table

    # NOTE(review): hard-coded, user-specific output path.
    out_table.to_csv('C:/users/jmartinez/documents/households_new_location.csv')
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table='households', output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None, relocation_config=None):
    """Run the household transition, relocation and location-choice steps.

    Optionally grows the household table to the control totals, optionally
    unplaces relocating households, then assigns every household whose
    ``depvar`` is -1 to a vacant residential unit drawn with the
    probabilities returned by ``interaction.mnl_simulate``.

    Parameters
    ----------
    dset : dataset object
        Must expose ``fetch``, ``d``, ``store``, ``households``,
        ``buildings``, ``coeffs``, ``load_coeff``, ``relocation_rates`` and
        ``store_attr``.
    year : passed to the transition model and to ``dset.store_attr``.
    depvar : str
        Household column holding the chosen building id (-1 = unplaced).
    alternatives : pandas.DataFrame
        Candidate buildings, indexed like ``dset.buildings``.
    simulation_table : str
        Table name handed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of four str
        ``(output_csv, output_title, coeff_name, output_varname)``.
        ``coeff_name`` is a ``%`` template filled with the segment name;
        ``output_varname`` is passed to ``dset.store_attr``.
    agents_groupby : list of str
        Columns that define the choice segments.
    transition_config : dict
        Reads ``'Enabled'``, ``'control_totals_table'``, ``'scaling_factor'``.
    relocation_config : dict
        Reads ``'Enabled'``, ``'relocation_rates_table'``, ``'scaling_factor'``.

    Returns
    -------
    None
        ``dset.households[depvar]`` is updated and stored via ``store_attr``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        # NOTE(review): ct is scaled in place; if dset.fetch returns a cached
        # frame the factor would compound across calls -- confirm.
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1  # added households start unplaced
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are scaled by .01 and by the configured factor.
        rate_table[rate_field] = (rate_table[rate_field] * .01 *
                                  relocation_config['scaling_factor'])
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers = choosers[choosers[depvar] == -1]
    print("Total new agents and movers = %d" % len(movers.index))

    # Vacant units = residential units minus households per building.
    has_units = dset.buildings[(dset.buildings.residential_units > 0)]
    empty_units = has_units.residential_units.sub(
        choosers.groupby('building_id').size(), fill_value=0)
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    # One row per vacant unit (building ids are repeated).
    alternatives = alternatives.ix[np.repeat(
        empty_units.index.values, empty_units.values.astype('int'))]
    alts = alternatives

    pdf = pd.DataFrame(index=alts.index)
    segments = movers.groupby(agents_groupby)

    # Choice probabilities, computed from the first agent of each segment.
    for name, segment in segments:
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size

        alts_sample = alts
        alts_sample.loc[:, 'join_index'] = np.repeat(
            segment.index.values, SAMPLE_SIZE)
        alternative_sample = pd.merge(
            alts_sample, segment, left_on='join_index', right_index=True,
            suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.
        interaction_vars = [(var, var.split('_x_'))
                            for var in ind_vars if '_x_' in var]
        for var, (left, right) in ((v, parts[:2]) for v, parts in interaction_vars):
            # elif chain: a '...gt' indicator must not be overwritten by the
            # product fallback.
            # NOTE(review): coefficients estimated with code that multiplied
            # '...gt' terms may need re-estimation -- confirm.
            if left.endswith('gt'):
                alternative_sample[var] = (
                    alternative_sample[left] > alternative_sample[right]
                ).astype('int32')
            elif left.endswith('lt'):
                alternative_sample[var] = (
                    alternative_sample[left] < alternative_sample[right]
                ).astype('int32')
            else:
                alternative_sample[var] = (
                    alternative_sample[left] * alternative_sample[right])

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        data = est_data.fillna(0).as_matrix()
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one distinct alternative row per agent of ``segment``."""
        p = copy.copy(p)
        p[mask] = 0  # already chosen
        try:
            indexes = np.random.choice(
                len(alternatives.index), len(segment.index),
                replace=False, p=p / p.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that many
            # distinct alternatives; other exceptions now propagate.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.index.values[indexes]
        mask[indexes] = 1
        return mask, new_homes

    # -1 marks an agent that has not been placed.
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    for name, segment in segments:
        name = str(name)
        p = pdf['segment%s' % name].values
        # NOTE(review): the mask is rebuilt for every segment, so units taken
        # by one segment stay available to the next -- confirm intended.
        mask = np.zeros(len(alts.index), dtype='bool')
        print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
        mask, new_homes = choose(p, mask, alts, segment, new_homes)

    build_cnts = new_homes.value_counts()  # households placed per building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

    table = dset.households  # need to go back to the whole dataset
    table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table='establishments', output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None):
    """Run the establishment transition and location-choice steps.

    Optionally runs the job transition model, then assigns every
    establishment whose ``depvar`` is -1 to a building with enough remaining
    job spaces, drawn with the probabilities returned by
    ``interaction.mnl_simulate``.

    Parameters
    ----------
    dset : dataset object
        Must expose ``fetch``, ``establishments``, ``buildings``, ``coeffs``,
        ``load_coeff`` and ``store_attr``.
    year : passed to the transition model and to ``dset.store_attr``.
    depvar : str
        Establishment column holding the building id (-1 = unplaced).
    alternatives : pandas.DataFrame
        Buildings with ``non_residential_sqft`` and ``building_sqft_per_job``;
        missing ``building_sqft_per_job`` is filled with 1000 and a ``spaces``
        column is added, both in place.
    simulation_table : str
        Table name handed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of four str
        ``(output_csv, output_title, coeff_name, output_varname)``.
        ``coeff_name`` is a ``%`` template filled with the segment name;
        ``output_varname`` is passed to ``dset.store_attr``.
    agents_groupby : list of str
        Columns that define the choice segments.
    transition_config : dict
        Reads ``'Enabled'``, ``'control_totals_table'``, ``'scaling_factor'``.

    Returns
    -------
    None
        ``dset.establishments`` gets new ``building_id`` / ``zone_id`` values
        and ``depvar`` is stored via ``store_attr``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        # NOTE(review): ct is scaled in place; if dset.fetch returns a cached
        # frame the factor would compound across calls -- confirm.
        ct["total_number_of_jobs"] = (
            ct["total_number_of_jobs"] *
            transition_config['scaling_factor']).astype('int32')
        new_jobs = {
            "table": "dset.establishments",
            "writetotmp": "establishments",
            "model": "transitionmodel",
            "first_year": 2010,
            "control_totals": "dset.%s" % transition_config['control_totals_table'],
            "geography_field": "building_id",
            "amount_field": "total_number_of_jobs",
            "size_field": "employees",
        }
        import synthicity.urbansim.transitionmodel as transitionmodel
        transitionmodel.simulate(dset, new_jobs, year=year, show=True)

    dset.establishments.index.name = 'establishment_id'
    choosers = dset.fetch(simulation_table)
    placed_choosers = choosers[choosers[depvar] > 0]
    movers = choosers[choosers[depvar] == -1]
    movers.loc[:, "zone_id"] = -1
    print("Total new agents and movers = %d" % len(movers.index))

    # Job spaces per building = floor area / sqft per job (default 1000).
    alternatives.building_sqft_per_job = \
        alternatives.building_sqft_per_job.fillna(1000)
    alternatives.loc[:, 'spaces'] = (alternatives.non_residential_sqft /
                                     alternatives.building_sqft_per_job)
    # Remaining supply = spaces minus employees already placed there.
    empty_units = alternatives.spaces.sub(
        placed_choosers.groupby('building_id').employees.sum(),
        fill_value=0).astype('int')
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    alts = alternatives.ix[empty_units.index]
    alts["supply"] = empty_units

    pdf = pd.DataFrame(index=alts.index)
    segments = movers.groupby(agents_groupby)

    # Choice probabilities, computed from the first agent of each segment.
    for name, segment in segments:
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size

        alts_sample = alts
        alts_sample['join_index'] = np.repeat(segment.index.values, SAMPLE_SIZE)
        alternative_sample = pd.merge(
            alts_sample, segment, left_on='join_index', right_index=True,
            suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.
        interaction_vars = [(var, var.split('_x_'))
                            for var in ind_vars if '_x_' in var]
        for var, (left, right) in ((v, parts[:2]) for v, parts in interaction_vars):
            # elif chain: a '...gt' indicator must not be overwritten by the
            # product fallback.
            # NOTE(review): coefficients estimated with code that multiplied
            # '...gt' terms may need re-estimation -- confirm.
            if left.endswith('gt'):
                alternative_sample[var] = (
                    alternative_sample[left] > alternative_sample[right]
                ).astype('int32')
            elif left.endswith('lt'):
                alternative_sample[var] = (
                    alternative_sample[left] < alternative_sample[right]
                ).astype('int32')
            else:
                alternative_sample[var] = (
                    alternative_sample[left] * alternative_sample[right])

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        data = est_data.fillna(0).as_matrix()
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Place the agents of ``segment`` in buildings with supply >= minsize.

        ``mask`` is passed through unchanged; availability is tracked by
        decrementing the ``supply`` column of ``alternatives`` instead.
        """
        eligible = alternatives.supply >= minsize
        choiceset = alternatives.loc[eligible]
        p_arr = p[eligible].values
        try:
            indexes = np.random.choice(
                choiceset.index, len(segment.index),
                replace=False, p=p_arr / p_arr.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that many
            # distinct alternatives; other exceptions now propagate.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = indexes
        alternatives.loc[indexes, "supply"] -= minsize
        return mask, new_homes

    # -1 marks an agent that has not been placed.
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    mask = np.zeros(len(alts.index), dtype='bool')
    for name, segment in segments:
        name = str(name)
        print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
        p = pdf['segment%s' % name]
        # Largest establishments first, grouped by integer employee count.
        by_size = segment.groupby(segment['employees'].astype('int'))
        for size, subsegment in reversed(list(by_size)):
            mask, new_homes = choose(p, mask, alts, subsegment, new_homes,
                                     minsize=int(size))

    build_cnts = new_homes.value_counts()  # establishments placed per building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

    # Look up the zone of every chosen building.
    new_homes_frame = pd.DataFrame(new_homes, columns=['building_id'])
    result_set = pd.merge(
        new_homes_frame, dset.buildings, left_on='building_id',
        right_index=True, how='left')[['building_id', 'zone_id']]
    result_set["employees"] = pd.merge(
        result_set, dset.establishments, left_index=True,
        right_index=True)['employees'].values

    table = dset.establishments  # need to go back to the whole dataset
    # Reset first, then write the new location of every mover.
    table.loc[result_set.index, "building_id"] = -1
    table.loc[result_set.index, "zone_id"] = -1
    table.loc[result_set.index, "building_id"] = result_set.building_id.values
    table.loc[result_set.index, "zone_id"] = result_set.zone_id
    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table='households', output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None, relocation_config=None):
    """Run the household transition, relocation and location-choice steps.

    Optionally grows the household table to the control totals, optionally
    unplaces relocating households, then assigns every household whose
    ``depvar`` is -1 to a vacant residential unit drawn with the
    probabilities returned by ``interaction.mnl_simulate``.

    Parameters
    ----------
    dset : dataset object
        Must expose ``fetch``, ``d``, ``store``, ``households``,
        ``buildings``, ``coeffs``, ``load_coeff``, ``relocation_rates`` and
        ``store_attr``.
    year : passed to the transition model and to ``dset.store_attr``.
    depvar : str
        Household column holding the chosen building id (-1 = unplaced).
    alternatives : pandas.DataFrame
        Candidate buildings, indexed like ``dset.buildings``.
    simulation_table : str
        Table name handed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of four str
        ``(output_csv, output_title, coeff_name, output_varname)``.
        ``coeff_name`` is a ``%`` template filled with the segment name;
        ``output_varname`` is passed to ``dset.store_attr``.
    agents_groupby : list of str
        Columns that define the choice segments.
    transition_config : dict
        Reads ``'Enabled'``, ``'control_totals_table'``, ``'scaling_factor'``.
    relocation_config : dict
        Reads ``'Enabled'``, ``'relocation_rates_table'``, ``'scaling_factor'``.

    Returns
    -------
    None
        ``dset.households[depvar]`` is updated and stored via ``store_attr``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        # NOTE(review): ct is scaled in place; if dset.fetch returns a cached
        # frame the factor would compound across calls -- confirm.
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1  # added households start unplaced
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are scaled by .01 and by the configured factor.
        rate_table[rate_field] = (rate_table[rate_field] * .01 *
                                  relocation_config['scaling_factor'])
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers = choosers[choosers[depvar] == -1]
    print("Total new agents and movers = %d" % len(movers.index))

    # Vacant units = residential units minus households per building.
    has_units = dset.buildings[(dset.buildings.residential_units > 0)]
    empty_units = has_units.residential_units.sub(
        choosers.groupby('building_id').size(), fill_value=0)
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    # One row per vacant unit (building ids are repeated).
    alternatives = alternatives.ix[np.repeat(
        empty_units.index.values, empty_units.values.astype('int'))]
    alts = alternatives

    pdf = pd.DataFrame(index=alts.index)
    segments = movers.groupby(agents_groupby)

    # Choice probabilities, computed from the first agent of each segment.
    for name, segment in segments:
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size

        alts_sample = alts
        alts_sample.loc[:, 'join_index'] = np.repeat(
            segment.index.values, SAMPLE_SIZE)
        alternative_sample = pd.merge(
            alts_sample, segment, left_on='join_index', right_index=True,
            suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.
        interaction_vars = [(var, var.split('_x_'))
                            for var in ind_vars if '_x_' in var]
        for var, (left, right) in ((v, parts[:2]) for v, parts in interaction_vars):
            # elif chain: a '...gt' indicator must not be overwritten by the
            # product fallback.
            # NOTE(review): coefficients estimated with code that multiplied
            # '...gt' terms may need re-estimation -- confirm.
            if left.endswith('gt'):
                alternative_sample[var] = (
                    alternative_sample[left] > alternative_sample[right]
                ).astype('int32')
            elif left.endswith('lt'):
                alternative_sample[var] = (
                    alternative_sample[left] < alternative_sample[right]
                ).astype('int32')
            else:
                alternative_sample[var] = (
                    alternative_sample[left] * alternative_sample[right])

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        data = est_data.fillna(0).as_matrix()
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(
            data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one distinct alternative row per agent of ``segment``."""
        p = copy.copy(p)
        p[mask] = 0  # already chosen
        try:
            indexes = np.random.choice(
                len(alternatives.index), len(segment.index),
                replace=False, p=p / p.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that many
            # distinct alternatives; other exceptions now propagate.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.index.values[indexes]
        mask[indexes] = 1
        return mask, new_homes

    # -1 marks an agent that has not been placed.
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    for name, segment in segments:
        name = str(name)
        p = pdf['segment%s' % name].values
        # NOTE(review): the mask is rebuilt for every segment, so units taken
        # by one segment stay available to the next -- confirm intended.
        mask = np.zeros(len(alts.index), dtype='bool')
        print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
        mask, new_homes = choose(p, mask, alts, segment, new_homes)

    build_cnts = new_homes.value_counts()  # households placed per building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

    table = dset.households  # need to go back to the whole dataset
    table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None,
             simulation_table=None, output_names=None,
             agents_groupby=['income_3_tenure', ],
             transition_config=None, relocation_config=None):
    """Assign households to vacant residential units, one income level at a time.

    For every distinct ``income`` value found in ``dset.households`` the
    matching rows of ``simulation_table`` are offered the vacant units that
    pass a price filter, choice probabilities are computed per
    ``agents_groupby`` segment with ``interaction.mnl_simulate`` and units are
    drawn at random with those probabilities.

    Parameters
    ----------
    dset : dataset object
        Must expose ``households``, ``coeffs`` and ``load_coeff``.
    year : unused by this variant.
    depvar : str
        Column of ``simulation_table`` that receives the chosen building id.
    alternatives : pandas.DataFrame
        Buildings. ``unit_price_residential`` and ``residential_units`` are
        read; an ``est_mortgage_payment`` column is added in place.
    simulation_table : pandas.DataFrame
        Households with ``income``, ``building_id`` and ``tenure`` columns
        (plus the ``agents_groupby`` columns); updated in place.
    output_names : sequence of four str
        ``(output_csv, output_title, coeff_name, output_varname)``. Only
        ``coeff_name`` is used, as a ``%`` template filled with the segment
        name.
    agents_groupby : list of str
        Columns that define the choice segments.
    transition_config, relocation_config : unused by this variant.

    Returns
    -------
    None
        Results are written to ``simulation_table`` and to a CSV file.
    """
    import synthicity.urbansim.interaction as interaction
    import pandas as pd
    import numpy as np
    import copy
    from synthicity.utils import misc
    from drcog.models import transition

    output_csv, output_title, coeff_name, output_varname = output_names
    buildings = alternatives
    income_segment = dset.households.groupby('income').size()
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)

    # Level-payment factor for a loan at 5% per year over 360 monthly payments.
    r = .05 / 12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r) ** n) / ((1 + r) ** n - 1))

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one alternative per agent of ``segment`` into ``new_homes``."""
        p = copy.copy(p)
        p.loc[mask[mask].index] = 0  # already chosen
        try:
            indexes = np.random.choice(
                alternatives.index.values, len(segment.index),
                replace=False, p=p.values / p.values.sum())
        except ValueError:
            # np.random.choice raises ValueError when it cannot draw that many
            # distinct alternatives; other exceptions now propagate.
            # NOTE(review): the nesting of this fallback was reconstructed
            # from a whitespace-collapsed source -- confirm against history.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            indexes = np.random.choice(
                alternatives.index.values, len(alternatives.index.values),
                replace=False, p=p.values / p.values.sum())
            limit = len(alternatives.index.values)
            tenure2_ids = segment[segment.tenure == 2].index.values[:limit]
            if new_homes.ix[tenure2_ids].shape[0] != 0:
                new_homes.ix[tenure2_ids] = -2
            else:
                new_homes.ix[segment.index.values[:limit]] = \
                    alternatives.index.values
            mask.loc[indexes] = True
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[
            :len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True
        return mask, new_homes

    for income, household_count in income_segment.iteritems():
        choosers = simulation_table[simulation_table['income'] == income]
        print('Placing %d households with an income of % d' % (household_count, income))

        # Vacant units = residential units minus households already housed.
        housed = simulation_table[
            simulation_table['building_id'] != -1].groupby('building_id').size()
        empty_units = buildings.residential_units.sub(housed, fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        # One row per vacant unit (building ids are repeated).
        alternatives = buildings.ix[
            np.repeat(empty_units.index, empty_units.values.astype('int'))]

        # Price filter keyed on income / 12.
        monthly_income = income / 12
        if monthly_income <= 0:
            alts = alternatives[alternatives['unit_price_residential'] < 186281]
        elif monthly_income >= 55000:
            alts = alternatives[alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[
                alternatives['est_mortgage_payment'] / monthly_income <= 0.33]

        if alts.shape[0] == 0:
            homeless = pd.concat([choosers, homeless])
            print('Could not place %d households due to income restrictions' % household_count)
            continue

        pdf = pd.DataFrame(index=alts.index)
        segments = choosers.groupby(agents_groupby)

        # Choice probabilities, computed from the first agent of each segment.
        for name, segment in segments:
            segment = segment.head(1)
            name = str(name)
            tmp_coeffname = coeff_name % name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size

            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index, SAMPLE_SIZE)
            alternative_sample = pd.merge(
                alts_sample, segment, left_on='join_index', right_index=True,
                suffixes=('', '_r'))

            # Interaction variables are named '<left>_x_<right>'.
            interaction_vars = [(var, var.split('_x_'))
                                for var in ind_vars if '_x_' in var]
            for var, (left, right) in ((v, parts[:2]) for v, parts in interaction_vars):
                # elif chain: a '...gt' indicator must not be overwritten by
                # the product fallback.
                # NOTE(review): coefficients estimated with code that
                # multiplied '...gt' terms may need re-estimation -- confirm.
                if left.endswith('gt'):
                    alternative_sample[var] = (
                        alternative_sample[left] > alternative_sample[right]
                    ).astype('int32')
                elif left.endswith('lt'):
                    alternative_sample[var] = (
                        alternative_sample[left] < alternative_sample[right]
                    ).astype('int32')
                else:
                    alternative_sample[var] = (
                        alternative_sample[left] * alternative_sample[right])

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            data = est_data.fillna(0).as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(
                data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
            pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

        # -1 marks a household that has not been placed.
        new_homes = pd.Series(np.ones(len(choosers.index)) * -1,
                              index=choosers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s' % name]
            mask = pd.Series(np.zeros(len(alts.index), dtype='bool'),
                             index=alts.index)
            print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts()  # households placed per building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        table = simulation_table  # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        simulation_table.loc[table.index] = table

    # NOTE(review): hard-coded, user-specific output path.
    out_table.to_csv('C:/users/jmartinez/documents/households_new_location.csv')