def accessory_units(year, buildings, parcels):
    """Add this year's accessory units to randomly chosen residential buildings.

    The number of units to add per jurisdiction is read from the ``str(year)``
    column of ``data/accessory_units.csv`` (indexed by ``juris``).  Buildings
    whose ``general_type`` is "Residential" are drawn per jurisdiction with
    ``groupby_random_choice``; each draw adds one unit to that building's
    ``residential_units`` in ``buildings.local``.

    Parameters
    ----------
    year : value whose ``str()`` names a column of the csv
    buildings : table exposing ``parcel_id``, ``general_type`` and ``local``
    parcels : table exposing ``juris``

    Returns
    -------
    None.  ``buildings.local`` is modified in place.
    """
    units_by_juris = pd.read_csv(
        "data/accessory_units.csv", index_col="juris")[str(year)]

    # jurisdiction of every building, looked up through its parcel
    juris_by_building = misc.reindex(parcels.juris, buildings.parcel_id)
    is_residential = buildings.general_type == "Residential"
    residential_juris = juris_by_building[is_residential]

    picked = groupby_random_choice(residential_juris, units_by_juris)

    # a building can be picked more than once, so tally picks per index label
    units_per_building = pd.Series(picked.index).value_counts()

    buildings.local.loc[units_per_building.index, "residential_units"] += \
        units_per_building.values
def _proportional_jobs_model(
    target_ratio,  # ratio of jobs of this sector to households
    sector,        # empsix sector
    groupby_col,   # ratio will be matched at this level of geog
    hh_df,
    jobs_df,
    locations_series,
    target_jobs=None  # pass this to skip computing target jobs from hh_df
):
    """Choose locations for unplaced jobs of one sector to approach a target.

    The target number of jobs per ``groupby_col`` value is ``target_jobs``
    or, when that is None, the household count per value multiplied by
    ``target_ratio`` and truncated to int.  Wherever the target exceeds the
    number of ``sector`` jobs already present, unplaced jobs of that sector
    (``building_id == -1``) are paired with locations drawn by
    ``groupby_random_choice`` from ``locations_series``.

    Parameters
    ----------
    target_ratio : number
        Jobs of this sector per household; unused when ``target_jobs`` is
        given.
    sector : str
        Value matched against ``jobs_df.empsix``.
    groupby_col : str
        Column of ``hh_df`` / ``jobs_df`` holding the geography.
    hh_df : pd.DataFrame
        Households; only read when ``target_jobs`` is None.
    jobs_df : pd.DataFrame
        Jobs with ``empsix``, ``building_id`` and ``groupby_col`` columns.
    locations_series : pd.Series
        Candidate locations; values are geographies.
    target_jobs : pd.Series, optional
        Target job count per geography.

    Returns
    -------
    pd.Series
        Index labels of the rows returned by ``groupby_random_choice``,
        indexed by the ids of the jobs being placed.  Empty when there is
        nothing to place.
    """
    if target_jobs is None:
        # compute it if not passed
        target_jobs = hh_df[groupby_col].value_counts() * target_ratio
        target_jobs = target_jobs.astype('int')

    current_jobs = jobs_df[
        jobs_df.empsix == sector][groupby_col].value_counts()

    # a geography with no current jobs in this sector is absent from
    # current_jobs; plain subtraction would turn its need into NaN and the
    # filter below would then discard it, so treat missing counts as zero
    need_more_jobs = target_jobs.sub(current_jobs, fill_value=0)
    need_more_jobs = need_more_jobs[need_more_jobs > 0]
    need_more_jobs_total = int(need_more_jobs.sum())

    available_jobs = \
        jobs_df.query("empsix == '%s' and building_id == -1" % sector)

    # single-argument print(...) emits the same text on Python 2 and 3
    print("Need more jobs total: %d" % need_more_jobs_total)
    print("Available jobs: %d" % len(available_jobs))

    if len(available_jobs) == 0:
        # corner case
        return pd.Series()

    if len(available_jobs) >= need_more_jobs_total:
        # have enough jobs to assign, truncate available jobs
        available_jobs = available_jobs.head(need_more_jobs_total)
    else:
        # don't have enough jobs - random sample locations to partially
        # match the need (won't succeed in matching the entire need)
        need_more_jobs = round_series_match_target(
            need_more_jobs, len(available_jobs), 0)
        need_more_jobs_total = need_more_jobs.sum()

    assert need_more_jobs_total == len(available_jobs)

    if need_more_jobs_total <= 0:
        return pd.Series()

    print("Need more jobs\n%s" % (need_more_jobs,))

    # demand in a geography that has no candidate location cannot be drawn
    # for, so drop it rather than hand it to groupby_random_choice
    drop = need_more_jobs.index.difference(locations_series.unique())
    if len(drop) > 0:
        print("We don't have any locations for these locations:\n%s"
              % (drop,))
        need_more_jobs = need_more_jobs.drop(drop)

    if need_more_jobs.empty:
        return pd.Series()

    # choose random locations within jurises to match need_more_jobs totals
    choices = groupby_random_choice(locations_series, need_more_jobs)

    # fewer locations than jobs may have been chosen after the drop above
    available_jobs = available_jobs.head(len(choices))

    return pd.Series(choices.index, available_jobs.index)
def vacant_market_rate_units_minus_structural_vacancy(buildings, baseyear_taz_controls):
    """Vacant market-rate units per building, net of structural vacancy.

    For each zone the required number of vacant units is the zone's total
    ``residential_units`` times ``baseyear_taz_controls.target_ltvacancy``
    (truncated to int), capped at the number of vacant market-rate units the
    zone actually has.  That many vacant units are drawn per zone, without
    replacement, via ``groupby_random_choice`` and subtracted from each
    building's ``vacant_market_rate_units``.

    Returns
    -------
    pd.Series
        ``buildings.vacant_market_rate_units`` minus the units held vacant.
    """
    # required vacancies per zone = units in the zone * structural vacancy rate
    zone_unit_totals = \
        buildings.residential_units.groupby(buildings.zone_id).sum()
    vacancy_rate = baseyear_taz_controls.target_ltvacancy
    required_by_zone = (zone_unit_totals * vacancy_rate).astype("int")

    # one entry per vacant unit: the building's zone id repeated once per
    # vacant unit, keeping the building's index label
    vacant_counts = buildings.vacant_market_rate_units.astype("int")
    unit_zone_ids = buildings.zone_id.repeat(vacant_counts)

    # can't require more vacant units in a zone than the zone has
    available_by_zone = unit_zone_ids.value_counts()
    available_by_zone = available_by_zone.reindex(required_by_zone.index)
    available_by_zone = available_by_zone.fillna(0)
    required_by_zone = required_by_zone.clip(upper=available_by_zone)
    required_by_zone = required_by_zone.astype('int')

    # pick the units that stay vacant and are removed from the choice set
    held_vacant = groupby_random_choice(
        unit_zone_ids, required_by_zone, replace=False)

    # the index of the picks identifies the building, so tally per label
    held_per_building = pd.Series(held_vacant.index).value_counts()

    remaining = buildings.vacant_market_rate_units.sub(
        held_per_building, fill_value=0)
    return remaining
def _proportional_jobs_model(
    target_ratio,  # ratio of jobs of this sector to households
    sector,        # empsix sector
    groupby_col,   # ratio will be matched at this level of geog
    hh_df,
    jobs_df,
    locations_series,
    target_jobs=None  # pass this to skip computing target jobs from hh_df
):
    """Choose locations for unplaced jobs of one sector to approach a target.

    The target number of jobs per ``groupby_col`` value is ``target_jobs``
    or, when that is None, the household count per value multiplied by
    ``target_ratio`` and truncated to int.  Wherever the target exceeds the
    number of ``sector`` jobs already present, unplaced jobs of that sector
    (``building_id == -1``) are paired with locations drawn by
    ``groupby_random_choice`` (with ``replace=True``) from
    ``locations_series``.  Demand in geographies that have no candidate
    location is dropped.

    Parameters
    ----------
    target_ratio : number
        Jobs of this sector per household; unused when ``target_jobs`` is
        given.
    sector : str
        Value matched against ``jobs_df.empsix``.
    groupby_col : str
        Column of ``hh_df`` / ``jobs_df`` holding the geography.
    hh_df : pd.DataFrame
        Households; only read when ``target_jobs`` is None.
    jobs_df : pd.DataFrame
        Jobs with ``empsix``, ``building_id`` and ``groupby_col`` columns.
    locations_series : pd.Series
        Candidate locations; values are geographies.
    target_jobs : pd.Series, optional
        Target job count per geography.

    Returns
    -------
    pd.Series
        Index labels of the rows returned by ``groupby_random_choice``,
        indexed by the ids of the jobs being placed.  Empty when there is
        nothing to place.
    """
    if target_jobs is None:
        # compute it if not passed
        target_jobs = hh_df[groupby_col].value_counts() * target_ratio
        target_jobs = target_jobs.astype('int')

    current_jobs = jobs_df[
        jobs_df.empsix == sector][groupby_col].value_counts()

    # a geography with no current jobs in this sector is absent from
    # current_jobs; plain subtraction would turn its need into NaN and the
    # filter below would then discard it, so treat missing counts as zero
    need_more_jobs = target_jobs.sub(current_jobs, fill_value=0)
    need_more_jobs = need_more_jobs[need_more_jobs > 0]
    need_more_jobs_total = int(need_more_jobs.sum())

    available_jobs = \
        jobs_df.query("empsix == '%s' and building_id == -1" % sector)

    # single-argument print(...) emits the same text on Python 2 and 3
    print("Need more jobs total: %d" % need_more_jobs_total)
    print("Available jobs: %d" % len(available_jobs))

    if len(available_jobs) == 0:
        # corner case
        return pd.Series()

    if len(available_jobs) >= need_more_jobs_total:
        # have enough jobs to assign, truncate available jobs
        available_jobs = available_jobs.head(need_more_jobs_total)
    else:
        # don't have enough jobs - random sample locations to partially
        # match the need (won't succeed in matching the entire need)
        need_more_jobs = round_series_match_target(
            need_more_jobs, len(available_jobs), 0)
        need_more_jobs_total = need_more_jobs.sum()

    assert need_more_jobs_total == len(available_jobs)

    if need_more_jobs_total <= 0:
        return pd.Series()

    print("Need more jobs\n%s" % (need_more_jobs,))

    excess = need_more_jobs.sub(locations_series.value_counts(), fill_value=0)
    print("Excess demand\n%s" % (excess[excess > 0],))

    # there's an issue with groupby_random_choice where it can't choose from
    # a set of locations that don't exist - e.g. we have 2 jobs in a certain
    # city but no locations to put them in.  we need to drop this demand
    drop = need_more_jobs.index.difference(locations_series.unique())
    print("We don't have any locations for these locations:\n%s" % (drop,))
    need_more_jobs = need_more_jobs.drop(drop)

    if need_more_jobs.empty:
        # every geography with demand lacked locations; nothing to place
        return pd.Series()

    # choose random locations within jurises to match need_more_jobs totals
    choices = groupby_random_choice(locations_series, need_more_jobs,
                                    replace=True)

    # these might not be the same length after dropping a few lines above
    available_jobs = available_jobs.head(len(choices))

    return pd.Series(choices.index, available_jobs.index)