예제 #1
0
def parcel_summary(parcels, buildings, households, jobs, run_number, year,
                   parcels_zoning_calculations, initial_year, final_year):
    """Write a per-parcel summary CSV in the first and last simulation years,
    and in the final year also write a CSV of per-parcel differences
    against the initial-year file.

    Side effects: writes runs/run{run_number}_parcel_data_{year}.csv and,
    in the final year, runs/run{run_number}_parcel_data_diff.csv.
    """

    # only snapshot the bookend years of the simulation
    if year not in [initial_year, final_year]:
        return

    df = parcels.to_frame([
        "x", "y", "total_residential_units", "total_job_spaces",
        "first_building_type_id"
    ])

    # zoning capacity measures computed on a separate wrapper table
    df2 = parcels_zoning_calculations.to_frame(
        ["zoned_du", "zoned_du_underbuild", "zoned_du_underbuild_nodev"])

    df = df.join(df2)

    households_df = orca.merge_tables(
        'households', [buildings, households],
        columns=['parcel_id', 'base_income_quartile'])

    # add households by quartile on each parcel
    for i in range(1, 5):
        df['hhq%d' % i] = households_df[
            households_df.base_income_quartile == i].\
            parcel_id.value_counts()

    jobs_df = orca.merge_tables('jobs', [buildings, jobs],
                                columns=['parcel_id', 'empsix'])

    # add jobs by empsix category on each parcel
    for cat in jobs_df.empsix.unique():
        df[cat] = jobs_df[jobs_df.empsix == cat].\
            parcel_id.value_counts()

    df.to_csv(
        os.path.join("runs", "run%d_parcel_data_%d.csv" % (run_number, year)))

    if year == final_year:

        # do diff with initial year

        df2 = pd.read_csv(os.path.join(
            "runs", "run%d_parcel_data_%d.csv" % (run_number, initial_year)),
                          index_col="parcel_id")

        for col in df.columns:

            # coordinates and building type are identifiers, not quantities
            if col in ["x", "y", "first_building_type_id"]:
                continue

            df[col] = df[col] - df2[col]

        df.to_csv(
            os.path.join("runs", "run%d_parcel_data_diff.csv" % run_number))
예제 #2
0
def juris_ave_income(households, buildings, parcels_geography, parcels):
    """Log1p of the median household income of each parcel's jurisdiction;
    parcels with no jurisdiction match get the overall median."""
    merged = orca.merge_tables("households",
                               [households, buildings, parcels_geography],
                               columns=["jurisdiction_id", "income"])
    median_income = merged.groupby(merged.jurisdiction_id).income.quantile(.5)
    by_parcel = misc.reindex(median_income, parcels_geography.jurisdiction_id)
    by_parcel = by_parcel.reindex(parcels.index)
    return by_parcel.fillna(median_income.median()).apply(np.log1p)
예제 #3
0
def ln_pop_within_20min(zones, t_data_dist20):
    """Log1p of the zonal population reachable within 20 minutes,
    mapped onto each building via its zone."""
    bldg = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                             columns=['zone_id'])
    travel = t_data_dist20.to_frame()
    # attach the destination zone's population to every travel pair
    travel.loc[:, 'attr'] = zones.zonal_pop[t_data_dist20.to_zone_id].values
    pop_in_range = travel.groupby(level=0).attr.apply(np.sum)
    return reindex(pop_in_range, bldg.zone_id).apply(np.log1p)
예제 #4
0
 def hh_merged():
     """Households merged with units, buildings, parcels and both
     network-node tables."""
     merged = orca.merge_tables(
         target='households',
         tables=['households', 'units', 'buildings',
                 'parcels', 'nodessmall', 'nodeswalk'])
     return merged
예제 #5
0
def juris_ave_income(households, buildings, parcels_geography, parcels):
    """Log1p of median household income by jurisdiction, broadcast to
    parcels; parcels without a jurisdiction match get the overall median."""
    h = orca.merge_tables("households",
                          [households, buildings, parcels_geography],
                          columns=["jurisdiction_id", "income"])
    # median income per jurisdiction (quantile(.5) == median)
    s = h.groupby(h.jurisdiction_id).income.quantile(.5)
    # map to parcels, fill unmatched parcels, then log-transform
    return misc.reindex(s, parcels_geography.jurisdiction_id).\
        reindex(parcels.index).fillna(s.median()).apply(np.log1p)
예제 #6
0
def ln_non_residential_sqft_zone():
    """Percentage of each zone's residential units that are in buildings of
    btype_hlcm == 3.

    NOTE(review): despite the name, this computes a share of
    *residential_units*, not non-residential sqft, and applies no log —
    confirm the name/metric mismatch is intentional.
    """
    b = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['btype_hlcm', 'residential_units', 'zone_id'])
    # share (in percent) of zone's units in btype 3 buildings
    return b[b.btype_hlcm == 3].groupby('zone_id').residential_units.sum(
    ) * 100.0 / (b.groupby('zone_id').residential_units.sum())
예제 #7
0
def avg_unit_price_zone(zones):
    """Mean residential unit price per zone; zones with no qualifying
    buildings are filled with the mean over all zones.

    Parameters
    ----------
    zones : DataFrameWrapper
        Zones table; its index defines the output index.

    Returns
    -------
    pandas.Series named 'avg_unit_price_zone', indexed by zone.
    """
    b = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                          columns=['residential_units', 'improvement_value',
                                   'unit_price_residential', 'zone_id'])
    # combine the masks with an explicit boolean AND (the original
    # multiplied the two boolean Series, which obscures intent)
    b = b[(b.residential_units > 0) & (b.improvement_value > 0)].groupby(
        'zone_id').unit_price_residential.mean()
    out = pd.Series(index=zones.index, name='avg_unit_price_zone')
    out.loc[b.index] = b
    # zones with no priced residential stock get the global mean
    out.loc[out.isnull()] = b.mean()
    return out
예제 #8
0
def ln_pop_within_20min(zones, t_data_dist20):
    """Log1p of the zonal population reachable within 20 minutes,
    mapped onto each building via its zone."""
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['zone_id'])
    zonal_pop = zones.zonal_pop
    t_data = t_data_dist20.to_frame()
    # attach the destination zone's population to every travel pair
    t_data.loc[:, 'attr'] = zonal_pop[t_data_dist20.to_zone_id].values
    zone_time_range = t_data.groupby(level=0).attr.apply(np.sum)
    # BUG FIX: the accessibility series was computed but never returned;
    # the identical variant of this function elsewhere in the file returns
    # the reindexed, log-transformed series
    return reindex(zone_time_range, b.zone_id).apply(np.log1p)
예제 #9
0
def jobs_within_15min(zones, t_data_dist15):
    """Zonal employment reachable within 15 minutes, mapped to buildings."""
    bldg = orca.merge_tables('buildings',
                             tables=['buildings', 'parcels'],
                             columns=['zone_id'])
    travel = t_data_dist15.to_frame()
    # attach the destination zone's employment to each travel pair
    travel.loc[:, 'attr'] = zones.zonal_emp[t_data_dist15.to_zone_id].values
    emp_in_range = travel.groupby(level=0).attr.apply(np.sum)
    return reindex(emp_in_range, bldg.zone_id)
예제 #10
0
def btype(households):
    """Recode each household's building_type_id into four categories:
    1 for type 2, 2 for type 3, 3 for type 20, 4 for anything else."""
    df = orca.merge_tables('households',
                           tables=['households', 'buildings'],
                           columns=['building_type_id'])
    bt = df.building_type_id
    known = np.in1d(bt, [2, 3, 20])
    # the three known types are mutually exclusive, so the sum picks
    # exactly one code; unknown types fall into category 4
    return (1 * (bt == 2) + 2 * (bt == 3) + 3 * (bt == 20)
            + 4 * np.invert(known))
예제 #11
0
def ln_emp_sector5_within_20min(t_data_dist20):
    """Log1p of sector-5 employment reachable within 20 minutes,
    mapped onto buildings via their zone."""
    bldg = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                             columns=['zone_id'])
    est = orca.get_table('establishments').to_frame(
        columns=['sector_id_six', 'zone_id', 'employees'])
    est = est.loc[est.sector_id_six == 5]
    emp_by_zone = est.groupby('zone_id').employees.sum()
    travel = t_data_dist20.to_frame()
    travel.loc[:, 'attr'] = emp_by_zone[t_data_dist20.to_zone_id].values
    emp_in_range = travel.groupby(level=0).attr.apply(np.sum)
    return reindex(emp_in_range, bldg.zone_id).apply(np.log1p)
예제 #12
0
    def _get_df(self,
                tables='unset',
                fallback_tables=None,
                filters='unset',
                model_expression=None):
        """
        Assemble a DataFrame for estimation or prediction using Orca and
        `urbansim.models.util`. Call this immediately before estimating or
        predicting so it reflects the current data state.

        Only the necessary columns are included: those mentioned in the
        model expression or filters, plus (it appears) the index of each
        merged table. Relevant row filters are applied.

        TO DO - this method is a generalization of _get_data(), and should
        replace it, but does not currently support column filtering or
        PyLogit model expressions.

        Parameters
        ----------
        tables : str or list of str, optional
            Name of table or tables. If not provided, `self.tables` is used.

        fallback_tables : str or list of str, optional
            Table(s) to use if the first argument evaluates to `None`.

        filters : str or list of str, optional
            Filter(s) to apply. If not provided, `self.filters` is used.

        model_expression : NOT YET IMPLEMENTED
            Model expression, for determining which columns are needed. If
            not provided, `self.model_expression` will be used.

            TO DO - this needs to handle the large MNL case where there are
            two sets of data tables, so we can't use
            urbansim.models.util.columns_in_formula()

        Returns
        -------
        DataFrame
        """
        # 'unset' is a sentinel so callers can explicitly pass None
        if tables == 'unset':
            tables = self.tables
        if tables is None:
            tables = fallback_tables
        if filters == 'unset':
            filters = self.filters

        if isinstance(tables, list):
            df = orca.merge_tables(target=tables[0], tables=tables)
        else:
            df = orca.get_table(tables).to_frame()

        return util.apply_filter_query(df, filters)
예제 #13
0
def avg_unit_price_zone():
    """Mean non-residential unit price per zone, over buildings that have
    both non-residential sqft and a positive improvement value.

    Returns
    -------
    pandas.Series indexed by zone_id.
    """
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=[
                              'non_residential_sqft', 'improvement_value',
                              'unit_price_non_residential', 'zone_id'
                          ])

    # combine the masks with an explicit boolean AND (the original
    # multiplied the two boolean Series, which obscures intent)
    return b[(b.non_residential_sqft > 0) & (b.improvement_value > 0)].groupby(
        'zone_id').unit_price_non_residential.mean()
예제 #14
0
def emp_transition(employment_control_totals, year):
    """Run the employment transition model against control totals and
    register the updated establishments table with orca.

    Newly added establishments get zone_id / building_id of -1 (unplaced).
    """
    location_fname = 'zone_id'
    tran = TabularTotalsTransition(employment_control_totals.to_frame(), 'total_number_of_jobs', accounting_column='employees')
    df = orca.merge_tables('establishments', tables=['establishments', 'counties'])
    # print() form is valid in both Python 2 and 3 (original used the
    # Python-2-only print statement while this codebase also has f-strings)
    print("%d establishments with %d employees before transition" % (len(df.index), df.employees.sum()))
    df, added, copied, removed = tran.transition(df, year)
    print("%d establishments with %d employees after transition" % (len(df.index), df.employees.sum()))
    # new establishments are unplaced until a location model assigns them
    df.loc[added, location_fname] = -1
    df.loc[added, "building_id"] = -1
    orca.add_table('establishments', df.loc[:, orca.get_table('establishments').local_columns])
예제 #15
0
def juris_ave_income(households, buildings, parcels_geography, parcels):
    """Log1p of each jurisdiction's median household income, broadcast to
    parcels; unmatched parcels receive the overall median."""
    # income and jurisdiction for every household
    hh = orca.merge_tables("households",
                           [households, buildings, parcels_geography],
                           columns=["jurisdiction_id", "income"])
    # median income per jurisdiction
    med = hh.groupby(hh.jurisdiction_id).income.quantile(.5)
    # broadcast to parcels and fill gaps with the overall median
    # (the log transform arguably belongs in the model spec; kept here)
    per_parcel = misc.reindex(med, parcels_geography.jurisdiction_id)
    return per_parcel.reindex(parcels.index).fillna(med.median()).apply(np.log1p)
예제 #16
0
def to_frame(tables, cfg, additional_columns=None):
    """Merge the given orca tables into one DataFrame, keeping only the
    columns used by the model yaml config plus any extras.

    Parameters
    ----------
    tables : list of DataFrameWrapper
        Tables to merge; None entries are dropped.
    cfg : str
        Path to the model yaml configuration file.
    additional_columns : list of str, optional
        Extra columns to include beyond those the config uses.

    Returns
    -------
    DataFrame
    """
    # avoid the shared mutable-default-argument pitfall ([] as default)
    if additional_columns is None:
        additional_columns = []
    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + additional_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables, columns=columns)
    else:
        df = tables[0].to_frame(columns)
    df = deal_with_nas(df)
    return df
예제 #17
0
def juris_ave_income(households, buildings, parcels_geography, parcels):
    """Log1p of median household income by jurisdiction, mapped to parcels;
    parcels with no jurisdiction match get the overall median."""
    # get frame of income and jurisdiction
    h = orca.merge_tables("households",
                          [households, buildings, parcels_geography],
                          columns=["jurisdiction_id", "income"])
    # get median income by jurisdiction (quantile(.5) == median)
    s = h.groupby(h.jurisdiction_id).income.quantile(.5)
    # map it to parcels - fill na with median for all areas
    # should probably remove the log transform and do that in the models
    return misc.reindex(s, parcels_geography.jurisdiction_id).\
        reindex(parcels.index).fillna(s.median()).apply(np.log1p)
예제 #18
0
def to_frame(tables, cfg, additional_columns=None):
    """Merge the given orca tables into one DataFrame, keeping only the
    columns used by the model yaml config plus any extras.

    Parameters
    ----------
    tables : list of DataFrameWrapper
        Tables to merge; None entries are dropped.
    cfg : str
        Path to the model yaml configuration file.
    additional_columns : list of str, optional
        Extra columns to include beyond those the config uses.

    Returns
    -------
    DataFrame
    """
    # avoid the shared mutable-default-argument pitfall ([] as default)
    if additional_columns is None:
        additional_columns = []
    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + additional_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables, columns=columns)
    else:
        df = tables[0].to_frame(columns)
    df = deal_with_nas(df)
    return df
예제 #19
0
def jobs_subset(jobs, parcels, buildings):
    """Jobs merged through buildings to parcels, with zone_id and empsix.

    NOTE(review): the merge produces suffixed zone_id columns
    (zone_id_x, ...) because multiple source tables carry zone_id;
    the parcel-side zone_id_x is copied over the null-filled zone_id.
    """

    df = orca.merge_tables('jobs', [parcels, buildings, jobs],
                           columns=['zone_id', 'empsix'])

    # totally baffled by this - after joining the three tables we have three
    # zone_ids, one from the parcel table, one from buildings, and one from jobs
    # and the one called zone_id has null values while there others do not
    # going to change this while I think about this
    df["zone_id"] = df.zone_id_x

    return df
예제 #20
0
def get_person_geo():
    """Attach FAZ, zone and city ids to the merged person/household table
    and register the result with orca as 'my_person_geo'.

    NOTE(review): `df3` is not defined anywhere in this function — it is
    presumably a module- or notebook-level DataFrame carrying block-group
    geography. Confirm it is in scope; otherwise this raises NameError.
    """
    # join person data with household data
    df4 = orca.merge_tables(target='my_household', tables=['my_person', 'my_household'])
    # build block-group -> geography lookup dictionaries
    faz_dict = dict(zip(df3['census_2010_block_group_id'], df3['faz_id']))
    zone_dict = dict(zip(df3['census_2010_block_group_id'], df3['zone_id']))
    city_dict = dict(zip(df3['census_2010_block_group_id'], df3['city_id']))
    # map geo info to person table
    df4['faz_id'] = df4['census_2010_block_group_id'].map(faz_dict)
    df4['zone_id'] = df4['census_2010_block_group_id'].map(zone_dict)
    df4['city_id'] = df4['census_2010_block_group_id'].map(city_dict)
    orca.add_table('my_person_geo', df4)
예제 #21
0
def ln_emp_sector5_within_20min(t_data_dist20):
    """Log1p of sector-5 employment reachable within 20 minutes,
    mapped onto buildings via their zone."""
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['zone_id'])
    e = orca.get_table('establishments').to_frame(
        columns=['sector_id_six', 'zone_id', 'employees'])
    # restrict to sector 5 establishments
    e = e.loc[e.sector_id_six == 5]
    zonal_emp = e.groupby('zone_id').employees.sum()
    t_data = t_data_dist20.to_frame()
    # attach the destination zone's employment to each travel pair
    t_data.loc[:, 'attr'] = zonal_emp[t_data_dist20.to_zone_id].values
    # sum over destinations per origin zone (index level 0)
    zone_time_range = t_data.groupby(level=0).attr.apply(np.sum)
    return reindex(zone_time_range, b.zone_id).apply(np.log1p)
예제 #22
0
    def _get_data(self, task='fit'):
        """
        DEPRECATED - this should be replaced by the more general utils.get_data()

        Generate a data table for estimation or prediction, relying on functionality from
        Orca and UrbanSim.models.util. This should be performed immediately before
        estimation or prediction so that it reflects the current data state.

        The output includes only the necessary columns: those mentioned in the model
        expression or filters, plus (it appears) the index of each merged table. Relevant
        filter queries are applied.

        Parameters
        ----------
        task : 'fit' or 'predict'

        Returns
        -------
        DataFrame

        Raises
        ------
        ValueError
            If `model_expression` is not a string, or `task` is not one of
            'fit' / 'predict'.
        """
        # TO DO - verify input data

        if not isinstance(self.model_expression, str):
            # previously a non-str expression fell through and expr_cols
            # was referenced while undefined, raising a confusing NameError
            raise ValueError("model_expression must be a string")
        expr_cols = util.columns_in_formula(self.model_expression)

        if task == 'fit':
            tables = self.tables
            columns = expr_cols + util.columns_in_filters(self.filters)
            filters = self.filters

        elif task == 'predict':
            # prediction may use a distinct set of output tables/filters
            if self.out_tables is not None:
                tables = self.out_tables
            else:
                tables = self.tables

            columns = expr_cols + util.columns_in_filters(self.out_filters)
            if self.out_column is not None:
                columns += [self.out_column]

            filters = self.out_filters

        else:
            # previously an unknown task produced UnboundLocalError below
            raise ValueError("task must be 'fit' or 'predict'")

        if isinstance(tables, list):
            df = orca.merge_tables(target=tables[0],
                                   tables=tables,
                                   columns=columns)
        else:
            df = orca.get_table(tables).to_frame(columns)

        df = util.apply_filter_query(df, filters)
        return df
예제 #23
0
def parcel_avg_price(use):
    """Average building price for the given use, averaged by zone and
    broadcast to parcels.

    For residential use the per-unit price is converted to a price per
    sqft before averaging.

    Parameters
    ----------
    use : str
        A key of the 'use_btype' injectable mapping use names to lists of
        building type ids.
    """
    buildings = orca.merge_tables('buildings', tables=['buildings','parcels'],
                                  columns=['unit_price_residential','building_type_id','residential_sqft',
                                           'zone_id', 'unit_price_non_residential'])
    use_btype = orca.get_injectable('use_btype')
    # hoist the membership mask, which was recomputed three times
    in_use = np.in1d(buildings.building_type_id, use_btype[use])
    if use == 'residential':
        # convert unit price to price per sqft before zone-averaging
        price = (buildings.unit_price_residential.loc[in_use] /
                 buildings.residential_sqft.loc[in_use]).groupby(buildings.zone_id).mean()
    else:
        price = buildings.unit_price_non_residential.loc[in_use].groupby(buildings.zone_id).mean()

    return misc.reindex(price, orca.get_table('parcels').zone_id)
예제 #24
0
def avg_unit_price_zone(zones):
    """Mean residential unit price per zone; zones with no qualifying
    buildings are filled with the mean across all zones.

    Parameters
    ----------
    zones : DataFrameWrapper
        Zones table; its index defines the output index.

    Returns
    -------
    pandas.Series named 'avg_unit_price_zone', indexed by zone.
    """
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=[
                              'residential_units', 'improvement_value',
                              'unit_price_residential', 'zone_id'
                          ])
    # combine the masks with an explicit boolean AND (the original
    # multiplied the two boolean Series, which obscures intent)
    b = b[(b.residential_units > 0) & (b.improvement_value > 0)].groupby(
        'zone_id').unit_price_residential.mean()
    out = pd.Series(index=zones.index, name='avg_unit_price_zone')
    out.loc[b.index] = b
    # zones with no priced residential stock get the global mean
    out.loc[out.isnull()] = b.mean()
    return out
예제 #25
0
def hh_transition(households, household_control_totals, year):
    """Run the household transition model against control totals and
    register the updated households table with orca.

    Newly added households get zone_id / building_id of -1 (unplaced).
    """
    location_fname = 'zone_id'
    tran = TabularTotalsTransition(household_control_totals.to_frame(), 'households')
    df = orca.merge_tables('households', tables=['households', 'counties'])
    # print() form is valid in both Python 2 and 3 (original used the
    # Python-2-only print statement while this codebase also has f-strings)
    print("{0} households with a total population of {1}".format(len(df.index), df.persons.sum()))
    df, added, copied, removed = tran.transition(df, year)
    print("{0} households with a total population of {1}".format(len(df.index), df.persons.sum()))
    # new households are unplaced until a location model assigns them
    df.loc[added, location_fname] = -1
    df.loc[added, 'building_id'] = -1
    orca.add_table('households', df.loc[:, orca.get_table('households').local_columns])

    print(df.groupby('county_id').persons.sum())
예제 #26
0
def TOD_choice_simulate():
    """
    Generate time of day period choices for the synthetic population
    home-work and work-home trips.

    Side effects: registers a temporary orca table 'tripsA', runs the
    'TOD_choice' model step, and replaces the 'persons' table with one
    carrying the chosen 'TOD' column.
    """
    TOD_obs = orca.merge_tables('persons', ['persons', 'households', 'jobs'])

    # drop persons without a complete home/work record
    TOD_obs.dropna(inplace=True)

    skims = pd.read_csv('./data/skims_110118.csv')

    # home->work skims (suffix _HW after the second merge)
    TOD_obs = pd.merge(TOD_obs,
                       skims,
                       how='left',
                       left_on=['zone_id_home', 'zone_id_work'],
                       right_on=['orig', 'dest'])

    # work->home skims (suffix _WH)
    TOD_obs = pd.merge(TOD_obs,
                       skims,
                       how='left',
                       left_on=['zone_id_work', 'zone_id_home'],
                       right_on=['orig', 'dest'],
                       suffixes=('_HW', '_WH'))

    TOD_list = ['EA', 'AM', 'MD', 'PM', 'EV']

    # total round-trip drive-alone time for every outbound/return
    # period combination
    for tod1 in TOD_list:
        for tod2 in TOD_list:
            col_name = f'da_Time_{tod1}_{tod2}'
            TOD_obs[col_name] = TOD_obs[f'da_Time_{tod1}_HW'] + TOD_obs[
                f'da_Time_{tod2}_WH']

    # TOD_obs['TOD'] = None

    m = mm.get_step('TOD_choice')

    # register the prepared observations so the model step can read them
    @orca.table(cache=True)
    def tripsA():
        return TOD_obs

    m.run()

    # pull the simulated TOD back onto persons and re-register the table
    results = orca.get_table('tripsA').to_frame()
    persons = orca.get_table('persons').to_frame()
    persons = pd.merge(persons,
                       results[['TOD']],
                       how='left',
                       left_index=True,
                       right_index=True)
    orca.add_table('persons', persons)
예제 #27
0
def establishments(store, zone_redevelopment):
    """Establishments table with tenants of redeveloped buildings unplaced
    and a zone_id column looked up through each establishment's building.

    Parameters
    ----------
    store : HDF-store-like mapping with 'establishments' and 'buildings'.
    zone_redevelopment : table with a parcel_id column naming parcels
        slated for redevelopment.
    """
    e = store['establishments']
    b = store['buildings']
    # buildings on redevelopment parcels lose their tenants
    b_removed_idx = b.loc[np.in1d(b.parcel_id,
                                  zone_redevelopment.parcel_id)].index
    zone_ids = orca.merge_tables('buildings',
                                 tables=['buildings', 'parcels'],
                                 columns=['zone_id']).zone_id
    # only keep establishments with employees
    e = e.loc[e.employees > 0]
    e.loc[e.building_id.isin(b_removed_idx), 'building_id'] = -1
    # building_id -1 has no zone; fill with -1 before the int cast
    e.loc[:, 'zone_id'] = zone_ids[e.building_id].fillna(-1).values.astype(
        'int32')
    # removed a dead `e.zone_id.fillna(-1)` call whose result was discarded;
    # the assignment above already fills NaNs with -1
    return e
예제 #28
0
def household_to_buildings(households, buildings):
    """Prepare unplaced households and one row per vacant residential unit.

    NOTE(review): as written this only prints the two counts; any actual
    assignment presumably happens elsewhere or was removed.
    """
    unplaced_hh = households.to_frame(columns=["zone_id", "building_id"])
    # building_id == -1 marks households not yet placed in a building
    unplaced_hh = unplaced_hh.loc[unplaced_hh.building_id == -1]

    b = orca.merge_tables(
        "buildings",
        tables=["parcels", "buildings"],
        columns=["zone_id", "residential_units", "vacant_residential_units"],
    )
    vacant_units = b.loc[b.vacant_residential_units > 0, "vacant_residential_units"]
    # expand to one row per vacant unit by repeating each building's index
    units = b.loc[np.repeat(vacant_units.index.values, vacant_units.values.astype("int"))].reset_index()

    # print() form is valid in both Python 2 and 3 (original used the
    # Python-2-only print statement while this codebase also has f-strings)
    print(len(units))
    print(len(unplaced_hh))
예제 #29
0
def emp_transition(tbl, location_fname, year):
    """Run the employment transition model against control totals and
    register the updated establishments table with orca.

    Newly added establishments get location_fname / building_id of -1
    (unplaced). The full post-transition frame is cached as 'updated_emp'
    for the rest of the iteration.
    """
    tran = TabularTotalsTransition(tbl.to_frame(), 'total_number_of_jobs', accounting_column='employees')
    # removed unused locals `cols`/`add_cols`: they were built but never
    # passed to the merge below
    df = orca.merge_tables('establishments', tables=['establishments', 'counties'])

    # print() form is valid in both Python 2 and 3 (original used the
    # Python-2-only print statement while this codebase also has f-strings)
    print("%d establishments with %d employees before transition" % (len(df.index), df.employees.sum()))
    df, added, copied, removed = tran.transition(df, year)
    print("%d establishments with %d employees after transition" % (len(df.index), df.employees.sum()))

    # new establishments are unplaced until a location model assigns them
    df.loc[added, location_fname] = -1
    df.loc[added, "building_id"] = -1
    orca.add_table('establishments', df.loc[:, orca.get_table('establishments').local_columns])
    orca.add_table('updated_emp', df, cache=True, cache_scope='iteration')
예제 #30
0
def building_summary(parcels, run_number, year, buildings, initial_year,
                     final_year):
    """Write a per-building CSV snapshot in the first and last sim years."""

    # only snapshot the bookend years of the simulation
    if year not in [initial_year, final_year]:
        return

    cols = ['performance_zone', 'year_built', 'residential_units',
            'unit_price', 'zone_id', 'non_residential_sqft',
            'deed_restricted_units', 'job_spaces', 'x', 'y']
    df = orca.merge_tables('buildings', [parcels, buildings], columns=cols)

    out_path = os.path.join(
        "runs", "run%d_building_data_%d.csv" % (run_number, year))
    df.to_csv(out_path)
예제 #31
0
def building_summary(parcels, run_number, year,
                     buildings,
                     initial_year, final_year):
    """Write a per-building CSV snapshot in the first and last simulation
    years; no-op in all other years.

    Side effect: writes runs/run{run_number}_building_data_{year}.csv.
    """

    # only snapshot the bookend years of the simulation
    if year not in [initial_year, final_year]:
        return

    df = orca.merge_tables(
        'buildings',
        [parcels, buildings],
        columns=['performance_zone', 'year_built', 'residential_units',
                 'unit_price', 'zone_id', 'non_residential_sqft',
                 'deed_restricted_units', 'job_spaces', 'x', 'y'])

    df.to_csv(
        os.path.join("runs", "run%d_building_data_%d.csv" %
                     (run_number, year))
    )
예제 #32
0
def hh_transition(households, tbl, location_fname, year):
    """Run the DRCOG household transition model, using net migration as the
    placement probability distribution, and register the updated
    households table with orca.

    Newly added households get location_fname / building_id of -1
    (unplaced). The full post-transition frame is cached as 'updated_hh'
    for the rest of the iteration.
    """
    migration = orca.get_table('migration_data').to_frame()
    # normalize net migration into a probability distribution
    pdf = migration['net_migration'] / migration.net_migration.sum()

    tran = DRCOGHouseholdTransitionModel(tbl.to_frame(), 'total_number_of_households',
                                         prob_dist = [pdf], migration_data=migration)

    cols = orca.get_table('households').local_columns
    add_cols = ['zone_id','county_id']
    cols = cols + add_cols
    df = orca.merge_tables('households', tables=['households','counties'])
    # print() form is valid in both Python 2 and 3 (original used the
    # Python-2-only print statement while this codebase also has f-strings)
    print("%d households before transition" % len(df.index))
    df, added, copied, removed = tran.transition(df, year, [pdf])
    print("%d households after transition" % len(df.index))

    # new households are unplaced until a location model assigns them
    df.loc[added, location_fname] = -1
    df.loc[added, 'building_id'] = -1
    orca.add_table('households', df.loc[:, orca.get_table('households').local_columns])
    orca.add_table('updated_hh', df, cache=True, cache_scope='iteration')
예제 #33
0
def supply_demand(cfg, hh_demand, alternatives, price_col, reg_col=None, units_col=None, iterations=1):
    """Run the supply/demand price equilibration for a location choice
    model, update zone-level prices, and push the new zone prices down
    onto buildings.

    Parameters
    ----------
    cfg : str
        Path to the LCM yaml config.
    hh_demand : DataFrameWrapper
        Demand-side agents (households).
    alternatives : DataFrameWrapper
        Supply-side alternatives (zones); updated in place via
        update_col_from_series.
    price_col : str
        Zone price column ('avg_unit_price_zone' or
        'avg_nonres_unit_price_zone').
    reg_col, units_col : str, optional
        Regional segmentation and units columns passed to supply_and_demand.
    iterations : int
        Number of equilibration iterations.
    """
    lcm = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    demand_frame = hh_demand.to_frame()
    alts_frame = alternatives.to_frame(columns=[units_col, price_col])
    alts_seg = alts_frame.index.values
    # price changes are clipped to [1, 100] per iteration
    new_price, zone_ratios = supplydemand.supply_and_demand(lcm, demand_frame, alts_frame, alts_seg,
                                                            price_col, iterations=iterations, reg_col=reg_col, clip_change_low=1,
                                                            clip_change_high=100)
    alternatives.update_col_from_series(price_col, new_price)
    #update building prices from zones
    buildings = orca.merge_tables('buildings', tables=['buildings','parcels'], columns=['unit_price_residential',
                                                                                        'unit_price_non_residential',
                                                                                        'zone_id'])
    # map the zone-level price column onto the buildings price column
    new_price_name = {'avg_unit_price_zone' : 'unit_price_residential',
                      'avg_nonres_unit_price_zone': 'unit_price_non_residential'}

    new_price = new_price.fillna(0)

    new_price_df = pd.DataFrame(new_price)

    # join the new zone price to each building via its zone_id
    merged = pd.merge(buildings, new_price_df, left_on='zone_id', right_index=True)
    orca.get_table('buildings').update_col_from_series(new_price_name[price_col], merged[price_col])
예제 #34
0
def to_frame(tbl, join_tbls, cfg, additional_columns=None):
    """
    Leverage all the built in functionality of the sim framework to join to
    the specified tables, only accessing the columns used in cfg (the model
    yaml configuration file), and any additionally passed columns (the sim
    framework is smart enough to figure out which table to grab the column
    off of)

    Parameters
    ----------
    tbl : DataFrameWrapper
        The table to join other tables to
    join_tbls : list of DataFrameWrappers or strs
        A list of tables to join to "tbl"
    cfg : str
        The filename of a yaml configuration file from which to parse the
        strings which are actually used by the model
    additional_columns : list of strs, optional
        A list of additional columns to include

    Returns
    -------
    A single DataFrame with the index from tbl and the columns used by cfg
    and any additional columns specified
    """
    # avoid the shared mutable-default-argument pitfall ([] as default)
    if additional_columns is None:
        additional_columns = []
    join_tbls = join_tbls if isinstance(join_tbls, list) else [join_tbls]
    tables = [tbl] + join_tbls
    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + additional_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables,
                               columns=columns)
    else:
        df = tables[0].to_frame(columns)
    check_nas(df)
    return df
예제 #35
0
def to_frame(tbl, join_tbls, cfg, additional_columns=None):
    """
    Leverage all the built in functionality of the sim framework to join to
    the specified tables, only accessing the columns used in cfg (the model
    yaml configuration file), and any additionally passed columns (the sim
    framework is smart enough to figure out which table to grab the column
    off of)

    Parameters
    ----------
    tbl : DataFrameWrapper
        The table to join other tables to
    join_tbls : list of DataFrameWrappers or strs
        A list of tables to join to "tbl"
    cfg : str
        The filename of a yaml configuration file from which to parse the
        strings which are actually used by the model
    additional_columns : list of strs, optional
        A list of additional columns to include

    Returns
    -------
    A single DataFrame with the index from tbl and the columns used by cfg
    and any additional columns specified
    """
    # avoid the shared mutable-default-argument pitfall ([] as default)
    if additional_columns is None:
        additional_columns = []
    join_tbls = join_tbls if isinstance(join_tbls, list) else [join_tbls]
    tables = [tbl] + join_tbls
    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + additional_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables, columns=columns)
    else:
        df = tables[0].to_frame(columns)
    check_nas(df)
    return df
예제 #36
0
    def calculate_model_variables(self):
        """
        Calculate variables needed to simulate the model, and returns
        DataFrames of simulation-ready tables with needed variables.
        Returns
        -------
        choosers : pandas.DataFrame
            DataFrame of choosers.
        alternatives : pandas.DataFrame
            DataFrame of alternatives.
        """
        columns_used = self.columns_used() + [self.choice_column]
        # append the units column only when configured
        columns_used = columns_used + [self.agent_units
                                       ] if self.agent_units else columns_used
        choosers = orca.get_table(self.choosers).to_frame(columns_used)

        # supply/vacancy columns are optional; keep only configured ones
        supply_column_names = [
            col for col in [self.supply_variable, self.vacant_variable]
            if col is not None
        ]

        columns_used.extend(supply_column_names)

        if self.merge_tables:
            # deep-copy so appending the alternatives table does not
            # mutate the configured merge_tables list
            mt = copy.deepcopy(self.merge_tables)
            mt.append(self.alternatives)
            all_cols = []
            for table in mt:
                all_cols.extend(orca.get_table(table).columns)
            # only request columns the model actually uses
            all_cols = [col for col in all_cols if col in columns_used]
            alternatives = orca.merge_tables(target=self.alternatives,
                                             tables=mt,
                                             columns=all_cols)
        else:
            alternatives = orca.get_table(
                self.alternatives).to_frame(columns_used + supply_column_names)
        return choosers, alternatives
예제 #37
0
def residential_sqft_zone():
    """Total residential square footage per zone."""
    merged = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                               columns=['residential_sqft', 'zone_id'])
    return merged.groupby('zone_id').residential_sqft.sum()
예제 #38
0
def residential_units_zone():
    """Log1p of the total residential units per zone."""
    merged = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                               columns=['residential_units', 'zone_id'])
    totals = merged.groupby('zone_id').residential_units.sum()
    return totals.apply(np.log1p)
예제 #39
0
def residential_unit_density_zone(zones):
    """Residential units per acre by zone."""
    merged = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                               columns=['residential_units', 'zone_id'])
    units = merged.groupby('zone_id').residential_units.sum()
    return units / zones.acreage
예제 #40
0
def proportional_elcm(jobs, households, buildings, parcels,
                      year, run_number):
    """Place retail and gov/edu jobs in proportion to households.

    Two passes, each writing placements back via
    jobs.update_col_from_series("building_id", ...):

    1. Retail (RETEMPN): per-jurisdiction targets from a modeler-supplied
       retail-jobs-per-household ratio (data/juris_assumptions.csv).
    2. Gov/edu (OTHEMPN): per-TAZ targets computed by multiplying growth
       factors (data/taz_growth_rates_gov_ed.csv) by population
       aggregations derived from household counts.

    NOTE(review): the injected ``year`` and ``run_number`` arguments are
    not referenced in this body.
    """

    juris_assumptions_df = pd.read_csv(os.path.join(
        "data",
        "juris_assumptions.csv"
    ), index_col="juris")

    # not a big fan of this - jobs with building_ids of -1 get dropped
    # by the merge so you have to grab the columns first and fill in
    # juris iff the building_id is != -1
    jobs_df = jobs.to_frame(["building_id", "empsix"])
    df = orca.merge_tables(
        target='jobs',
        tables=[jobs, buildings, parcels],
        columns=['juris', 'zone_id'])
    # unplaced jobs (building_id == -1) keep NaN juris/zone_id here
    jobs_df["juris"] = df["juris"]
    jobs_df["zone_id"] = df["zone_id"]

    hh_df = orca.merge_tables(
        target='households',
        tables=[households, buildings, parcels],
        columns=['juris', 'zone_id', 'county'])

    # the idea here is to make sure we don't lose local retail and gov't
    # jobs - there has to be some amount of basic services to support an
    # increase in population

    buildings_df = orca.merge_tables(
        target='buildings',
        tables=[buildings, parcels],
        columns=['juris', 'zone_id', 'general_type', 'vacant_job_spaces'])

    # merge_tables suffixes columns present in both tables; keep the
    # buildings-side (_x) versions under their plain names
    buildings_df = buildings_df.rename(columns={
      'zone_id_x': 'zone_id', 'general_type_x': 'general_type'})

    # location options are vacant job spaces in retail buildings - this will
    # overfill certain location because we don't have enough space
    building_subset = buildings_df[buildings_df.general_type == "Retail"]
    # one row per vacant job space; clip(0) guards against negative vacancy
    location_options = building_subset.juris.repeat(
        building_subset.vacant_job_spaces.clip(0))

    print "Running proportional jobs model for retail"

    s = _proportional_jobs_model(
        # we now take the ratio of retail jobs to households as an input
        # that is manipulable by the modeler - this is stored in a csv
        # per jurisdiction
        juris_assumptions_df.minimum_forecast_retail_jobs_per_household,
        "RETEMPN",
        "juris",
        hh_df,
        jobs_df,
        location_options
    )

    jobs.update_col_from_series("building_id", s)

    # first read the file from disk - it's small so no table source
    taz_assumptions_df = pd.read_csv(os.path.join(
        "data",
        "taz_growth_rates_gov_ed.csv"
    ), index_col="Taz")

    # we're going to multiply various aggregations of populations by factors
    # e.g. high school jobs are multiplied by county pop and so forth - this
    # is the dict of the aggregations of household counts
    mapping_d = {
        "TAZ Pop": hh_df["zone_id"].dropna().astype('int').value_counts(),
        "County Pop": taz_assumptions_df.County.map(
            hh_df["county"].value_counts()),
        "Reg Pop": len(hh_df)
    }
    # the factors are set up in relation to pop, not hh count
    pop_to_hh = .43

    # don't need county anymore
    del taz_assumptions_df["County"]

    # multipliers are in first row (not counting the headers)
    multipliers = taz_assumptions_df.iloc[0]
    # done with the row
    taz_assumptions_df = taz_assumptions_df.iloc[1:]

    # this is weird but Pandas was giving me a strange error when I tried
    # to change the type of the index directly
    taz_assumptions_df = taz_assumptions_df.reset_index()
    taz_assumptions_df["Taz"] = taz_assumptions_df.Taz.astype("int")
    taz_assumptions_df = taz_assumptions_df.set_index("Taz")

    # now go through and multiply each factor by the aggregation it applied to
    target_jobs = pd.Series(0, taz_assumptions_df.index)
    for col, mult in zip(taz_assumptions_df.columns, multipliers):
        # `mult` names which population aggregation this column scales
        target_jobs += (taz_assumptions_df[col].astype('float') *
                        mapping_d[mult] * pop_to_hh).fillna(0)

    target_jobs = target_jobs.astype('int')

    print "Running proportional jobs model for gov/edu"

    # location options are vacant job spaces in office/school buildings -
    # this will overfill certain locations if we don't have enough space
    # NOTE(review): this mask comes from the orca wrapper ``buildings``,
    # not from ``buildings_df`` as the Retail case above does -- confirm
    # the two share an index, otherwise the boolean indexing may misalign
    building_subset = buildings_df[
        buildings.general_type.isin(["Office", "School"])]
    location_options = building_subset.zone_id.repeat(
        building_subset.vacant_job_spaces.clip(0))

    # now do the same thing for gov't jobs
    s = _proportional_jobs_model(
        None,  # computing jobs directly
        "OTHEMPN",
        "zone_id",
        hh_df,
        jobs_df,
        location_options,
        target_jobs=target_jobs
    )

    jobs.update_col_from_series("building_id", s)
예제 #41
0
def inclusionary_housing_revenue_reduction(feasibility, units):

    print "Computing adjustments due to inclusionary housing"

    # AMI by jurisdiction
    #
    # in practice deed restrictions are done by household size but we aren't
    # going to deed restrict them by household size so it makes sense not to
    # do that here - if we did this by household size like we do in the real
    # world we'd need to have a better representation of what household size
    # is in which unit type

    households = orca.get_table("households")
    buildings = orca.get_table("buildings")
    parcels_geography = orca.get_table("parcels_geography")
    h = orca.merge_tables("households",
                          [households, buildings, parcels_geography],
                          columns=["juris_name", "income"])
    AMI = h.groupby(h.juris_name).income.quantile(.5)

    # per Aksel Olsen (@akselx)
    # take 90% of AMI and multiple by 33% to get the max amount a
    # household can pay per year, divide by 12 to get monthly amt,
    # subtract condo fee

    monthly_condo_fee = 250
    monthly_affordable_payment = AMI * .9 * .33 / 12 - monthly_condo_fee

    def value_can_afford(monthly_payment):
        # this is a 10 year average freddie mac interest rate
        ten_year_average_interest = .055
        return np.npv(ten_year_average_interest/12, [monthly_payment]*30*12)

    value_can_afford = {k: value_can_afford(v) for k, v in
                        monthly_affordable_payment.to_dict().items()}
    value_can_afford = pd.Series(value_can_afford)

    # account for interest and property taxes
    interest_and_prop_taxes = .013
    value_can_afford /= 1+interest_and_prop_taxes

    # there's a lot more nuance to inclusionary percentages than this -
    # e.g. specific neighborhoods get specific amounts -
    # http://sf-moh.org/modules/showdocument.aspx?documentid=7253

    pct_inclusionary = orca.get_injectable("inclusionary_housing_settings")
    juris_name = parcels_geography.juris_name.loc[feasibility.index]
    pct_affordable = juris_name.map(pct_inclusionary).fillna(0)
    value_can_afford = juris_name.map(value_can_afford)

    num_affordable_units = (units * pct_affordable).fillna(0).astype("int")

    ave_price_per_unit = \
        feasibility[('residential', 'building_revenue')] / units

    revenue_diff_per_unit = (ave_price_per_unit - value_can_afford).fillna(0)
    print "Revenue difference per unit (not zero values)"
    print revenue_diff_per_unit[revenue_diff_per_unit > 0].describe()

    revenue_reduction = revenue_diff_per_unit * num_affordable_units

    s = num_affordable_units.groupby(parcels_geography.juris_name).sum()
    print "Feasibile affordable units by jurisdiction"
    print s[s > 0].order()

    return revenue_reduction, num_affordable_units
예제 #42
0
def res_units_per_bldg():
    """Mean residential units per building, by zone."""
    merged = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['zone_id', 'residential_units'])
    return merged.groupby('zone_id')['residential_units'].mean()
예제 #43
0
def ln_non_residential_sqft_zone():
    """Percent of each zone's residential units that sit in buildings
    with btype_hlcm == 3.

    NOTE(review): the name suggests log non-residential sqft, but the
    body computes a building-type share -- confirm against callers.
    """
    merged = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['btype_hlcm', 'residential_units', 'zone_id'])
    type3_units = (merged[merged.btype_hlcm == 3]
                   .groupby('zone_id').residential_units.sum())
    all_units = merged.groupby('zone_id').residential_units.sum()
    return type3_units * 100.0 / all_units
예제 #44
0
def mandatory_tours_merged(mandatory_tours, persons_merged):
    """Mandatory tours with person attributes merged on."""
    merge_inputs = [mandatory_tours, persons_merged]
    return orca.merge_tables(mandatory_tours.name, merge_inputs)
예제 #45
0
def proportional_elcm(jobs, households, buildings, parcels,
                      year, run_number):
    """Place retail and gov/edu jobs in proportion to households.

    Two passes, each writing placements back via
    jobs.update_col_from_series("building_id", ...):

    1. Retail (RETEMPN): per-jurisdiction targets from a modeler-supplied
       retail-jobs-per-household ratio (data/juris_assumptions.csv).
    2. Gov/edu (OTHEMPN): per-TAZ targets computed by multiplying growth
       factors (data/taz_growth_rates_gov_ed.csv) by population
       aggregations derived from household counts.

    NOTE(review): the injected ``year`` and ``run_number`` arguments are
    not referenced in this body.
    """

    juris_assumptions_df = pd.read_csv(os.path.join(
        "data",
        "juris_assumptions.csv"
    ), index_col="juris")

    # not a big fan of this - jobs with building_ids of -1 get dropped
    # by the merge so you have to grab the columns first and fill in
    # juris iff the building_id is != -1
    jobs_df = jobs.to_frame(["building_id", "empsix"])
    df = orca.merge_tables(
        target='jobs',
        tables=[jobs, buildings, parcels],
        columns=['juris', 'zone_id'])
    # unplaced jobs (building_id == -1) keep NaN juris/zone_id here
    jobs_df["juris"] = df["juris"]
    jobs_df["zone_id"] = df["zone_id"]

    hh_df = orca.merge_tables(
        target='households',
        tables=[households, buildings, parcels],
        columns=['juris', 'zone_id', 'county'])

    # the idea here is to make sure we don't lose local retail and gov't
    # jobs - there has to be some amount of basic services to support an
    # increase in population

    buildings_df = orca.merge_tables(
        target='buildings',
        tables=[buildings, parcels],
        columns=['juris', 'zone_id', 'general_type', 'vacant_job_spaces'])

    # merge_tables suffixes columns present in both tables; keep the
    # buildings-side (_x) versions under their plain names
    buildings_df = buildings_df.rename(columns={
      'zone_id_x': 'zone_id', 'general_type_x': 'general_type'})

    # location options are vacant job spaces in retail buildings - this will
    # overfill certain location because we don't have enough space
    building_subset = buildings_df[buildings_df.general_type == "Retail"]
    # one row per vacant job space; clip(0) guards against negative vacancy
    location_options = building_subset.juris.repeat(
        building_subset.vacant_job_spaces.clip(0))

    print "Running proportional jobs model for retail"

    s = _proportional_jobs_model(
        # we now take the ratio of retail jobs to households as an input
        # that is manipulable by the modeler - this is stored in a csv
        # per jurisdiction
        juris_assumptions_df.minimum_forecast_retail_jobs_per_household,
        "RETEMPN",
        "juris",
        hh_df,
        jobs_df,
        location_options
    )

    jobs.update_col_from_series("building_id", s)

    # first read the file from disk - it's small so no table source
    taz_assumptions_df = pd.read_csv(os.path.join(
        "data",
        "taz_growth_rates_gov_ed.csv"
    ), index_col="Taz")

    # we're going to multiply various aggregations of populations by factors
    # e.g. high school jobs are multiplied by county pop and so forth - this
    # is the dict of the aggregations of household counts
    mapping_d = {
        "TAZ Pop": hh_df["zone_id"].dropna().astype('int').value_counts(),
        "County Pop": taz_assumptions_df.County.map(
            hh_df["county"].value_counts()),
        "Reg Pop": len(hh_df)
    }
    # the factors are set up in relation to pop, not hh count
    pop_to_hh = .43

    # don't need county anymore
    del taz_assumptions_df["County"]

    # multipliers are in first row (not counting the headers)
    multipliers = taz_assumptions_df.iloc[0]
    # done with the row
    taz_assumptions_df = taz_assumptions_df.iloc[1:]

    # this is weird but Pandas was giving me a strange error when I tried
    # to change the type of the index directly
    taz_assumptions_df = taz_assumptions_df.reset_index()
    taz_assumptions_df["Taz"] = taz_assumptions_df.Taz.astype("int")
    taz_assumptions_df = taz_assumptions_df.set_index("Taz")

    # now go through and multiply each factor by the aggregation it applied to
    target_jobs = pd.Series(0, taz_assumptions_df.index)
    for col, mult in zip(taz_assumptions_df.columns, multipliers):
        # `mult` names which population aggregation this column scales
        target_jobs += (taz_assumptions_df[col].astype('float') *
                        mapping_d[mult] * pop_to_hh).fillna(0)

    target_jobs = target_jobs.astype('int')

    print "Running proportional jobs model for gov/edu"

    # location options are vacant job spaces in office/school buildings -
    # this will overfill certain locations if we don't have enough space
    # NOTE(review): this mask comes from the orca wrapper ``buildings``,
    # not from ``buildings_df`` as the Retail case above does -- confirm
    # the two share an index, otherwise the boolean indexing may misalign
    building_subset = buildings_df[
        buildings.general_type.isin(["Office", "School"])]
    location_options = building_subset.zone_id.repeat(
        building_subset.vacant_job_spaces.clip(0))

    # now do the same thing for gov't jobs
    s = _proportional_jobs_model(
        None,  # computing jobs directly
        "OTHEMPN",
        "zone_id",
        hh_df,
        jobs_df,
        location_options,
        target_jobs=target_jobs
    )

    jobs.update_col_from_series("building_id", s)
예제 #46
0
def average_resunit_size():
    """Mean sqft per residential unit, by zone."""
    merged = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['sqft_per_unit', 'zone_id'])
    return merged.groupby('zone_id')['sqft_per_unit'].mean()
예제 #47
0
def median_yearbuilt_pre_1950():
    """1 if a zone's median year_built predates 1950, else 0 (int32)."""
    merged = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['year_built', 'zone_id'])
    median_year = merged.groupby('zone_id')['year_built'].median()
    return (median_year < 1950).astype('int32')
예제 #48
0
def avg_unit_price_zone():
    """Mean non-residential unit price by zone, over buildings that have
    both non-residential sqft and a positive improvement value.
    """
    b = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['non_residential_sqft', 'improvement_value',
                 'unit_price_non_residential', 'zone_id'])

    # combine the masks with the boolean AND operator `&` -- the original
    # multiplied the two boolean Series, which happens to give the same
    # mask but obscures the intent
    mask = (b.non_residential_sqft > 0) & (b.improvement_value > 0)
    return b[mask].groupby('zone_id').unit_price_non_residential.mean()
예제 #49
0
def geographic_summary(parcels, households, jobs, buildings, taz_geography,
                       run_number, year, summary, final_year):
    """Write geography-level summary CSVs for this run.

    For milestone years, writes one CSV per summary geography
    (superdistrict, pda, juris) with household, dwelling-unit and
    employment aggregations; in the final year it also dumps each
    account ledger and an urban-footprint development summary.
    """
    # using the following conditional b/c `year` is used to pull a column
    # from a csv based on a string of the year in add_population()
    # and in add_employment() and 2009 is the
    # 'base'/pre-simulation year, as is the 2010 value in the csv.
    if year == 2009:
        year = 2010
        base = True
    else:
        base = False

    households_df = orca.merge_tables('households',
                                      [parcels, buildings, households],
                                      columns=[
                                          'pda', 'zone_id', 'juris',
                                          'superdistrict', 'persons', 'income',
                                          'base_income_quartile'
                                      ])

    jobs_df = orca.merge_tables(
        'jobs', [parcels, buildings, jobs],
        columns=['pda', 'superdistrict', 'juris', 'zone_id', 'empsix'])

    buildings_df = orca.merge_tables('buildings', [parcels, buildings],
                                     columns=[
                                         'pda', 'superdistrict', 'juris',
                                         'building_type_id', 'zone_id',
                                         'residential_units', 'building_sqft',
                                         'non_residential_sqft'
                                     ])

    # per-parcel development output accumulated by the simulation
    parcel_output = summary.parcel_output

    # because merge_tables returns multiple zone_id_'s, but not the one we need
    buildings_df = buildings_df.rename(columns={'zone_id_x': 'zone_id'})

    geographies = ['superdistrict', 'pda', 'juris']

    # only produce the geography tables at five-year milestones
    if year in [2010, 2015, 2020, 2025, 2030, 2035, 2040]:

        for geography in geographies:

            # create table with household/population summaries

            # np.size over 'persons' counts households per geography
            summary_table = pd.pivot_table(households_df,
                                           values=['persons'],
                                           index=[geography],
                                           aggfunc=[np.size])

            summary_table.columns = ['tothh']

            # income quartile counts
            summary_table['hhincq1'] = \
                households_df.query("base_income_quartile == 1").\
                groupby(geography).size()
            summary_table['hhincq2'] = \
                households_df.query("base_income_quartile == 2").\
                groupby(geography).size()
            summary_table['hhincq3'] = \
                households_df.query("base_income_quartile == 3").\
                groupby(geography).size()
            summary_table['hhincq4'] = \
                households_df.query("base_income_quartile == 4").\
                groupby(geography).size()

            # residential buildings by type
            summary_table['sfdu'] = buildings_df.\
                query("building_type_id == 1 or building_type_id == 2").\
                groupby(geography).residential_units.sum()
            summary_table['mfdu'] = buildings_df.\
                query("building_type_id == 3 or building_type_id == 12").\
                groupby(geography).residential_units.sum()

            # employees by sector
            summary_table['totemp'] = jobs_df.\
                groupby(geography).size()
            summary_table['agrempn'] = jobs_df.query("empsix == 'AGREMPN'").\
                groupby(geography).size()
            summary_table['mwtempn'] = jobs_df.query("empsix == 'MWTEMPN'").\
                groupby(geography).size()
            summary_table['retempn'] = jobs_df.query("empsix == 'RETEMPN'").\
                groupby(geography).size()
            summary_table['fpsempn'] = jobs_df.query("empsix == 'FPSEMPN'").\
                groupby(geography).size()
            summary_table['herempn'] = jobs_df.query("empsix == 'HEREMPN'").\
                groupby(geography).size()
            summary_table['othempn'] = jobs_df.query("empsix == 'OTHEMPN'").\
                groupby(geography).size()

            # summary columns
            summary_table['occupancy_rate'] = summary_table['tothh'] / \
                (summary_table['sfdu'] + summary_table['mfdu'])
            summary_table['non_residential_sqft'] = buildings_df.\
                groupby(geography)['non_residential_sqft'].sum()
            summary_table['sq_ft_per_employee'] = \
                summary_table['non_residential_sqft'] / summary_table['totemp']

            if parcel_output is not None:
                # subsidized units = deed-restricted units that did not come
                # from the inclusionary policy
                parcel_output['subsidized_units'] = \
                    parcel_output.deed_restricted_units - \
                    parcel_output.inclusionary_units

                # columns re: affordable housing
                summary_table['deed_restricted_units'] = \
                    parcel_output.groupby(geography).\
                    deed_restricted_units.sum()
                summary_table['inclusionary_units'] = \
                    parcel_output.groupby(geography).inclusionary_units.sum()
                summary_table['subsidized_units'] = \
                    parcel_output.groupby(geography).subsidized_units.sum()
                summary_table['inclusionary_revenue_reduction'] = \
                    parcel_output.groupby(geography).\
                    policy_based_revenue_reduction.sum()
                summary_table['inclusionary_revenue_reduction_per_unit'] = \
                    summary_table.inclusionary_revenue_reduction / \
                    summary_table.inclusionary_units
                # subsidy is the negative of max_profit on subsidized parcels
                summary_table['total_subsidy'] = \
                    parcel_output[parcel_output.subsidized_units > 0].\
                    groupby(geography).max_profit.sum() * -1
                summary_table['subsidy_per_unit'] = \
                    summary_table.total_subsidy / \
                    summary_table.subsidized_units

            # fill in 0 values where there are NA's so that summary table
            # outputs are the same over the years otherwise a PDA or summary
            # geography would be dropped if it had no employment or housing
            if geography == 'superdistrict':
                all_summary_geographies = buildings_df[geography].unique()
            else:
                all_summary_geographies = parcels[geography].unique()
            summary_table = \
                summary_table.reindex(all_summary_geographies).fillna(0)

            # base-year output is labeled 2009 even though 2010 data was used
            if base is False:
                summary_csv = "runs/run{}_{}_summaries_{}.csv".\
                    format(run_number, geography, year)
            elif base is True:
                summary_csv = "runs/run{}_{}_summaries_{}.csv".\
                    format(run_number, geography, 2009)
            summary_table.to_csv(summary_csv)

    # ##############################
    # ##############################
    # ##Write Summary of Accounts###
    # ##############################
    # ##############################

    if year == final_year:
        # dump each account ledger from the "coffer" injectable
        for acct_name, acct in orca.get_injectable("coffer").iteritems():
            fname = "runs/run{}_acctlog_{}_{}.csv".\
                format(run_number, acct_name, year)
            acct.to_frame().to_csv(fname)

    # ##############################
    # ##############################
    # ####Write Urban Footprint#####
    # #########Summary##############
    # ##############################
    # ##############################

        buildings_uf_df = orca.merge_tables('buildings', [parcels, buildings],
                                            columns=[
                                                'urban_footprint',
                                                'year_built', 'acres',
                                                'residential_units',
                                                'non_residential_sqft'
                                            ])

        buildings_uf_df['count'] = 1

        # "denser greenfield" = more than 1 unit/acre on parcels whose
        # urban_footprint flag is 0 (presumably outside the footprint --
        # confirm the flag's coding)
        s1 = buildings_uf_df['residential_units'] / buildings_uf_df['acres']
        s2 = s1 > 1
        s3 = (buildings_uf_df['urban_footprint'] == 0) * 1
        buildings_uf_df['denser_greenfield'] = s3 * s2

        # new development only (built after 2010), grouped by footprint flag
        df = buildings_uf_df.\
            loc[buildings_uf_df['year_built'] > 2010].\
            groupby('urban_footprint').sum()
        df = df[[
            'count', 'residential_units', 'non_residential_sqft', 'acres'
        ]]

        df2 = buildings_uf_df.\
            loc[buildings_uf_df['year_built'] > 2010].\
            groupby('denser_greenfield').sum()
        df2 = df2[[
            'count', 'residential_units', 'non_residential_sqft', 'acres'
        ]]

        formatters = {
            'count': '{:.0f}',
            'residential_units': '{:.0f}',
            'non_residential_sqft': '{:.0f}',
            'acres': '{:.0f}'
        }

        df = format_df(df, formatters)

        df2 = format_df(df2, formatters)

        df = df.transpose()

        df2 = df2.transpose()

        # graft the denser_greenfield == 1 column onto the footprint table
        df[2] = df2[1]

        df.columns = [
            'urban_footprint_0', 'urban_footprint_1', 'denser_greenfield'
        ]
        uf_summary_csv = "runs/run{}_urban_footprint_summary_{}.csv".\
            format(run_number, year)
        df.to_csv(uf_summary_csv)
예제 #50
0
def tours_merged(tours, persons_merged):
    """Tours joined with person attributes."""
    merge_inputs = [tours, persons_merged]
    return orca.merge_tables(tours.name, tables=merge_inputs)
예제 #51
0
def jobs_within_15min(zones, t_data_dist15):
    """Zonal employment reachable within 15 minutes, broadcast to each
    building's zone.

    Sums destination-zone employment over the 15-minute skim pairs for
    every origin zone, then reindexes onto buildings' zone_ids.
    """
    b = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                          columns=['zone_id'])
    travel = t_data_dist15.to_frame()
    travel.loc[:, 'attr'] = zones.zonal_emp[t_data_dist15.to_zone_id].values
    # apply(np.sum) (not .sum()) keeps NaN-propagating semantics
    accessible_emp = travel.groupby(level=0).attr.apply(np.sum)
    return reindex(accessible_emp, b.zone_id)
예제 #52
0
def ln_non_residential_sqft_zone():
    """log(1 + total non-residential sqft) per zone."""
    merged = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['non_residential_sqft', 'zone_id'])
    sqft_totals = merged.groupby('zone_id')['non_residential_sqft'].sum()
    return sqft_totals.apply(np.log1p)
예제 #53
0
def inclusionary_housing_revenue_reduction(feasibility, units):
    """Compute the revenue lost to inclusionary (below-market-rate) units.

    For each parcel in ``feasibility``, applies the jurisdiction's
    inclusionary percentage to ``units``, prices affordable units at what
    a household earning 90% of the jurisdiction's median income can
    afford, and returns the per-parcel revenue reduction along with the
    number of affordable units.

    Returns
    -------
    revenue_reduction : pandas.Series
    num_affordable_units : pandas.Series of int
    """

    print "Computing adjustments due to inclusionary housing"

    # AMI by jurisdiction
    #
    # in practice deed restrictions are done by household size but we aren't
    # going to deed restrict them by household size so it makes sense not to
    # do that here - if we did this by household size like we do in the real
    # world we'd need to have a better representation of what household size
    # is in which unit type

    households = orca.get_table("households")
    buildings = orca.get_table("buildings")
    parcels_geography = orca.get_table("parcels_geography")
    h = orca.merge_tables("households",
                          [households, buildings, parcels_geography],
                          columns=["juris_name", "income"])
    # median household income per jurisdiction stands in for AMI
    AMI = h.groupby(h.juris_name).income.quantile(.5)

    # per Aksel Olsen (@akselx)
    # take 90% of AMI and multiple by 33% to get the max amount a
    # household can pay per year, divide by 12 to get monthly amt,
    # subtract condo fee

    monthly_condo_fee = 250
    monthly_affordable_payment = AMI * .9 * .33 / 12 - monthly_condo_fee

    def value_can_afford(monthly_payment):
        # this is a 10 year average freddie mac interest rate
        ten_year_average_interest = .055
        # NOTE(review): np.npv was removed in NumPy 1.20 (it lives in the
        # numpy_financial package now) -- this code requires an older NumPy
        return np.npv(ten_year_average_interest / 12,
                      [monthly_payment] * 30 * 12)

    value_can_afford = {
        k: value_can_afford(v)
        for k, v in monthly_affordable_payment.to_dict().items()
    }
    value_can_afford = pd.Series(value_can_afford)

    # account for interest and property taxes
    interest_and_prop_taxes = .013
    value_can_afford /= 1 + interest_and_prop_taxes

    # there's a lot more nuance to inclusionary percentages than this -
    # e.g. specific neighborhoods get specific amounts -
    # http://sf-moh.org/modules/showdocument.aspx?documentid=7253

    pct_inclusionary = orca.get_injectable("inclusionary_housing_settings")
    juris_name = parcels_geography.juris_name.loc[feasibility.index]
    pct_affordable = juris_name.map(pct_inclusionary).fillna(0)
    value_can_afford = juris_name.map(value_can_afford)

    num_affordable_units = (units * pct_affordable).fillna(0).astype("int")

    ave_price_per_unit = \
        feasibility[('residential', 'building_revenue')] / units

    revenue_diff_per_unit = (ave_price_per_unit - value_can_afford).fillna(0)
    print "Revenue difference per unit (not zero values)"
    print revenue_diff_per_unit[revenue_diff_per_unit > 0].describe()

    revenue_reduction = revenue_diff_per_unit * num_affordable_units

    s = num_affordable_units.groupby(parcels_geography.juris_name).sum()
    print "Feasibile affordable units by jurisdiction"
    print s[s > 0].sort_values()

    return revenue_reduction, num_affordable_units
예제 #54
0
def topsheet(households, jobs, buildings, parcels, zones, year, run_number,
             taz_geography, parcels_zoning_calculations, summary, settings,
             parcels_geography):
    """Write the run's "topsheet" log of high-level regional indicators.

    Computes household/job counts and shares by subregion and PDA,
    vacancy rates, mean income by TPP membership, raw dwelling-unit
    capacity and jobs/housing balance; in 2010 it stashes these as
    base-year measures, and every year it writes the comparison to
    runs/run<N>_topsheet_<year>.log.
    """

    hh_by_subregion = misc.reindex(taz_geography.subregion,
                                   households.zone_id).value_counts()

    households_df = orca.merge_tables(
        'households', [parcels_geography, buildings, households],
        columns=['pda_id', 'tpp_id', 'income'])

    # counts of households inside (True) vs outside (False) a PDA
    hh_by_inpda = households_df.pda_id.notnull().value_counts()

    # mean income split by whether the household is in a TPP area
    hhincome_by_intpp = households_df.income.groupby(
        households_df.tpp_id.notnull()).mean()
    # round to nearest 100s
    hhincome_by_intpp = (hhincome_by_intpp / 100).round() * 100

    jobs_by_subregion = misc.reindex(taz_geography.subregion,
                                     jobs.zone_id).value_counts()

    jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs],
                                columns=['pda'])

    jobs_by_inpda = jobs_df.pda.notnull().value_counts()

    # raw zoned capacity (no-development parcels excluded) by subregion
    capacity = parcels_zoning_calculations.\
        zoned_du_underbuild_nodev.groupby(parcels.subregion).sum()

    if year == 2010:
        # save some info for computing growth measures
        orca.add_injectable(
            "base_year_measures", {
                "hh_by_subregion": hh_by_subregion,
                "jobs_by_subregion": jobs_by_subregion,
                "hh_by_inpda": hh_by_inpda,
                "jobs_by_inpda": jobs_by_inpda,
                "hhincome_by_intpp": hhincome_by_intpp,
                "capacity": capacity
            })

    # if year != 2040:
    #    return

    # NOTE(review): assumes the 2010 pass above has already stored this
    # injectable; a run that never executes year 2010 would raise here
    base_year_measures = orca.get_injectable("base_year_measures")

    # yearly topsheet log; closed explicitly at the end of the function
    f = open(
        os.path.join("runs", "run%d_topsheet_%d.log" % (run_number, year)),
        "w")

    def write(s):
        # append one paragraph (blank-line separated) to the log
        # print s
        f.write(s + "\n\n")

    def norm_and_round(s):
        # normalize and round a series
        return str((s / s.sum()).round(2))

    nhh = len(households)
    write("Number of households = %d" % nhh)
    nj = len(jobs)
    write("Number of jobs = %d" % nj)

    # building_id == -1 marks unplaced agents
    n = len(households.building_id[households.building_id == -1])
    write("Number of unplaced households = %d" % n)

    n = len(jobs.building_id[jobs.building_id == -1])
    write("Number of unplaced jobs = %d" % n)

    du = buildings.residential_units.sum()
    write("Number of residential units = %d" % du)
    # NOTE(review): the "- 0" is a no-op left in the vacancy formula
    write("Residential vacancy rate = %.2f" % (1 - 0 - float(nhh) / du))

    du = buildings.deed_restricted_units.sum()
    write("Number of deed restricted units = %d" % du)

    write("Base year mean income by whether household is in tpp:\n%s" %
          base_year_measures["hhincome_by_intpp"])

    write("Horizon year mean income by whether household is in tpp:\n%s" %
          hhincome_by_intpp)

    jsp = buildings.job_spaces.sum()
    write("Number of job spaces = %d" % jsp)
    write("Non-residential vacancy rate = %.2f" % (1 - 0 - float(nj) / jsp))

    tmp = base_year_measures["hh_by_subregion"]
    write("Households base year share by subregion:\n%s" % norm_and_round(tmp))

    write("Households share by subregion:\n%s" %
          norm_and_round(hh_by_subregion))
    diff = hh_by_subregion - base_year_measures["hh_by_subregion"]

    write("Households pct of regional growth by subregion:\n%s" %
          norm_and_round(diff))

    tmp = base_year_measures["jobs_by_subregion"]
    write("Jobs base year share by subregion:\n%s" % norm_and_round(tmp))

    write("Jobs share by subregion:\n%s" % norm_and_round(jobs_by_subregion))
    diff = jobs_by_subregion - base_year_measures["jobs_by_subregion"]

    write("Jobs pct of regional growth by subregion:\n%s" %
          norm_and_round(diff))

    tmp = base_year_measures["hh_by_inpda"]
    write("Households base year share in pdas:\n%s" % norm_and_round(tmp))

    write("Households share in pdas:\n%s" % norm_and_round(hh_by_inpda))
    diff = hh_by_inpda - base_year_measures["hh_by_inpda"]

    write("Households pct of regional growth in pdas:\n%s" %
          norm_and_round(diff))

    tmp = base_year_measures["jobs_by_inpda"]
    write("Jobs base year share in pdas:\n%s" % norm_and_round(tmp))

    write("Jobs share in pdas:\n%s" % norm_and_round(jobs_by_inpda))
    diff = jobs_by_inpda - base_year_measures["jobs_by_inpda"]

    write("Jobs pct of regional growth in pdas:\n%s" % norm_and_round(diff))

    write("Base year dwelling unit raw capacity:\n%s" %
          base_year_measures["capacity"])

    write("Dwelling unit raw capacity:\n%s" % capacity)

    if summary.parcel_output is not None:
        df = summary.parcel_output
        # we mark greenfield as a parcel with less than 500 current sqft
        greenfield = df.total_sqft < 500

        write(
            "Current share of projects which are greenfield development:\n%s" %
            norm_and_round(greenfield.value_counts()))

        write("Current share of units which are greenfield development:\n%s" %
              norm_and_round(df.residential_units.groupby(greenfield).sum()))

    # jobs/housing balance by county (county ids remapped for travel model)
    cmap = settings["county_id_tm_map"]
    jobs_by_county = jobs.zone_id.map(taz_geography.county)\
        .map(cmap).value_counts()
    households_by_county = households.zone_id.map(taz_geography.county)\
        .map(cmap).value_counts()
    # replace(0, 1) avoids division by zero for empty counties
    jobs_by_housing = jobs_by_county / households_by_county.replace(0, 1)
    write("Jobs/housing balance:\n" + str(jobs_by_housing))

    f.close()
예제 #55
0
def non_mandatory_tours_merged(non_mandatory_tours, persons_merged):
    """Non-mandatory tours joined with person attributes."""
    return orca.merge_tables(non_mandatory_tours.name,
                             tables=[non_mandatory_tours, persons_merged])
예제 #56
0
def property_taxes(buildings, parcels_geography, acct_settings, coffer, year):
    """Levy this year's property taxes into the property-tax account.

    Merges buildings with parcel geography and hands the merged frame to
    tax_buildings.
    """
    merged_buildings = orca.merge_tables(
        'buildings', [buildings, parcels_geography])
    prop_tax_account = coffer["prop_tax_acct"]
    tax_buildings(merged_buildings, acct_settings, prop_tax_account, year)
예제 #57
0
def persons_merged(persons, households, land_use, accessibility):
    """Persons joined with household, land-use and accessibility tables."""
    merge_inputs = [persons, households, land_use, accessibility]
    return orca.merge_tables(persons.name, tables=merge_inputs)
예제 #58
0
def households_merged(households, land_use, accessibility):
    """Households joined with land-use and accessibility tables."""
    merge_inputs = [households, land_use, accessibility]
    return orca.merge_tables(households.name, tables=merge_inputs)
예제 #59
0
def persons_merged(persons, households, land_use, accessibility):
    """Persons joined with household, land-use and accessibility tables."""
    return orca.merge_tables(persons.name,
                             tables=[persons, households,
                                     land_use, accessibility])
예제 #60
0
def btype(households):
    """Collapse building_type_id into four categories per household:
    1 = type 2, 2 = type 3, 3 = type 20, 4 = any other type.

    The injected ``households`` argument is not referenced in the body
    (the households table is re-fetched via the merge).
    """
    df = orca.merge_tables('households', tables=['households', 'buildings'],
                           columns=['building_type_id'])
    bt = df.building_type_id
    # ~Series.isin(...) replaces np.invert(np.in1d(...)): same boolean
    # mask, but it keeps the pandas index and avoids np.in1d, which is
    # deprecated in NumPy 1.24+ in favor of np.isin
    return (1 * (bt == 2) + 2 * (bt == 3) + 3 * (bt == 20) +
            4 * ~bt.isin([2, 3, 20]))