Ejemplo n.º 1
0
def setup_orca(dfa, dfb, dfa_col, dfb_col, dfa_factor, dfb_factor):
    """
    Register a small two-table orca configuration from the given inputs.

    Each kind of object is registered twice, once through an ``orca.add_*``
    call and once through the matching decorator:

    - injectables 'a_factor' (value) and ``b_factor`` (function)
    - tables 'dfa' (value) and 'dfb' (function)
    - columns 'acol' on 'dfa' and 'bcol' on 'dfb' (values), plus the
      function columns ``extra_acol`` and ``extra_bcol``

    A broadcast of 'dfb' onto 'dfa' and a no-op step ``test_step`` are
    registered as well.

    Parameters
    ----------
    dfa, dfb
        Data registered as the 'dfa' and 'dfb' tables.
    dfa_col, dfb_col
        Data registered as the 'acol' and 'bcol' columns; also multiplied
        by the factors to produce the extra columns.
    dfa_factor, dfb_factor
        Values exposed through the 'a_factor' and ``b_factor`` injectables.
    """
    # injectable registered directly as a value
    orca.add_injectable('a_factor', dfa_factor)

    # injectable registered as a function that returns the value
    @orca.injectable()
    def b_factor():
        return dfb_factor

    # table registered directly ...
    orca.add_table('dfa', dfa)

    # ... and table registered through a function
    @orca.table('dfb')
    def dfb_table():
        return dfb

    # plain columns attached to each table
    orca.add_column('dfa', 'acol', dfa_col)
    orca.add_column('dfb', 'bcol', dfb_col)

    # computed columns; each argument is named after the injectable
    # registered above that supplies the factor
    @orca.column('dfa')
    def extra_acol(a_factor):
        return dfa_col * a_factor

    @orca.column('dfb')
    def extra_bcol(b_factor):
        return dfb_col * b_factor

    # relate the tables: the 'a_id' column of 'dfb' is matched against the
    # index of 'dfa'
    orca.broadcast('dfb', 'dfa', cast_on='a_id', onto_index=True)

    # step that requests both tables and does nothing with them
    @orca.step()
    def test_step(dfa, dfb):
        pass
Ejemplo n.º 2
0
def load_rental_listings():
    """
    Register the Craigslist rental listings used for hedonic estimation.

    This is an initialization step; it is not needed for simulation.

    Data expectations
    -----------------
    - injectable 'net' whose 'walk' and 'drive' entries provide
      ``get_node_ids(longitude, latitude)``
    - 'rentals' entry in the data store with 'longitude', 'latitude',
      'bedrooms', 'neighborhood', 'sqft' and 'rent_sqft' columns
    - 'parcels' table with a 'zone_id' column

    Results
    -------
    - registers a cached 'craigslist' table that adds to the stored data:
        - 'node_id' (looked up on the 'walk' network)
        - 'tmnode_id' (looked up on the 'drive' network)
        - 'bedrooms' (missing values replaced by 1)
        - 'neighborhood' (missing values replaced by '')
        - 'sqft_per_unit' (copy of 'sqft')
        - 'price_per_sqft' (copy of 'rent_sqft')
    - registers a cached 'zone_id' column on 'craigslist'
    - registers broadcasts of 'nodes', 'tmnodes', 'zones' and 'logsums'
      onto 'craigslist'
    """
    @orca.table('craigslist', cache=True)
    def craigslist(store):
        listings = store['rentals']
        networks = orca.get_injectable('net')

        # look up a node id for every listing on both networks
        for id_column, network_key in (('node_id', 'walk'),
                                       ('tmnode_id', 'drive')):
            listings[id_column] = networks[network_key].get_node_ids(
                listings['longitude'], listings['latitude'])

        # fill nans -- missing bedrooms are mostly studio apts
        listings['bedrooms'] = listings.bedrooms.replace(np.nan, 1)
        listings['neighborhood'] = listings.neighborhood.replace(np.nan, '')

        # alias the columns so the same yaml works for both estimation and
        # simulation
        listings['sqft_per_unit'] = listings['sqft']
        listings['price_per_sqft'] = listings['rent_sqft']
        return listings

    # Open question kept from the original author: since this column is
    # never updated, it might be simpler to compute it in the table itself.
    # NOTE(review): parcels.zone_id is looked up by craigslist.node_id; if
    # 'parcels' is indexed by parcel id this would not line up -- confirm.
    @orca.column('craigslist', 'zone_id', cache=True)
    def zone_id(craigslist, parcels):
        return misc.reindex(parcels.zone_id, craigslist.node_id)

    # each cast table's index is matched to the named craigslist column
    for cast_table, key_column in (('nodes', 'node_id'),
                                   ('tmnodes', 'tmnode_id'),
                                   ('zones', 'zone_id'),
                                   ('logsums', 'zone_id')):
        orca.broadcast(cast_table, 'craigslist', cast_index=True,
                       onto_on=key_column)
Ejemplo n.º 3
0
@orca.table('travel_data', cache=True)
def travel_data(store):
    """Return the 'travel_data' entry of the data store (cached table)."""
    return store['travel_data']

@orca.table('zones', cache=True)
def zones(store):
    """Return the 'zones' entry of the data store (cached table)."""
    return store['zones']

@orca.table('zoning_heights', cache=True)
def zoning_heights(store):
    """Return the 'zoning_heights' entry of the data store (cached table)."""
    return store['zoning_heights']

# Table relationships. Each call casts the first table onto the second: the
# first table's index (cast_index=True) is matched against the column of the
# second table named by onto_on.
orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
orca.broadcast('fazes', 'zones', cast_index=True, onto_on='faz_id')
orca.broadcast('gridcells', 'parcels', cast_index=True, onto_on='grid_id')
orca.broadcast('households', 'persons', cast_index=True, onto_on='household_id')
#orca.broadcast('jobs', 'households', cast_index=True, onto_on='job_id')
orca.broadcast('jobs', 'persons', cast_index=True, onto_on='job_id')
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast('parcels', 'schools', cast_index=True, onto_on='parcel_id')
orca.broadcast('tractcity', 'parcels', cast_index=True, onto_on='tractcity_id')
orca.broadcast('zones', 'parcels', cast_index=True, onto_on='zone_id')
# persons tables are linked to zones through 'household_zone_id'
orca.broadcast('zones', 'persons_for_estimation', cast_index=True, onto_on='household_zone_id')
orca.broadcast('zones', 'persons', cast_index=True, onto_on='household_zone_id')
# relationships for the *_for_estimation tables
orca.broadcast('buildings', 'households_for_estimation', cast_index=True, onto_on='building_id')
orca.broadcast('buildings_lag1', 'households_for_estimation', cast_index=True, onto_on='building_id')
orca.broadcast('households_for_estimation', 'persons_for_estimation', cast_index=True, onto_on='household_id')
Ejemplo n.º 4
0
import orca
import pandas as pd


@orca.table(cache=True)
def accessibility(store):
    """Return the "skims/accessibility" store entry with upper-cased column names."""
    skims = store["skims/accessibility"]
    upper_names = [name.upper() for name in skims.columns]
    skims.columns = upper_names
    return skims


@orca.column("accessibility")
def mode_choice_logsums(accessibility):
    """
    Return a placeholder column of zeros aligned with the accessibility index.

    TODO: compute actual mode choice logsums once the upcoming mode choice
    model is available.
    """
    placeholder = pd.Series(0, index=accessibility.index)
    return placeholder


# Relationship between accessibility and households: the accessibility index
# is matched against the households 'TAZ' column. This is the accessibility
# around the household location - be careful with this one, as accessibility
# at some other location can also matter.
orca.broadcast('accessibility', 'households', cast_index=True, onto_on='TAZ')
Ejemplo n.º 5
0
        2: "Urban",
        3: "Suburban",
        4: "Rural"
    })
    return tg


# these are shapes - "zones" in the bay area


@orca.table('zones', cache=True)
def zones(store):
    """Return the 'zones' entry of the data store, sorted by its index."""
    return store['zones'].sort_index()


# This specifies the relationships between tables. Each call casts the first
# table onto the second: the first table's index (cast_index=True) is
# matched against the column of the second table named by onto_on.
orca.broadcast('parcels_geography', 'buildings', cast_index=True,
               onto_on='parcel_id')
orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
orca.broadcast('parcels', 'homesales', cast_index=True, onto_on='parcel_id')
orca.broadcast('nodes', 'homesales', cast_index=True, onto_on='node_id')
orca.broadcast('tmnodes', 'homesales', cast_index=True, onto_on='tmnode_id')
orca.broadcast('nodes', 'costar', cast_index=True, onto_on='node_id')
orca.broadcast('tmnodes', 'costar', cast_index=True, onto_on='tmnode_id')
# logsums are joined through the 'zone_id' column of the target table
orca.broadcast('logsums', 'homesales', cast_index=True, onto_on='zone_id')
orca.broadcast('logsums', 'costar', cast_index=True, onto_on='zone_id')
orca.broadcast('taz_geography', 'parcels', cast_index=True,
               onto_on='zone_id')
Ejemplo n.º 6
0
    df = df.reset_index().drop_duplicates(subset='parcel').set_index('parcel')
    return df


# this is the actual zoning
@orca.table('zoning', cache=True)
def zoning(store):
    """Return the 'zoning' entry of the data store (cached table)."""
    return store['zoning']


# zoning for use in the "baseline" scenario
# comes in the hdf5
@orca.table('zoning_baseline', cache=True)
def zoning_baseline(zoning, zoning_for_parcels):
    """
    Return zoning_for_parcels joined with the zoning table.

    The 'zoning' column of zoning_for_parcels is matched against the index
    of the zoning table (pandas' default inner merge).
    """
    parcel_frame = zoning_for_parcels.to_frame()
    zoning_frame = zoning.to_frame()
    return pd.merge(parcel_frame, zoning_frame,
                    left_on='zoning', right_index=True)


# Table relationships. Each call casts the first table onto the second: the
# first table's index (cast_index=True) is matched against the column of the
# second table named by onto_on.
orca.broadcast('zones', 'homesales', cast_index=True, onto_on='zone_id')
orca.broadcast('zones', 'costar', cast_index=True, onto_on='zone_id')
orca.broadcast('zones', 'apartments', cast_index=True, onto_on='zone_id')
orca.broadcast('zones', 'buildings', cast_index=True, onto_on='zone_id')
orca.broadcast('zones_prices', 'buildings', cast_index=True, onto_on='zone_id')
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
Ejemplo n.º 7
0
    t_data=travel_data.to_frame(columns=['am_single_vehicle_to_work_travel_time']).reset_index(level=1)
    return t_data[['to_zone_id']][t_data.am_single_vehicle_to_work_travel_time<20]

@orca.table('t_data_dist30', cache=True)
def dist30(travel_data):
    """
    Return the 'to_zone_id' column of the travel_data rows whose
    'am_single_vehicle_to_work_travel_time' is below 30.

    Index level 1 of travel_data is moved into a regular column first;
    presumably that level is 'to_zone_id' (confirm against the table).
    """
    travel_times = travel_data.to_frame(
        columns=['am_single_vehicle_to_work_travel_time'])
    travel_times = travel_times.reset_index(level=1)
    under_limit = travel_times.am_single_vehicle_to_work_travel_time < 30
    return travel_times[['to_zone_id']][under_limit]

@orca.table('t_data_dist15', cache=True)
def dist30(travel_data):
    """
    Return the 'to_zone_id' column of the travel_data rows whose
    'am_single_vehicle_to_work_travel_time' is below 15.

    Index level 1 of travel_data is moved into a regular column first;
    presumably that level is 'to_zone_id' (confirm against the table).

    NOTE(review): the function is named ``dist30`` although it is registered
    as 't_data_dist15'; only the table name distinguishes it from its
    siblings. The name is kept so the module-level binding does not change.
    """
    travel_times = travel_data.to_frame(
        columns=['am_single_vehicle_to_work_travel_time'])
    travel_times = travel_times.reset_index(level=1)
    under_limit = travel_times.am_single_vehicle_to_work_travel_time < 15
    return travel_times[['to_zone_id']][under_limit]

@orca.table('t_data_dist45', cache=True)
def dist30(travel_data):
    """
    Return the 'to_zone_id' column of the travel_data rows whose
    'am_single_vehicle_to_work_travel_time' is below 45.

    Index level 1 of travel_data is moved into a regular column first;
    presumably that level is 'to_zone_id' (confirm against the table).

    NOTE(review): the function is named ``dist30`` although it is registered
    as 't_data_dist45'; only the table name distinguishes it from its
    siblings. The name is kept so the module-level binding does not change.
    """
    travel_times = travel_data.to_frame(
        columns=['am_single_vehicle_to_work_travel_time'])
    travel_times = travel_times.reset_index(level=1)
    under_limit = travel_times.am_single_vehicle_to_work_travel_time < 45
    return travel_times[['to_zone_id']][under_limit]


# Broadcasts: relationships between tables. Unless noted otherwise, the
# first table's index (cast_index=True) is matched against the column of the
# second table named by onto_on.

orca.broadcast('zones', 'parcels', cast_index=True, onto_on='zone_id')
# onto_index=False is spelled out here; the join still uses onto_on
orca.broadcast('parcels','buildings', cast_index=True, onto_on='parcel_id', onto_index=False)
orca.broadcast('buildings','households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'establishments', cast_index=True, onto_on ='building_id')
orca.broadcast('zoning', 'parcels', cast_index=True, onto_on='zoning_id')
orca.broadcast('fars', 'parcels', cast_index=True, onto_on='far_id')
# index-to-index relationship: counties and zones are matched on their indexes
orca.broadcast('counties','zones', cast_index=True, onto_index=True)
orca.broadcast('buildings','households_for_estimation', cast_index=True, onto_on='building_id')
# counties is joined through 'zone_id' below; presumably valid because its
# index lines up with the zones index (see the index-to-index broadcast)
orca.broadcast('counties', 'establishments', cast_index=True, onto_on='zone_id')
orca.broadcast('counties', 'households', cast_index=True, onto_on='zone_id')
Ejemplo n.º 8
0
# these are shapes - "zones" in the bay area
@orca.table('zones', cache=True)
def zones(store):
    """Return the 'zones' entry of the data store (cached table)."""
    return store['zones']


# these are dummy returns that last until accessibility runs
@orca.table("nodes", cache=True)
def nodes():
    """Return an empty DataFrame as the placeholder 'nodes' table."""
    placeholder = pd.DataFrame()
    return placeholder


@orca.table("logsums", cache=True)
def logsums(settings):
    """
    Read 'logsums.csv' from the data directory.

    The index column is taken from the "logsums_index_col" setting and
    falls back to "taz" when that setting is absent.
    """
    index_column = settings.get("logsums_index_col", "taz")
    csv_path = os.path.join(misc.data_dir(), 'logsums.csv')
    return pd.read_csv(csv_path, index_col=index_column)


# This specifies the relationships between tables. Each call casts the first
# table onto the second: the first table's index (cast_index=True) is
# matched against the column of the second table named by onto_on.
orca.broadcast('nodes', 'buildings', cast_index=True, onto_on='node_id')
orca.broadcast('nodes', 'parcels', cast_index=True, onto_on='node_id')
orca.broadcast('logsums', 'buildings', cast_index=True, onto_on='zone_id')
orca.broadcast('logsums', 'parcels', cast_index=True, onto_on='zone_id')
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast(
    'buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
Ejemplo n.º 9
0
# Preview the first rows of the 2015 jobs table (bare expression, so the
# result is only shown in an interactive session).
orca.get_table('/2015/jobs').to_frame().head()
# TODO: find out which code number is used for the other employment types.
####################################################################

## Population by year and geographic unit
# Read the 2015 person, household and parcel tables and re-register each of
# them under a 'my_*' name.
df1 = orca.get_table('/2015/persons').to_frame()
orca.add_table('my_person', df1)
df2 = orca.get_table('/2015/households').to_frame()
# expose the household index as a regular column so it can serve as the
# join key of the broadcast below
df2['household_id'] = df2.index
orca.add_table('my_household', df2)
df3 = orca.get_table('/2015/parcels').to_frame()
orca.add_table('my_parcel', df3)

# Define the merge relationship between persons and households: the index
# of 'my_person' is matched against the 'household_id' column of
# 'my_household'.
# NOTE(review): this only lines up if the person index holds household ids;
# if it holds person ids the cast/onto direction is probably reversed --
# confirm.
orca.broadcast(cast='my_person', onto='my_household', cast_index=True, onto_on='household_id')
#orca.broadcast(cast='my_household', onto='my_parcel', cast_index=True, onto_on='census_2010_block_group_id')

# Column selection; presumably the columns to report (not used in the code
# visible here).
my_col = ['population_2015']
#my_col = ['population_2015', 'faz_id']

@orca.step()
def get_person_geo():
    # join person data with hourshold data
    df4 = orca.merge_tables(target='my_household', tables=['my_person', 'my_household'])
    # geographic info to dictionary 
    faz_dict = dict(zip(df3['census_2010_block_group_id'], df3['faz_id']))
    zone_dict = dict(zip(df3['census_2010_block_group_id'], df3['zone_id']))
    city_dict = dict(zip(df3['census_2010_block_group_id'], df3['city_id']))
    # map geo info to person table
    df4['faz_id'] = df4['census_2010_block_group_id'].map(faz_dict)
Ejemplo n.º 10
0
    df = store['assessor_transactions']
    df["index"] = df.index
    df.drop_duplicates(cols='index', take_last=True, inplace=True)
    del df["index"]
    return df
    
@orca.table('luz_base_indicators', cache=True)
def luz_base_indicators(store):
    """
    Count the households and jobs stored for each 'luz_id'.

    Buildings receive the 'luz_id' of their parcel; households and jobs then
    receive the 'luz_id' of their building.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'luz_id', with columns 'hh_base' (number of households)
        and 'emp_base' (number of jobs). A 'luz_id' that appears in only one
        of the two counts has NaN in the other column.
    """
    # .copy() turns each column subset into an independent frame, so adding
    # the 'luz_id' column below does not raise pandas' SettingWithCopyWarning
    households = store['households'][['building_id']].copy()
    jobs = store['jobs'][['building_id']].copy()
    buildings = store['buildings'][['parcel_id']].copy()
    parcels = store['parcels'][['luz_id']]

    # parcel -> building -> household / job
    buildings['luz_id'] = misc.reindex(parcels.luz_id, buildings.parcel_id)
    households['luz_id'] = misc.reindex(buildings.luz_id,
                                        households.building_id)
    jobs['luz_id'] = misc.reindex(buildings.luz_id, jobs.building_id)

    hh_luz_base = households.groupby('luz_id').size()
    emp_luz_base = jobs.groupby('luz_id').size()
    return pd.DataFrame({'hh_base': hh_luz_base, 'emp_base': emp_luz_base})


# This specifies the relationships between tables. Each call casts the first
# table onto the second: the first table's index (cast_index=True) is
# matched against the column of the second table named by onto_on.
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast(
    'buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
orca.broadcast('nodes', 'buildings', cast_index=True, onto_on='node_id')
orca.broadcast('nodes', 'parcels', cast_index=True, onto_on='node_id')
# costar and assessor_transactions are each linked to both nodes and parcels
orca.broadcast('nodes', 'costar', cast_index=True, onto_on='node_id')
orca.broadcast('parcels', 'costar', cast_index=True, onto_on='parcel_id')
orca.broadcast('nodes', 'assessor_transactions', cast_index=True, onto_on='node_id')
orca.broadcast('parcels', 'assessor_transactions', cast_index=True, onto_on='parcel_id')
Ejemplo n.º 11
0

# census tracts for parcels, to assign earthquake probabilities
@orca.table(cache=True)
def parcels_tract():
    """Read "parcel_tract_xwalk.csv" from the data directory, indexed by 'parcel_id'."""
    xwalk_path = os.path.join(misc.data_dir(), "parcel_tract_xwalk.csv")
    return pd.read_csv(xwalk_path, index_col='parcel_id')


# earthquake and fire damage probabilities for census tracts
@orca.table(cache=True)
def tracts_earthquake():
    """Read "tract_damage_earthquake.csv" from the data directory with the default index."""
    damage_path = os.path.join(misc.data_dir(), "tract_damage_earthquake.csv")
    return pd.read_csv(damage_path)


# This specifies the relationships between tables. Each call casts the first
# table onto the second: the first table's index (cast_index=True) is
# matched against the column of the second table named by onto_on.
orca.broadcast('buildings', 'residential_units', cast_index=True,
               onto_on='building_id')
# households are linked to residential units through 'unit_id'
orca.broadcast('residential_units', 'households', cast_index=True,
               onto_on='unit_id')
orca.broadcast('parcels_geography', 'buildings', cast_index=True,
               onto_on='parcel_id')
orca.broadcast('parcels', 'buildings', cast_index=True,
               onto_on='parcel_id')
# not defined in urbansim_Defaults
orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
orca.broadcast('taz_geography', 'parcels', cast_index=True,
               onto_on='zone_id')
Ejemplo n.º 12
0
import orca


@orca.table(cache=True)
def land_use(store):
    """Return the "land_use/taz_data" entry of the data store (cached table)."""
    taz_data = store["land_use/taz_data"]
    return taz_data


# relate land_use to households: the land_use index is matched against the
# households 'TAZ' column
orca.broadcast('land_use', 'households', cast_index=True, onto_on='TAZ')


@orca.column("land_use")
def total_households(land_use):
    """Expose the local TOTHH column of land_use as 'total_households'."""
    local_frame = land_use.local
    return local_frame.TOTHH


@orca.column("land_use")
def total_employment(land_use):
    """Expose the local TOTEMP column of land_use as 'total_employment'."""
    local_frame = land_use.local
    return local_frame.TOTEMP


@orca.column("land_use")
def total_acres(land_use):
    """Expose the local TOTACRE column of land_use as 'total_acres'."""
    local_frame = land_use.local
    return local_frame.TOTACRE


@orca.column("land_use")
def county_id(land_use):
    """Expose the local COUNTY column of land_use as 'county_id'."""
    local_frame = land_use.local
    return local_frame.COUNTY

Ejemplo n.º 13
0
@orca.table('scheduled_development_events', cache=True)
def scheduled_development_events(store):
    """Return the 'scheduled_development_events' entry of the data store."""
    events = store['scheduled_development_events']
    return events


@orca.table('zoning', cache=True)
def zoning(store):
    """Return the 'zoning' entry of the data store (cached table)."""
    zoning_df = store['zoning']
    return zoning_df


@orca.table('zoning_allowed_uses', cache=True)
def zoning_allowed_uses(store, parcels):
    """
    Build a per-parcel table of allowed-use flags.

    The result is indexed like ``parcels`` and has one boolean column per
    distinct value in the index of the stored 'zoning_allowed_uses' table.
    A flag is True when the parcel's 'zoning_id' has an entry for that index
    value and False otherwise.
    """
    uses = store['zoning_allowed_uses']
    parcel_zoning = parcels.to_frame(columns=['zoning_id'])
    flags = pd.DataFrame(index=parcel_zoning.index)

    for devtype in np.unique(uses.index.values):
        # rows for this index value, re-keyed by zoning_id
        by_zoning = (uses.loc[uses.index == devtype]
                     .reset_index()
                     .set_index('zoning_id'))
        matched = misc.reindex(by_zoning.development_type_id,
                               parcel_zoning.zoning_id)
        # a parcel is allowed exactly when the lookup found a value
        flags[devtype] = matched.notnull()

    return flags

# Table relationships. Each call casts the first table onto the second: the
# first table's index (cast_index=True) is matched against the column of the
# second table named by onto_on.
orca.broadcast('parcels', 'assessor_transactions', cast_index=True, onto_on='parcel_id')
orca.broadcast('nodes', 'assessor_transactions', cast_index=True, onto_on='node_id')
orca.broadcast('zoning', 'parcels', cast_index=True, onto_on='zoning_id')
                         columns=trace_columns,
                         warn_if_empty=True)


"""
This reprocesses the chosen index of the mandatory tour frequency
alternatives into an actual dataframe of tours.  The ending format is
the same as for non_mandatory_tours, except that trip types are "work" and
"school".
"""


@orca.table(cache=True)
def mandatory_tours(persons):
    """
    Return the tours table built by ``process_mandatory_tours``.

    Only persons with a non-null 'mandatory_tour_frequency' are passed on;
    the 'is_worker' column is carried along with it.
    """
    candidates = persons.to_frame(
        columns=["mandatory_tour_frequency", "is_worker"])
    has_choice = candidates.mandatory_tour_frequency.notnull()
    return process_mandatory_tours(candidates[has_choice])


# broadcast persons (and persons_merged) onto mandatory_tours: the index of
# the persons table is matched against the 'person_id' foreign key column of
# mandatory_tours
orca.broadcast('persons',
               'mandatory_tours',
               cast_index=True,
               onto_on='person_id')
orca.broadcast('persons_merged',
               'mandatory_tours',
               cast_index=True,
               onto_on='person_id')
    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    print "Choices:\n", choices.value_counts()
    orca.add_column("persons", "mandatory_tour_frequency", choices)


"""
This reprocesses the chosen index of the mandatory tour frequency
alternatives into an actual dataframe of tours.  The ending format is
the same as for non_mandatory_tours, except that trip types are "work" and
"school".
"""


@orca.table(cache=True)
def mandatory_tours(persons):
    """
    Return the tours table built by ``process_mandatory_tours``.

    Persons whose 'mandatory_tour_frequency' is null are dropped before the
    call; the 'is_worker' column is carried along with it.
    """
    wanted_columns = ["mandatory_tour_frequency", "is_worker"]
    frame = persons.to_frame(columns=wanted_columns)
    no_choice = frame.mandatory_tour_frequency.isnull()
    frame = frame[~no_choice]
    return process_mandatory_tours(frame)


# broadcast persons (and persons_merged) onto mandatory_tours: the index of
# the persons table is matched against the 'person_id' foreign key column of
# mandatory_tours
orca.broadcast('persons', 'mandatory_tours',
               cast_index=True, onto_on='person_id')
orca.broadcast('persons_merged', 'mandatory_tours',
               cast_index=True, onto_on='person_id')
Ejemplo n.º 16
0
# Example pets table, indexed by pet id ('p0'..'p4'); 'species_id' is the
# key used to relate each pet to a species.
df_pet = pd.DataFrame(
    {
        'pet_name': ['wilkie', 'lassie', 'leo', 'felix', 'rex'],
        'age': [14, 104, 3, 82, 7],
        'iq': [100, 140, 87, 120, 94],
        'species_id': ['D', 'D', 'C', 'C', 'D'],
    },
    index=['p0', 'p1', 'p2', 'p3', 'p4'],
)

# keep a copy of the starting ages in a separate 'init_age' column
df_pet.insert(len(df_pet.columns), 'init_age', df_pet['age'])
df_pet

# register both dataframes as orca tables named 'species' and 'pets'
orca.add_table('species', df_species)
orca.add_table('pets', df_pet)

# broadcast so we can merge the pets and species dataframes: the species
# index is matched against the pets 'species_id' column
orca.broadcast(cast='species', onto='pets', cast_index=True, onto_on='species_id')

# this is a common merge so might as well define it once here and use it
@orca.table()
def pets_merged(pets, species):
    """Return the pets table merged with the species table (pets is the target)."""
    sources = [pets, species]
    return orca.merge_tables(target=pets.name, tables=sources)

# Fetch the orca-registered 'pets_merged' table and materialize it as a
# DataFrame (bare expression, so the result is only shown in an interactive
# session).
orca.get_table('pets_merged').to_frame()


##########################################################################
# simple_simulate
##########################################################################
Ejemplo n.º 17
0
    if "households_sample_size" in settings:
        return asim.random_rows(store["households"],
                                settings["households_sample_size"])

    return store["households"]


# this is a common merge so might as well define it once here and use it
@orca.table()
def households_merged(households, land_use, accessibility):
    """Return households merged with the land_use and accessibility tables."""
    sources = [households, land_use, accessibility]
    return orca.merge_tables(target=households.name, tables=sources)


# relate households to persons: the households index is matched against the
# persons 'household_id' column
orca.broadcast('households', 'persons', cast_index=True, onto_on='household_id')


@orca.column("households")
def income_in_thousands(households):
    """Return the households 'income' column divided by 1000."""
    income = households.income
    return income / 1000


@orca.column("households")
def income_segment(households):
    """
    Bin 'income_in_thousands' into four segments labelled 1 to 4.

    The bin edges are -inf, 30, 60, 100 and +inf; ``pd.cut`` is called with
    its default settings, so each bin includes its right edge.
    """
    bin_edges = [-np.inf, 30, 60, 100, np.inf]
    segment_labels = [1, 2, 3, 4]
    return pd.cut(households.income_in_thousands,
                  bins=bin_edges,
                  labels=segment_labels)


@orca.column("households")
Ejemplo n.º 18
0
    store = orca.get_injectable("store")
    orca.add_table(name, store[name])

# Register CSV-backed tables (paths are relative to the working directory):
# 'remi_pop_total' is indexed by its 'large_area_id' column, while
# 'target_vacancies' keeps the default index.
orca.add_table("remi_pop_total", pd.read_csv("data/remi_hhpop_bylarge.csv", index_col='large_area_id'))
orca.add_table('target_vacancies', pd.read_csv("data/target_vacancies.csv"))


# these are dummy returns that last until accessibility runs
for node_tbl in ['nodes', 'nodes_walk', 'nodes_drv']:
    # every name gets its own empty placeholder DataFrame
    empty_df = pd.DataFrame()
    orca.add_table(node_tbl, empty_df)

# GQ placeholders
# for gq_tbl in ['tazcounts2040gq', 'tazcounts2015gq', 'tazcounts2020gq', 'tazcounts2035gq', 'tazcounts2025gq',
#                'tazcounts2030gq']:
#     empty_df = pd.DataFrame()
#     orca.add_table(gq_tbl, empty_df)

# This specifies the relationships between tables. Unless noted otherwise,
# the first table's index (cast_index=True) is matched against the column of
# the second table named by onto_on.
orca.broadcast('nodes_walk', 'buildings', cast_index=True, onto_on='nodeid_walk')
orca.broadcast('nodes_walk', 'parcels', cast_index=True, onto_on='nodeid_walk')
orca.broadcast('nodes_drv', 'buildings', cast_index=True, onto_on='nodeid_drv')
orca.broadcast('nodes_drv', 'parcels', cast_index=True, onto_on='nodeid_drv')
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
orca.broadcast('households', 'persons', cast_index=True, onto_on='household_id')
orca.broadcast('building_types', 'buildings', cast_index=True, onto_on='building_type_id')
orca.broadcast('zones', 'parcels', cast_index=True, onto_on='zone_id')
# reversed key layout: the schools 'parcel_id' column is matched against the
# parcels index
orca.broadcast('schools', 'parcels', cast_on='parcel_id', onto_index=True)