def test_property_persistence(orca_session):
    """
    Test persistence of properties across registration, saving, and reloading.

    A ``LoadTable`` step is configured, registered with modelmanager, and
    fetched again by name after ``modelmanager.initialize()``. The
    ``to_dict()`` output before and after the round trip must be equal.

    Parameters
    ----------
    orca_session :
        Not referenced in the body; presumably a pytest fixture that prepares
        the orca environment - TODO confirm against the test module.

    """
    t = LoadTable()
    t.table = 'buildings'
    t.source_type = 'csv'
    t.path = 'data/buildings.csv'
    t.csv_index_cols = 'building_id'
    t.extra_settings = {
        'make_data_awesome': True
    }  # unfortunately not a valid setting
    t.cache = False
    t.cache_scope = 'iteration'
    t.copy_col = False
    t.name = 'buildings-csv'
    t.tags = ['awesome', 'data']
    t.autorun = False

    d1 = t.to_dict()
    modelmanager.register(t)
    try:
        modelmanager.initialize()
        d2 = modelmanager.get_step(t.name).to_dict()

        assert d1 == d2
    finally:
        # Unregister even when the comparison fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step(t.name)
def test_observation_sampling():
    """
    Test merged-choice-table sizes with and without chooser sampling, and
    that ``chooser_sample_size`` is still set after the step is registered
    and fetched again by name.

    """
    mm.initialize()

    m = LargeMultinomialLogitStep()
    m.choosers = 'obs'
    m.alternatives = 'alts'
    m.choice_column = 'choice'
    m.model_expression = 'obsval + altval'

    m.fit()
    # 200 after fixing alt sampling
    assert len(m.mergedchoicetable.to_frame()) == 190

    m.chooser_sample_size = 5
    m.fit()
    # 100 after fixing alt sampling
    assert len(m.mergedchoicetable.to_frame()) == 95

    m.name = 'mnl-test'
    m.register()
    try:
        mm.initialize()
        m = mm.get_step('mnl-test')
        assert m.chooser_sample_size == 5
    finally:
        # Unregister even when an assertion fails, so the step is not left
        # behind for whatever runs next.
        mm.remove_step('mnl-test')
Exemple #3
0
def wlcm_simulate():
    """
    Simulate workplace location choices for the synthetic population.

    This wrapper is a temporary workaround until the model templates can
    handle interaction terms themselves; otherwise the template would not
    normally need an additional orca step wrapper like this one.

    Three interaction-term tables (travel time, distance, cost) are read from
    CSV, each indexed by the (home zone, work zone) pair, and passed to the
    registered 'WLCM' step. Afterwards a jobs -> persons broadcast is
    registered with orca.

    """
    zone_pair = ('zone_id_home', 'zone_id_work')
    term_files = (
        './data/WLCM_interaction_terms_tt.csv',
        './data/WLCM_interaction_terms_dist.csv',
        './data/WLCM_interaction_terms_cost.csv',
    )
    # Read in the same order as before: travel time, distance, cost.
    interaction_terms = [
        pd.read_csv(csv_path, index_col=list(zone_pair))
        for csv_path in term_files
    ]

    wlcm_step = mm.get_step('WLCM')
    wlcm_step.run(chooser_batch_size=200000,
                  interaction_terms=interaction_terms)

    orca.broadcast('jobs', 'persons', cast_index=True, onto_on='job_id')
Exemple #4
0
def test_property_persistence(m):
    """
    Test persistence of properties across registration, saving, and reloading.

    The step ``m`` is fitted, each property below is given a value, and the
    step is registered. After ``modelmanager.initialize()`` the step fetched
    by name must produce the same ``to_dict()`` output as before.

    Parameters
    ----------
    m :
        The model step under test; presumably supplied by a pytest fixture
        defined outside this block - TODO confirm.

    """
    m.fit()
    m.name = 'my-model'
    m.tags = ['tag1']
    m.chooser_filters = 'filters1'
    m.chooser_sample_size = 100
    m.alt_filters = 'filter2'
    m.out_choosers = 'choosers2'
    m.out_alternatives = 'alts2'
    m.out_column = 'choices'
    m.out_chooser_filters = 'filters3'
    m.out_alt_filters = 'filters4'
    m.constrained_choices = True
    m.alt_capacity = 'cap'
    m.chooser_size = 'size'
    m.max_iter = 17

    d1 = m.to_dict()
    modelmanager.initialize()
    modelmanager.register(m)
    try:
        modelmanager.initialize()
        d2 = modelmanager.get_step('my-model').to_dict()

        assert d1 == d2
    finally:
        # Unregister even when the comparison fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step('my-model')
Exemple #5
0
def auto_ownership_simulate(households):
    """
    Generate auto ownership choices for the synthetic pop households. The categories are:
    - 0: no vehicle
    - 1: one vehicle
    - 2: two vehicles
    - 3: three or more vehicles

    Before running the registered 'auto_ownership' step this function:

    1. adds one income-bin dummy column per bin to the 'households' table;
    2. adds the AM-peak accessibility columns to the 'parcels' table, looked
       up by each parcel's ``block_id`` (missing values are filled with the
       column median);
    3. registers an uncached 'hh_merged' orca table.

    Parameters
    ----------
    households :
        Not read in the body (the table is fetched through
        ``orca.get_table``); presumably present for orca's argument
        injection - TODO confirm.
    """

    # income bin dummies
    income_bins = pd.cut(
        orca.get_table('households').to_frame().income,
        bins=[0, 20000, 40000, 60000, 80000, 100000, 120000, np.inf],
        labels=['2', '4', '6', '8', '10', '12', '12p'],
        include_lowest=True)

    income_bin_dummies = pd.get_dummies(income_bins, prefix='income')

    for i in income_bin_dummies.columns:
        orca.add_column('households', i, income_bin_dummies[i])

    # load UrbanAccess transit accessibility variables
    parcels = orca.get_table('parcels').to_frame()
    am_acc = pd.read_csv('./data/access_indicators_ampeak.csv',
                         dtype={'block_id': str})
    am_acc.block_id = am_acc.block_id.str.zfill(15)

    # A column-on-column merge returns a fresh 0..n-1 index, so reindexing the
    # merged frame by the parcels index would select rows by position-as-label
    # rather than by parcel. Instead, look each parcel's block_id up in the
    # accessibility table and label the result with the parcels index.
    # Assumes block_id is a regular column of the parcels frame; if a block
    # appears more than once in the CSV, the first row is used.
    acc_by_block = am_acc.drop_duplicates(subset='block_id').set_index(
        'block_id')
    new_acc_cols = [
        col for col in acc_by_block.columns if col not in parcels.columns
    ]
    parcel_acc = acc_by_block[new_acc_cols].reindex(
        parcels['block_id'].values)
    parcel_acc.index = parcels.index  # align with parcels table

    for acc_col in new_acc_cols:
        # fill NA with median value
        orca.add_column(
            'parcels', acc_col,
            parcel_acc[acc_col].fillna(parcel_acc[acc_col].median()))

    @orca.table(cache=False)
    def hh_merged():
        df = orca.merge_tables(target='households',
                               tables=[
                                   'households', 'units', 'buildings',
                                   'parcels', 'nodessmall', 'nodeswalk'
                               ])
        return df

    m = mm.get_step('auto_ownership')

    # remove filters, specify out table, out column

    m.filters = None
    m.out_table = 'households'
    m.out_column = 'cars_alt'

    m.run()
Exemple #6
0
def TOD_choice_simulate():
    """
    Generate time of day period choices for the synthetic population
    home-work and work-home trips.

    Builds an observation table from persons, households and jobs, attaches
    skim columns for the home->work and work->home zone pairs, and sums the
    drive-alone times for every (outbound period, return period) pair. The
    table is registered as the cached orca table 'tripsA', the 'TOD_choice'
    step is run, and the resulting 'TOD' column is merged onto 'persons' by
    index.

    Raises
    ------
    ValueError
        If the skim merges change the number of rows, which means the skims
        file holds more than one row for some (orig, dest) pair.

    """
    TOD_obs = orca.merge_tables('persons', ['persons', 'households', 'jobs'])

    TOD_obs.dropna(inplace=True)

    # pd.merge on columns discards the left index, but the final step below
    # joins 'TOD' back onto persons by index. Remember the index here so it
    # can be restored once the skims are attached.
    person_index = TOD_obs.index

    skims = pd.read_csv('./data/skims_110118.csv')

    TOD_obs = pd.merge(TOD_obs,
                       skims,
                       how='left',
                       left_on=['zone_id_home', 'zone_id_work'],
                       right_on=['orig', 'dest'])

    # Skim columns already attached by the first merge collide with the second
    # copy; the suffixes mark them as home->work (_HW) and work->home (_WH).
    TOD_obs = pd.merge(TOD_obs,
                       skims,
                       how='left',
                       left_on=['zone_id_work', 'zone_id_home'],
                       right_on=['orig', 'dest'],
                       suffixes=('_HW', '_WH'))

    # Left joins keep the left rows in order, so the saved index lines up
    # one-to-one unless a duplicated skim key has multiplied rows.
    if len(TOD_obs) != len(person_index):
        raise ValueError(
            'skims merge changed the number of observations; the skims file '
            'has duplicate (orig, dest) rows')
    TOD_obs.index = person_index

    TOD_list = ['EA', 'AM', 'MD', 'PM', 'EV']

    for tod1 in TOD_list:
        for tod2 in TOD_list:
            col_name = f'da_Time_{tod1}_{tod2}'
            TOD_obs[col_name] = TOD_obs[f'da_Time_{tod1}_HW'] + TOD_obs[
                f'da_Time_{tod2}_WH']

    m = mm.get_step('TOD_choice')

    @orca.table(cache=True)
    def tripsA():
        return TOD_obs

    m.run()

    results = orca.get_table('tripsA').to_frame()
    persons = orca.get_table('persons').to_frame()
    persons = pd.merge(persons,
                       results[['TOD']],
                       how='left',
                       left_index=True,
                       right_index=True)
    orca.add_table('persons', persons)
def test_property_persistence(m):
    """
    Test persistence of properties across registration, saving, and reloading.

    The step ``m`` is named, tagged and fitted with ``fit_all()``, then
    registered. After ``modelmanager.initialize()`` the step fetched by name
    must produce the same ``to_dict()`` output as before.

    Parameters
    ----------
    m :
        The model step under test; presumably supplied by a pytest fixture
        defined outside this block - TODO confirm.

    """
    m.name = 'test'
    m.tags = ['one', 'two']
    m.fit_all()
    d1 = m.to_dict()
    modelmanager.initialize()
    modelmanager.register(m)
    try:
        modelmanager.initialize()
        d2 = modelmanager.get_step('test').to_dict()
        assert d1 == d2
    finally:
        # Unregister even when the comparison fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step('test')
Exemple #8
0
def test_diagnostic_attributes(data):
    """
    Test that diagnostic attributes are available when expected.

    ``model``, ``mergedchoicetable``, ``probabilities`` and ``choices`` must
    be ``None`` on a new step. After ``fit()`` the model and merged choice
    table are checked; after the step is registered, fetched again by name
    and ``run()``, the table, probability and choice lengths are checked.

    Parameters
    ----------
    data :
        Not referenced in the body; presumably a pytest fixture that creates
        the 'obs' and 'alts' tables - TODO confirm.

    """
    m = LargeMultinomialLogitStep()
    m.choosers = 'obs'
    m.alternatives = 'alts'
    m.choice_column = 'choice'
    m.model_expression = 'obsval + altval'
    m.alt_sample_size = 10

    assert m.model is None
    assert m.mergedchoicetable is None
    assert m.probabilities is None
    assert m.choices is None

    m.fit()

    assert isinstance(m.model, MultinomialLogitResults)

    len_mct = len(m.mergedchoicetable.to_frame())
    len_obs_alts = len(orca.get_table(m.choosers).to_frame()) * m.alt_sample_size

    assert len_mct == len_obs_alts

    name = m.name
    modelmanager.register(m)
    try:
        modelmanager.initialize()
        m = modelmanager.get_step(name)

        assert isinstance(m.model, MultinomialLogitResults)

        m.run()

        len_mct = len(m.mergedchoicetable.to_frame())
        len_probs = len(m.probabilities)
        len_choices = len(m.choices)
        len_obs = len(orca.get_table(m.choosers).to_frame())
        len_obs_alts = len_obs * m.alt_sample_size

        assert len_mct == len_obs_alts
        assert len_probs == len_obs_alts
        assert len_choices == len_obs
    finally:
        # Unregister even when an assertion fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step(name)
def test_ols(orca_session):
    """
    For now this just tests that the code runs.

    An ``OLSRegressionStep`` is fitted on table 'obs', registered under a
    fixed name, fetched again after ``modelmanager.initialize()``, and then
    removed.

    Parameters
    ----------
    orca_session :
        Not referenced in the body; presumably a pytest fixture that creates
        the 'obs' table - TODO confirm.

    """
    modelmanager.initialize()

    m = OLSRegressionStep()
    m.tables = 'obs'
    m.model_expression = 'a ~ b'

    m.fit()

    m.name = 'ols-test'
    modelmanager.register(m)
    try:
        modelmanager.initialize()
        # The fetched step is not inspected; the call only has to succeed.
        modelmanager.get_step('ols-test')
    finally:
        # Unregister even when the reload fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step('ols-test')
Exemple #10
0
def wlcm_simulate(persons):
    """
    Simulate workplace location choices for the synthetic population.

    Reads the travel-time, distance and cost interaction-term tables (each
    indexed by home zone and work zone) and passes them, in that order, to
    the registered constrained WLCM step.

    Parameters
    ----------
    persons :
        Not read in the body; presumably present for orca's argument
        injection - TODO confirm.

    """
    def _read_terms(csv_path):
        # Each file is keyed by the (home zone, work zone) pair.
        return pd.read_csv(csv_path,
                           index_col=['zone_id_home', 'zone_id_work'])

    tt_terms = _read_terms('./data/WLCM_interaction_terms_tt.csv')
    dist_terms = _read_terms('./data/WLCM_interaction_terms_dist.csv')
    cost_terms = _read_terms('./data/WLCM_interaction_terms_cost.csv')

    step_name = 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
    wlcm_step = mm.get_step(step_name)

    wlcm_step.run(
        chooser_batch_size=200000,
        interaction_terms=[tt_terms, dist_terms, cost_terms],
    )
Exemple #11
0
def test_binary_logit(orca_session):
    """
    For now this just tests that the code runs.

    A ``BinaryLogitStep`` is fitted on table 'obs', registered under a fixed
    name, fetched again after ``modelmanager.initialize()``, and then
    removed.

    Parameters
    ----------
    orca_session :
        Not referenced in the body; presumably a pytest fixture that creates
        the 'obs' table - TODO confirm.

    """
    modelmanager.initialize()

    m = BinaryLogitStep()
    m.tables = 'obs'
    m.model_expression = 'b ~ a'

    m.fit()

    m.name = 'binary-test'
    modelmanager.register(m)
    try:
        modelmanager.initialize()
        # The fetched step is not inspected; the call only has to succeed.
        modelmanager.get_step('binary-test')
    finally:
        # Unregister even when the reload fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step('binary-test')
Exemple #12
0
def test_property_persistence(orca_session):
    """
    Test persistence of properties across registration, saving, and reloading.

    A ``SaveTable`` step is configured, registered with modelmanager, and
    fetched again by name after ``modelmanager.initialize()``. The
    ``to_dict()`` output before and after the round trip must be equal.

    Parameters
    ----------
    orca_session :
        Not referenced in the body; presumably a pytest fixture that prepares
        the orca environment - TODO confirm against the test module.

    """
    t = SaveTable()
    t.table = 'buildings'
    t.columns = ['window_panes', 'number_of_chimneys']
    t.filters = 'number_of_chimneys > 15'
    t.output_type = 'csv'
    t.path = 'data/buildings.csv'
    t.extra_settings = {'make_data_awesome': True}
    t.name = 'save-buildings-csv'
    t.tags = ['awesome', 'chimneys']

    d1 = t.to_dict()
    modelmanager.register(t)
    try:
        modelmanager.initialize()
        d2 = modelmanager.get_step(t.name).to_dict()

        assert d1 == d2
    finally:
        # Unregister even when the comparison fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step(t.name)
def test_small_mnl(orca_session):
    """
    Test that the code runs, and that the model_expression is always available.

    ``model_expression`` is checked after fitting, after registration, and
    after the step is fetched again by name following
    ``modelmanager.initialize()``. The step is also run once, writing to
    'simulated_choice'.

    Parameters
    ----------
    orca_session :
        Not referenced in the body; presumably a pytest fixture that creates
        the 'households' and 'buildings' tables - TODO confirm.

    """
    modelmanager.initialize()

    m = SmallMultinomialLogitStep()
    m.tables = ['households', 'buildings']
    m.choice_column = 'choice'
    m.model_expression = OrderedDict([('intercept', [1, 2]), ('a', [0, 2]),
                                      ('b', [0, 2])])

    m.fit()
    assert m.model_expression is not None

    print(m.model_expression)

    m.name = 'small-mnl-test'
    modelmanager.register(m)
    try:
        assert m.model_expression is not None

        print(m.model_expression)

        # TEST SIMULATION
        m.out_column = 'simulated_choice'

        m.run()
        print(orca.get_table('households').to_frame())

        modelmanager.initialize()
        m = modelmanager.get_step('small-mnl-test')
        assert m.model_expression is not None

        print(m.model_expression)
    finally:
        # Unregister even when an assertion or the run fails, so the step is
        # not left behind for whatever runs next.
        modelmanager.remove_step('small-mnl-test')
Exemple #14
0
def test_observation_sampling(orca_session):
    """
    Test merged-choice-table sizes with and without chooser sampling, and
    that ``chooser_sample_size`` is still set after the step is registered
    and fetched again by name.

    Parameters
    ----------
    orca_session :
        Not referenced in the body; presumably a pytest fixture that creates
        the 'obs' and 'alts' tables - TODO confirm.

    """
    modelmanager.initialize()

    m = LargeMultinomialLogitStep()
    m.choosers = 'obs'
    m.alternatives = 'alts'
    m.choice_column = 'choice'
    m.model_expression = 'obsval + altval'

    m.fit()
    assert len(m.mergedchoicetable.to_frame()) == 200

    m.chooser_sample_size = 5
    m.fit()
    assert len(m.mergedchoicetable.to_frame()) == 100

    m.name = 'mnl-test'
    modelmanager.register(m)
    try:
        modelmanager.initialize()
        m = modelmanager.get_step('mnl-test')
        assert m.chooser_sample_size == 5
    finally:
        # Unregister even when an assertion fails, so the step is not left
        # behind for whatever runs next.
        modelmanager.remove_step('mnl-test')
Exemple #15
0
def primary_mode_choice_simulate(persons):
    """
    Generate primary mode choices for the synthetic population. The choices are:
    - 0: drive alone
    - 1: shared
    - 2: walk-transit-walk
    - 3: drive-transit-walk
    - 4: walk-transit-drive
    - 5: bike
    - 6: walk

    Registers a cached orca table 'persons_CHTS_format' (persons with
    CHTS-style column names, origin/destination zones, MTC accessibility
    columns and period-specific skims), then points the registered
    'primary_mode_choice' step at that table and runs it with
    'primary_commute_mode' as the output column.

    Parameters
    ----------
    persons :
        Not read in the body (the table builder re-reads 'persons' through
        ``orca.get_table``); presumably present for orca's argument
        injection - TODO confirm.
    """
    @orca.table(cache=True)
    def persons_CHTS_format():
        # use persons with jobs for persons
        persons = orca.get_table('persons').to_frame()
        persons.index.name = 'person_id'
        persons.reset_index(inplace=True)
        # .copy() so the column edits below act on an independent frame
        # rather than on a selection of the original (avoids pandas'
        # SettingWithCopyWarning).
        persons = persons[[
            'person_id', 'sex', 'age', 'race_id', 'worker', 'edu',
            'household_id', 'job_id', 'TOD'
        ]].copy()

        hh_df = orca.get_table('households').to_frame().reset_index()[[
            'household_id', 'cars', 'tenure', 'income', 'persons',
            'building_id'
        ]]
        jobs_df = orca.get_table('jobs').to_frame().reset_index()[[
            'job_id', 'building_id'
        ]]
        buildings_df = orca.get_table('buildings').to_frame().reset_index()[[
            'building_id', 'parcel_id'
        ]]
        parcels_df = orca.get_table('parcels').to_frame().reset_index()[[
            'primary_id', 'zone_id'
        ]].rename(columns={'primary_id': 'parcel_id'})

        # rename columns/change values to match CHTS
        persons.columns = [
            'person_id', 'GEND', 'AGE', 'RACE1', 'JOBS', 'EDUCA',
            'household_id', 'job_id', 'TOD'
        ]
        persons.RACE1 = persons.RACE1.map({
            1: 1, 2: 2, 3: 3, 4: 3, 5: 3, 6: 4, 7: 5, 8: 97, 9: 97
        })
        persons.EDUCA = persons.EDUCA.map({
            0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1,
            8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1,
            16: 2, 17: 2,
            18: 3, 19: 3,
            20: 4,
            21: 5,
            22: 6, 23: 6, 24: 6
        })
        # TOD codes absent from this mapping become NaN.
        persons.TOD = persons.TOD.map({
            2: 'EA', 3: 'EA',
            12: 'AM', 14: 'AM',
            22: 'MD', 23: 'MD', 24: 'MD'
        })

        # read skim
        # NOTE(review): absolute, user-specific path; this only runs on a
        # machine that has this exact directory. Consider a repo-relative
        # location.
        skim = pd.read_csv(
            '/home/emma/ual_model_workspace/fall-2018-models/skims_110118.csv',
            index_col=0)

        skim.columns = skim.columns.str.replace(
            '_distance', '_Distance')  # capitalization issues
        skim.columns = skim.columns.str.replace('_cost', '_Cost')

        # One skim slice per time-of-day period: strip the period suffix from
        # the column names and record the period in a 'TOD' column so all
        # slices can be stacked and joined on (orig, dest, TOD).
        period_skims = []
        for tod in ('EA', 'AM', 'MD'):
            period_cols = ['orig', 'dest'] + list(
                skim.filter(like=tod).columns)
            period_skim = skim[period_cols].copy()
            period_skim.columns = period_skim.columns.str.replace(
                '_' + tod, '')
            period_skim['TOD'] = tod
            period_skims.append(period_skim)

        skim_combined = pd.concat(period_skims)

        MTC_acc = pd.read_csv('./data/MTC_TAZ_accessibility.csv')

        # merge attributes onto persons
        # want household as origin zone and job as destination zone.

        hh_df = hh_df.merge(
            buildings_df, how='left', on='building_id').merge(
                parcels_df, how='left', on='parcel_id')
        hh_df.rename(columns={'zone_id': 'orig'}, inplace=True)

        jobs_df = jobs_df.merge(
            buildings_df, how='left', on='building_id').merge(
                parcels_df, how='left', on='parcel_id')
        jobs_df.rename(columns={'zone_id': 'dest'}, inplace=True)

        persons = persons.merge(hh_df, how='left', on='household_id')
        persons.drop(['building_id', 'parcel_id'], axis=1, inplace=True)

        # inner join: persons without a matching job row are dropped
        persons = persons.merge(jobs_df, how='inner', on='job_id')
        persons.drop(['building_id', 'parcel_id'], axis=1, inplace=True)

        persons = persons.merge(MTC_acc,
                                how='left',
                                left_on='orig',
                                right_on='taz1454')
        persons[MTC_acc.columns] = persons[MTC_acc.columns].fillna(0)

        persons = persons.merge(skim_combined,
                                how='left',
                                on=['orig', 'dest', 'TOD'])

        # rename the remaining attributes
        persons['OWN'] = (persons['tenure'] == 1).astype(int)
        persons.rename(
            columns={
                'cars': 'HHVEH',
                'income': 'INCOM',
                'persons': 'HHSIZ'
            },
            inplace=True)
        return persons

    m = mm.get_step('primary_mode_choice')

    # remove filters, specify out table, out column
    m.filters = None
    m.out_filters = None
    m.tables = ['persons_CHTS_format']
    m.out_tables = 'persons_CHTS_format'
    m.out_column = 'primary_commute_mode'

    m.run()