def auto_ownership_simulate(households_merged, auto_ownership_spec, auto_ownership_settings, trace_hh_id):
    """
    Predict how many cars each household owns.

    Auto ownership is a standard model which predicts how many cars a
    household with given characteristics owns.  Results are written to the
    'auto_ownership' column of the households table, along with any
    dependent columns registered as 'households_autoown'.
    """
    logger.info("Running auto_ownership_simulate with %d households" %
                len(households_merged))

    # both the nesting structure and expression constants come from settings
    constants = config.get_model_constants(auto_ownership_settings)
    nest_spec = config.get_logit_model_settings(auto_ownership_settings)

    choices = asim.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=auto_ownership_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_hh_id and 'auto_ownership',
        trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    # persist the choice column and any columns derived from it
    orca.add_column('households', 'auto_ownership', choices)
    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        dependent_cols = orca.get_table('households_autoown').columns
        tracing.trace_df(orca.get_table('households').to_frame(),
                         label='auto_ownership',
                         columns=['auto_ownership'] + dependent_cols,
                         warn_if_empty=True)
def cdap_simulate(persons_merged, cdap_settings, cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some
    specialized routines in the cdap directory of activitysim for this purpose.
    This module simply applies those utilities using the simulation framework.

    Writes 'cdap_activity' and 'cdap_rank' columns to the persons table and
    adds dependent columns to both persons and households.
    """
    persons_df = persons_merged.to_frame()

    constants = config.get_model_constants(cdap_settings)

    logger.info("Running cdap_simulate with %d persons" % len(persons_df.index))

    choices = run_cdap(
        persons=persons_df,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label='cdap')

    tracing.print_summary('cdap_activity', choices.cdap_activity,
                          value_counts=True)

    # FIX: was a Python 2 print statement (`print pd.crosstab(...)`), which is
    # a SyntaxError under Python 3; the call form behaves the same on both.
    print(pd.crosstab(persons_df.ptype, choices.cdap_activity, margins=True))

    # run_cdap returns choices for the persons it ran; align to the full index
    choices = choices.reindex(persons_merged.index)

    orca.add_column("persons", "cdap_activity", choices.cdap_activity)
    orca.add_column("persons", "cdap_rank", choices.cdap_rank)
    pipeline.add_dependent_columns("persons", "persons_cdap")
    pipeline.add_dependent_columns("households", "households_cdap")

    if trace_hh_id:
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged, mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some
    combination.

    Only persons whose CDAP activity pattern is 'M' (mandatory) are choosers;
    everyone else gets NaN after the reindex below.  Also creates the
    mandatory tours table and adds tour-dependent columns to persons.
    """
    trace_label = 'mandatory_tour_frequency'

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" %
                len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices,
                          value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="mandatory_tour_frequency.persons",
                         # FIX: trace_columns was built but the kwarg was
                         # commented out, leaving a dead variable and tracing
                         # the entire persons table; restrict the trace to the
                         # model output column as the sibling models do
                         columns=trace_columns,
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged, mandatory_tour_frequency_spec, mandatory_tour_frequency_settings, trace_hh_id):
    """
    Predict each mandatory-pattern person's frequency of mandatory trips.

    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some
    combination.  Only persons with cdap_activity == 'M' are choosers.
    """
    persons_df = persons_merged.to_frame()

    # filter based on results of CDAP
    persons_df = persons_df[persons_df.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" %
                len(persons_df))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    raw_choices = asim.simple_simulate(
        persons_df,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_hh_id and 'mandatory_tour_frequency',
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    alt_names = mandatory_tour_frequency_spec.columns[raw_choices.values]
    choices = pd.Series(alt_names, index=raw_choices.index)
    choices = choices.reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices,
                          value_counts=True)

    orca.add_column("persons", "mandatory_tour_frequency", choices)
    pipeline.add_dependent_columns("persons", "persons_mtf")

    create_mandatory_tours_table()

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    flattened = []
    for row in r:
        for item in row:
            flattened.append(item)
    orca.add_column("persons", "mtf_rand", flattened)

    if trace_hh_id:
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="mandatory_tour_frequency",
                         columns=['mandatory_tour_frequency'],
                         warn_if_empty=True)
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms,
                             chunk_size, trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice
    logsum to select a school_taz from sample alternatives.

    Runs a separate interaction_sample_simulate for each school-type segment
    (university, highschool, gradeschool), then backfills persons who do not
    attend school with -1 and writes the result to persons.school_taz.
    """
    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    # name of the alternative-TAZ column in the pre-sampled alternatives
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        # 'segment' is available to spec @ expressions
        locals_d['segment'] = school_type

        # choosers flagged as attending this school type
        choosers_segment = choosers[choosers["is_" + school_type]]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = \
            pd.merge(alts_segment, destination_size_terms,
                     left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=school_location_spec[[school_type]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, school_type),
            trace_choice_name='school_location')

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)
    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
def non_mandatory_tour_frequency(persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.

    Choosers are persons with cdap_activity in ('M', 'N'); the model is run
    separately per person-type segment, each using its own spec column.
    """
    t0 = print_elapsed_time()

    choosers = persons_merged.to_frame()
    alts = non_mandatory_tour_frequency_alts.to_frame()

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]
    logger.info("Running non_mandatory_tour_frequency with %d persons" %
                len(choosers))

    constants = config.get_model_constants(non_mandatory_tour_frequency_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for name, segment in choosers.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        choices = asim.interaction_simulate(
            segment, alts,
            # notice that we pick the column for the segment for each segment we run
            spec=non_mandatory_tour_frequency_spec[[name]],
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=trace_hh_id and 'non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # per-segment timing; t0 is rolled forward so each segment is timed
        t0 = print_elapsed_time("non_mandatory_tour_frequency.%s" % name, t0)

        # FIXME - force garbage collection
        # mem = asim.memory_info()
        # logger.info('memory_info ptype %s, %s' % (name, mem))

    choices = pd.concat(choices_list)

    # FIXME - no need to reindex?
    # NOTE(review): choices only covers the CDAP-filtered persons; presumably
    # orca leaves the remaining persons as NaN when this column is added -
    # confirm against orca.add_column semantics
    orca.add_column("persons", "non_mandatory_tour_frequency", choices)

    create_non_mandatory_tours_table()

    pipeline.add_dependent_columns("persons", "persons_nmtf")

    if trace_hh_id:
        trace_columns = ['non_mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="non_mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
def workplace_location_simulate(persons_merged, workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings,
                                skim_dict, destination_size_terms,
                                chunk_size, trace_hh_id):
    """
    Choose a work_taz for each person from pre-sampled alternatives.

    Workplace location model on workplace_location_sample annotated with
    mode_choice logsum to select a work_taz from sample alternatives.
    """
    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # it should depend on CDAP and mandatory tour generation as to whether
    # it gets used
    choosers = persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    sample_df = workplace_location_sample.to_frame()
    size_terms_df = destination_size_terms.to_frame()
    alternatives = pd.merge(sample_df, size_terms_df,
                            left_on=alt_col_name, right_index=True,
                            how="left")

    tracing.dump_df(DUMP, alternatives, 'workplace_location_simulate',
                    'alternatives')

    constants = config.get_model_constants(workplace_location_settings)

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {'skims': skims,
                'sample_pool_size': float(len(size_terms_df.index))}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = choosers[workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']]

    tracing.dump_df(DUMP, choosers, 'workplace_location_simulate', 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)
    pipeline.add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        dependent_cols = orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=['workplace_taz'] + dependent_cols,
                         warn_if_empty=True)
def workplace_location_simulate(persons_merged, workplace_location_spec, workplace_location_settings, skim_dict, destination_size_terms, chunk_size, trace_hh_id):
    """
    The workplace location model predicts the zones in which various people
    will work.

    Writes persons.workplace_taz (and a work_location_rand test column) and
    adds workplace-dependent columns to persons.
    """
    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # it should depend on CDAP and mandatory tour generation as to whether
    # it gets used
    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(workplace_location_settings)
    sample_size = workplace_location_settings["SAMPLE_SIZE"]

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - HACK - only include columns actually used in spec (which we pathologically know)
    spec_columns = ["income_segment", "TAZ", "mode_choice_logsums"]
    choosers = choosers[spec_columns]

    choices = asim.interaction_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        skims=skims,
        locals_d=locals_d,
        sample_size=sample_size,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)
    pipeline.add_dependent_columns("persons", "persons_workplace")

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    flattened = []
    for row in r:
        for item in row:
            flattened.append(item)
    orca.add_column("persons", "work_location_rand", flattened)

    if trace_hh_id:
        dependent_cols = orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=['workplace_taz'] + dependent_cols,
                         warn_if_empty=True)
def school_location_simulate(persons_merged, school_location_spec,
                             school_location_settings, skim_dict,
                             destination_size_terms, chunk_size, trace_hh_id):
    """
    The school location model predicts the zones in which various people will
    go to school.

    Runs interaction_simulate separately per school-type segment (university,
    highschool, gradeschool), backfills non-students with -1, and writes the
    result to persons.school_taz (plus a school_location_rand test column).
    """
    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(school_location_settings)
    sample_size = school_location_settings["SAMPLE_SIZE"]

    logger.info("Running school_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        # 'segment' is available to spec @ expressions
        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[school_type] > 0]

        logger.info("school_type %s: %s persons %s alternatives" %
                    (school_type, len(choosers_segment),
                     len(alternatives_segment)))

        # guard so pd.concat below never sees an empty segment's result
        if len(choosers_segment.index) > 0:
            choices = asim.interaction_simulate(
                choosers_segment,
                alternatives_segment,
                spec=school_location_spec[[school_type]],
                skims=skims,
                locals_d=locals_d,
                sample_size=sample_size,
                chunk_size=chunk_size,
                trace_label='school_location.%s' % school_type,
                trace_choice_name='school_location')

            choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    # FIX: cast to int for consistency with the sample-based school location
    # model, which codes school_taz as int; fillna alone leaves a float column
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.print_summary('school_taz', choices, describe=True)

    orca.add_column("persons", "school_taz", choices)
    pipeline.add_dependent_columns("persons", "persons_school")

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    orca.add_column("persons", "school_location_rand",
                    [item for sublist in r for item in sublist])

    if trace_hh_id:
        trace_columns = ['school_taz'] + orca.get_table('persons_school').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)