def workplace_location_simulate(set_random_seed, persons_merged, workplace_location_spec, skims, destination_size_terms): # for now I'm going to generate a workplace location for everyone - # presumably it will not get used in downstream models for everyone - # it should depend on CDAP and mandatory tour generation as to whethrer # it gets used choosers = persons_merged.to_frame() alternatives = destination_size_terms.to_frame() # set the keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction skims.set_keys("TAZ", "TAZ_r") # the skims will be available under the name "skims" for any @ expressions locals_d = {"skims": skims} choices, _ = asim.interaction_simulate(choosers, alternatives, workplace_location_spec, skims=skims, locals_d=locals_d, sample_size=50) choices = choices.reindex(persons_merged.index) print "Describe of choices:\n", choices.describe() orca.add_column("persons", "workplace_taz", choices) add_dependent_columns("persons", "persons_workplace")
def non_mandatory_tour_frequency(set_random_seed, persons_merged, non_mandatory_tour_frequency_alts, non_mandatory_tour_frequency_spec): choosers = persons_merged.to_frame() # filter based on results of CDAP choosers = choosers[choosers.cdap_activity.isin(['Mandatory', 'NonMandatory'])] print "%d persons run for non-mandatory tour model" % len(choosers) choices_list = [] # segment by person type and pick the right spec for each person type for name, segment in choosers.groupby('ptype_cat'): print "Running segment '%s' of size %d" % (name, len(segment)) choices, _ = asim.interaction_simulate( segment, non_mandatory_tour_frequency_alts.to_frame(), # notice that we pick the column for the # segment for each segment we run non_mandatory_tour_frequency_spec[[name]]) choices_list.append(choices) choices = pd.concat(choices_list) print "Choices:\n", choices.value_counts() orca.add_column("persons", "non_mandatory_tour_frequency", choices)
def non_mandatory_tour_frequency(set_random_seed, persons_merged, non_mandatory_tour_frequency_alts, non_mandatory_tour_frequency_spec): choosers = persons_merged.to_frame() # filter based on results of CDAP choosers = choosers[choosers.cdap_activity.isin( ['Mandatory', 'NonMandatory'])] print "%d persons run for non-mandatory tour model" % len(choosers) choices_list = [] # segment by person type and pick the right spec for each person type for name, segment in choosers.groupby('ptype_cat'): print "Running segment '%s' of size %d" % (name, len(segment)) choices, _ = asim.interaction_simulate( segment, non_mandatory_tour_frequency_alts.to_frame(), # notice that we pick the column for the # segment for each segment we run non_mandatory_tour_frequency_spec[[name]], sample_size=50) choices_list.append(choices) choices = pd.concat(choices_list) print "Choices:\n", choices.value_counts() orca.add_column("persons", "non_mandatory_tour_frequency", choices) add_dependent_columns("persons", "persons_nmtf")
def destination_choice(set_random_seed, non_mandatory_tours_merged, skims, destination_choice_spec, destination_size_terms): # choosers are tours - in a sense tours are choosing their destination choosers = non_mandatory_tours_merged.to_frame() alternatives = destination_size_terms.to_frame() spec = destination_choice_spec.to_frame() # set the keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction skims.set_keys("TAZ", "TAZ_r") # the skims will be available under the name "skims" for any @ expressions locals_d = {"skims": skims} choices_list = [] # segment by trip type and pick the right spec for each person type for name, segment in choosers.groupby('tour_type'): # FIXME - there are two options here escort with kids and without if name == "escort": # FIXME just run one of the other models for now name = "shopping" # the segment is now available to switch between size terms locals_d['segment'] = name print spec.columns print name print "Running segment '%s' of size %d" % (name, len(segment)) choices, _ = asim.interaction_simulate(segment, alternatives, spec[[name]], skims=skims, locals_d=locals_d) choices_list.append(choices) choices = pd.concat(choices_list) print "Choices:\n", choices.describe() # every trip now has a destination which is the index from the # alternatives table - in this case it's the destination taz orca.add_column("non_mandatory_tours", "destination", choices)
def workplace_location_simulate(set_random_seed, persons_merged, workplace_location_spec, skims, destination_size_terms, chunk_size): """ The workplace location model predicts the zones in which various people will work. """ # for now I'm going to generate a workplace location for everyone - # presumably it will not get used in downstream models for everyone - # it should depend on CDAP and mandatory tour generation as to whethrer # it gets used choosers = persons_merged.to_frame() alternatives = destination_size_terms.to_frame() # set the keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction skims.set_keys("TAZ", "TAZ_r") # the skims will be available under the name "skims" for any @ expressions locals_d = {"skims": skims} # FIXME - HACK - only include columns actually used in spec (which we pathologically know) choosers = choosers[["income_segment", "TAZ", "mode_choice_logsums"]] choices = asim.interaction_simulate(choosers, alternatives, workplace_location_spec, skims=skims, locals_d=locals_d, sample_size=50, chunk_size=chunk_size) # FIXME - no need to reindex? choices = choices.reindex(persons_merged.index) logger.info("%s workplace_taz choices min: %s max: %s" % (len(choices.index), choices.min(), choices.max())) tracing.print_summary('workplace_taz', choices, describe=True) orca.add_column("persons", "workplace_taz", choices) add_dependent_columns("persons", "persons_workplace")
def school_location_simulate(set_random_seed, persons_merged, school_location_spec, skims, destination_size_terms): choosers = persons_merged.to_frame() alternatives = destination_size_terms.to_frame() spec = school_location_spec.to_frame() # set the keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction skims.set_keys("TAZ", "TAZ_r") # the skims will be available under the name "skims" for any @ expressions locals_d = {"skims": skims} choices_list = [] for school_type in ['university', 'highschool', 'gradeschool']: locals_d['segment'] = school_type choosers_segment = choosers[choosers["is_" + school_type]] choices, _ = asim.interaction_simulate(choosers_segment, alternatives, spec[[school_type]], skims=skims, locals_d=locals_d, sample_size=50) choices_list.append(choices) choices = pd.concat(choices_list) # this fillna is necessary to avoid a downstream crash and might be a bit # wrong logically. The issue here is that there is a small but non-zero # chance to choose a school trip even if not of the school type (because # of -999 rather than outright removal of alternative availability). - # this fills in the location for those uncommon circumstances, # so at least it runs choices = choices.reindex(persons_merged.index).fillna(-1) print "Describe of choices:\n", choices.describe() orca.add_column("persons", "school_taz", choices) add_dependent_columns("persons", "persons_school")
def destination_choice(set_random_seed, non_mandatory_tours_merged, skims, destination_choice_spec, destination_size_terms): # choosers are tours - in a sense tours are choosing their destination choosers = non_mandatory_tours_merged.to_frame() alternatives = destination_size_terms.to_frame() spec = destination_choice_spec.to_frame() # set the keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction skims.set_keys("TAZ", "TAZ_r") # the skims will be available under the name "skims" for any @ expressions locals_d = {"skims": skims} choices_list = [] # segment by trip type and pick the right spec for each person type for name, segment in choosers.groupby('tour_type'): # FIXME - there are two options here escort with kids and without if name == "escort": # FIXME just run one of the other models for now name = "shopping" # the segment is now available to switch between size terms locals_d['segment'] = name print "Running segment '%s' of size %d" % (name, len(segment)) choices, _ = asim.interaction_simulate(segment, alternatives, spec[[name]], skims=skims, locals_d=locals_d, sample_size=50) choices_list.append(choices) choices = pd.concat(choices_list) print "Choices:\n", choices.describe() # every trip now has a destination which is the index from the # alternatives table - in this case it's the destination taz orca.add_column("non_mandatory_tours", "destination", choices)
def destination_choice(set_random_seed, non_mandatory_tours_merged, skim_dict,
                       destination_choice_spec, destination_choice_settings,
                       destination_size_terms,
                       chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour).

    Stores the chosen destination TAZ on the non_mandatory_tours table as
    "destination" and, when trace_hh_id is set, traces the resulting table.
    """
    # choosers are tours - in a sense tours are choosing their destination
    choosers = non_mandatory_tours_merged.to_frame()
    alternatives = destination_size_terms.to_frame()
    spec = destination_choice_spec.to_frame()

    constants = get_model_constants(destination_choice_settings)

    if trace_hh_id:
        # register non_mandatory_tours so we can slice utilities
        tracing.register_tours(choosers, trace_hh_id)

    # create wrapper with keys for this lookup - in this case there is a TAZ
    # in the choosers and a TAZ in the alternatives which get merged during
    # interaction; the skims will be available under the name "skims" for any
    # @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    logger.info("Running destination_choice with %d non_mandatory_tours" %
                len(choosers.index))

    choices_list = []
    # segment by trip type and pick the right spec for each person type
    for name, segment in choosers.groupby('tour_type'):

        # FIXME - there are two options here escort with kids and without
        kludge_name = name
        if name == "escort":
            # FIX: use the module logger rather than the root logger
            # (logging.error) so this message honors the module's log config
            logger.error("destination_choice escort not implemented - running shopping instead")
            kludge_name = "shopping"

        # the segment is now available to switch between size terms
        locals_d['segment'] = kludge_name

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[kludge_name] > 0]

        logger.info("Running segment '%s' of %d tours %d alternatives" %
                    (name, len(segment), len(alternatives_segment)))

        # name index so tracing knows how to slice
        segment.index.name = 'tour_id'

        choices = asim.interaction_simulate(segment,
                                            alternatives_segment,
                                            spec[[kludge_name]],
                                            skims=skims,
                                            locals_d=locals_d,
                                            sample_size=50,
                                            chunk_size=chunk_size,
                                            trace_label='destination.%s' % name)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # FIXME - can there be null destinations?
    if choices.isnull().any():
        logger.error("destination_choice had %s null destinations" %
                     choices.isnull().sum())
        assert choices.isnull().sum() == 0

    tracing.print_summary('destination', choices, describe=True)

    # every trip now has a destination which is the index from the
    # alternatives table - in this case it's the destination taz
    orca.add_column("non_mandatory_tours", "destination", choices)

    if trace_hh_id:
        tracing.trace_df(orca.get_table('non_mandatory_tours').to_frame(),
                         label="destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def school_location_simulate(set_random_seed, persons_merged,
                             school_location_spec, school_location_settings,
                             skims,
                             destination_size_terms, chunk_size, trace_hh_id):
    """
    The school location model predicts the zones in which various people will
    go to school.

    Runs one sampled, chunked interaction simulation per school type
    (university / highschool / gradeschool), stores the chosen zone on the
    persons table as "school_taz" (-1 for persons not in school), refreshes
    dependent columns, and optionally traces the result.
    """
    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    # model constants from the settings file are merged into the expression
    # locals below (may be None if no constants are configured)
    constants = get_model_constants(school_location_settings)

    logger.info("Running school_location_simulate with %d persons" %
                len(choosers))

    # set the keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims.set_keys("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        # the size-term expressions switch between school types via 'segment'
        locals_d['segment'] = school_type

        # only persons flagged as attending this school type are choosers
        choosers_segment = choosers[choosers["is_" + school_type]]

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[school_type] > 0]

        logger.info("school_type %s: %s persons %s alternatives" %
                    (school_type, len(choosers_segment),
                     len(alternatives_segment)))

        # skip empty segments - pd.concat below would fail on an empty list
        # only if every segment were empty
        if len(choosers_segment.index) > 0:

            choices = asim.interaction_simulate(
                choosers_segment,
                alternatives_segment,
                spec=school_location_spec[[school_type]],
                skims=skims,
                locals_d=locals_d,
                sample_size=50,
                chunk_size=chunk_size,
                trace_label='school_location.%s' % school_type,
                trace_choice_name='school_location')

            choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1)

    tracing.print_summary('school_taz', choices, describe=True)

    orca.add_column("persons", "school_taz", choices)

    add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        # trace the school choice plus all the dependent columns it drives
        trace_columns = ['school_taz'] + orca.get_table('persons_school').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
def vectorize_tour_scheduling(tours, alts, spec):
    """
    The purpose of this method is fairly straightforward - it takes tours
    and schedules them into time slots.  Alternatives should be specified so
    as to define those time slots (usually with start and end times).

    The difficulty of doing this in Python is that subsequent tours are
    dependent on certain characteristics of previous tours for the same
    person.  This is a problem with Python's vectorization requirement,
    so this method does all the 1st tours, then all the 2nd tours, and so
    forth.

    This method also adds variables that can be used in the spec which have
    to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        DataFrame of tours containing tour attributes, as well as a person_id
        column to define the nth tour for each person.
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed
        to interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the tours
        DataFrame and the values are the index of the alts DataFrame.
    """
    max_num_trips = tours.groupby('person_id').size().max()

    # because this is Python, we have to vectorize everything by doing the
    # "nth" trip for each person in a for loop (in other words, because each
    # trip is dependent on the time windows left by the previous decision) -
    # hopefully this will work out ok!

    choices = []

    # keep a series of the the most recent tours for each person
    # (seeded with the first alternative's index so the join below always
    # has a value - first-pass specs must guard with tour_num >= 2)
    previous_tour_by_personid = pd.Series(
        pd.Series(alts.index).iloc[0], index=tours.person_id.unique())

    for i in range(max_num_trips):

        # this reset_index / set_index stuff keeps the index as the tours
        # index rather that switching to person_id as the index which is
        # what happens when you groupby person_id
        nth_tours = tours.reset_index().\
            groupby('person_id').nth(i).reset_index().set_index('index')

        print "Running %d #%d tour choices" % (len(nth_tours), i+1)

        # tour num can be set by the user, but if it isn't we set it here
        if "tour_num" not in nth_tours:
            nth_tours["tour_num"] = i+1

        # append the "_previous" columns describing each person's most
        # recently scheduled tour so the spec can reason about time windows
        nth_tours = nth_tours.join(get_previous_tour_by_tourid(
            nth_tours.person_id, previous_tour_by_personid, alts))

        nth_choices, _ = asim.interaction_simulate(
            nth_tours, alts.copy(), spec, sample_size=min(len(alts), 50)
        )

        choices.append(nth_choices)

        # record this round's choices as the "previous tour" for the next
        # iteration (order matters: must happen before the n+1th pass)
        previous_tour_by_personid.loc[nth_tours.person_id] = nth_choices.values

    # return the concatenated choices
    return pd.concat(choices)
def school_location_simulate(set_random_seed, persons_merged,
                             school_location_spec, skims,
                             destination_size_terms, chunk_size):
    """
    The school location model predicts the zones in which various people will
    go to school.

    Runs one sampled, chunked interaction simulation per school type and
    stores the chosen zone on the persons table as "school_taz" (-1 for
    persons with no school location), then refreshes dependent columns.
    """
    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()
    spec = school_location_spec.to_frame()

    # set the keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    skims.set_keys("TAZ", "TAZ_r")

    # the skims will be available under the name "skims" for any @ expressions
    locals_d = {"skims": skims}

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logger.info("Running school_type %s" % school_type)

        # the size-term expressions switch between school types via 'segment'
        locals_d['segment'] = school_type

        # only persons flagged as attending this school type are choosers
        choosers_segment = choosers[choosers["is_" + school_type]]

        choices = asim.interaction_simulate(choosers_segment,
                                            alternatives,
                                            spec[[school_type]],
                                            skims=skims,
                                            locals_d=locals_d,
                                            sample_size=50,
                                            chunk_size=chunk_size)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # this fillna is necessary to avoid a downstream crash and might be a bit
    # wrong logically. The issue here is that there is a small but non-zero
    # chance to choose a school trip even if not of the school type (because
    # of -999 rather than outright removal of alternative availability). -
    # this fills in the location for those uncommon circumstances,
    # so at least it runs
    if np.isnan(choices).any():
        # FIX: the original format string had one %s but a two-element tuple,
        # which raised "TypeError: not all arguments converted" whenever any
        # nan choices actually existed
        logger.warn("Converting %s nan school_taz choices to -1 (of %s)" %
                    (np.isnan(choices).sum(), len(choices.index)))

    choices = choices.reindex(persons_merged.index).fillna(-1)

    logger.info("%s school_taz choices min: %s max: %s" %
                (len(choices.index), choices.min(), choices.max()))

    tracing.print_summary('school_taz', choices, describe=True)

    orca.add_column("persons", "school_taz", choices)

    add_dependent_columns("persons", "persons_school")
def workplace_location_simulate(set_random_seed, persons_merged,
                                workplace_location_spec,
                                workplace_location_settings,
                                skims,
                                destination_size_terms,
                                chunk_size,
                                trace_hh_id):
    """
    The workplace location model predicts the zones in which various people
    will work.

    Runs a sampled, chunked interaction simulation of persons against the
    destination size terms, stores the chosen zone on the persons table as
    "workplace_taz", refreshes dependent columns, and optionally traces the
    result when trace_hh_id is set.
    """
    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # it should depend on CDAP and mandatory tour generation as to whether
    # it gets used

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    # model constants from the settings file are merged into the expression
    # locals below (may be None if no constants are configured)
    constants = get_model_constants(workplace_location_settings)

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # set the keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims.set_keys("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - HACK - only include columns actually used in spec (which we pathologically know)
    choosers = choosers[["income_segment", "TAZ", "mode_choice_logsums"]]

    # trace_label is falsy (disables tracing) unless trace_hh_id is set
    choices = asim.interaction_simulate(choosers,
                                        alternatives,
                                        spec=workplace_location_spec,
                                        skims=skims,
                                        locals_d=locals_d,
                                        sample_size=50,
                                        chunk_size=chunk_size,
                                        trace_label=trace_hh_id and 'workplace_location',
                                        trace_choice_name='workplace_location')

    # FIXME - no need to reindex?
    # (reindex aligns choices back onto the full persons index)
    choices = choices.reindex(persons_merged.index)

    logger.info("%s workplace_taz choices min: %s max: %s" %
                (len(choices.index), choices.min(), choices.max()))

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)

    add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        # trace the workplace choice plus all the dependent columns it drives
        trace_columns = ['workplace_taz'] + orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
def non_mandatory_tour_frequency(set_random_seed, persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """
    t0 = print_elapsed_time()

    persons_df = persons_merged.to_frame()
    alts_df = non_mandatory_tour_frequency_alts.to_frame()

    # only persons with an active CDAP pattern participate in this model
    persons_df = persons_df[persons_df.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons" %
                len(persons_df))

    constants = get_model_constants(non_mandatory_tour_frequency_settings)

    segment_choices = []

    # each person type has its own utility column in the spec, so run one
    # simulation per person-type segment with just that column
    for ptype_name, ptype_persons in persons_df.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" %
                    (ptype_name, len(ptype_persons)))

        choices = asim.interaction_simulate(
            ptype_persons, alts_df,
            spec=non_mandatory_tour_frequency_spec[[ptype_name]],
            locals_d=constants,
            sample_size=50,
            chunk_size=chunk_size,
            trace_label=trace_hh_id and 'non_mandatory_tour_frequency.%s' % ptype_name,
            trace_choice_name='non_mandatory_tour_frequency')

        segment_choices.append(choices)

        t0 = print_elapsed_time("non_mandatory_tour_frequency.%s" % ptype_name, t0)

    choices = pd.concat(segment_choices)

    # FIXME - no need to reindex?
    orca.add_column("persons", "non_mandatory_tour_frequency", choices)

    add_dependent_columns("persons", "persons_nmtf")

    if trace_hh_id:
        trace_columns = ['non_mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="non_mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
def destination_choice(set_random_seed, non_mandatory_tours_merged, skims,
                       destination_choice_spec, destination_size_terms,
                       chunk_size):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """
    # tours are the choosers here - each tour picks its destination zone
    tours_df = non_mandatory_tours_merged.to_frame()
    size_terms_df = destination_size_terms.to_frame()
    spec_df = destination_choice_spec.to_frame()

    # choosers and alternatives both carry a TAZ column - these keys tell the
    # skims object how to join them during the interaction step
    skims.set_keys("TAZ", "TAZ_r")

    # expose the skims object as "skims" to any @ expressions in the spec
    expr_locals = {"skims": skims}

    logger.info("%s destination_choice choosers" % len(tours_df.index))

    segment_choices = []

    # run one simulation per tour type, using that type's spec column
    for name, segment in tours_df.groupby('tour_type'):

        # FIXME - there are two options here escort with kids and without
        if name == "escort":
            logger.error("destination_choice escort not implemented - running shopping instead")
            name = "shopping"

        # the size-term expressions switch on this segment name
        expr_locals['segment'] = name

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        choices = asim.interaction_simulate(segment,
                                            size_terms_df,
                                            spec_df[[name]],
                                            skims=skims,
                                            locals_d=expr_locals,
                                            sample_size=50,
                                            chunk_size=chunk_size)

        segment_choices.append(choices)

    choices = pd.concat(segment_choices)

    # FIXME - can there be null destinations?
    if choices.isnull().any():
        logger.error("destination_choice had %s null destinations" %
                     choices.isnull().sum())
        assert choices.isnull().sum() == 0

    tracing.print_summary('destination', choices, describe=True)

    # every trip now has a destination which is the index from the
    # alternatives table - in this case it's the destination taz
    orca.add_column("non_mandatory_tours", "destination", choices)