def _location_sample(segment_name, choosers, alternatives, skims, estimator, model_settings, alt_dest_col_name, chunk_size, chunk_tag, trace_label):
    """
    Select a sample of alternative locations for each chooser.

    Logsum calculations are expensive, so we build a table of persons * all zones
    and then select a sample subset of potential locations.

    The sample subset is generated by making multiple choices (<sample_size> number of choices)
    which results in a sample containing up to <sample_size> choices for each chooser
    (e.g. person) and a pick_count indicating how many times that choice was selected
    for that chooser, e.g.:

        person_id,  dest_zone_id, rand,            pick_count
        23750,      14,           0.565502716034,  4
        23750,      16,           0.711135838871,  6
        ...
        23751,      12,           0.408038878552,  1
        23751,      14,           0.972732479292,  2

    Parameters
    ----------
    segment_name : str
        chooser segment; also passed into expressions as 'segment_size'
    choosers : pandas.DataFrame
        must be non-empty
    alternatives : pandas.DataFrame
    skims : skim wrapper exposed to expressions as 'skims'
    estimator : estimation object or None
    model_settings : dict
        must provide SAMPLE_SIZE and SAMPLE_SPEC
    alt_dest_col_name : str
        name interaction_sample should give the alternative id column
    chunk_size, chunk_tag, trace_label : chunking/tracing plumbing

    Returns
    -------
    pandas.DataFrame
        sampled choices from interaction_sample (includes prob and pick_count)
    """
    assert not choosers.empty

    logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

    sample_size = model_settings["SAMPLE_SIZE"]
    if config.setting('disable_destination_sampling', False) or (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        # NOTE(review): this log message also fires when 'disable_destination_sampling'
        # is set outside of estimation mode, which can be misleading
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    # 'segment_size' is the segment name, made available to spec expressions
    locals_d = {'skims': skims, 'segment_size': segment_name}
    constants = config.get_model_constants(model_settings)
    locals_d.update(constants)

    spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                     segment_name=segment_name, estimator=estimator)

    # here since presumably we want this when called for either sample or presample
    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(choosers,
                                 alternatives,
                                 spec=spec,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 log_alt_losers=log_alt_losers,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    return choices
def _destination_sample(primary_purpose, trips, alternatives, model_settings, size_term_matrix, skims, alt_dest_col_name, estimator, chunk_size, chunk_tag, trace_label):
    """
    Sample candidate destinations for trips of the given primary_purpose.

    Note: trips with no viable destination receive no sample rows
    (because we call interaction_sample with allow_zero_probs=True).
    All other trips will have one or more rows with pick_count summing to sample_size.

    Returns
    -------
    choices : pandas.DataFrame
        indexed (non-uniquely) by trip_id, e.g.::

                       alt_dest      prob  pick_count
            trip_id
            102829169      2898  0.002333           1
            102829169      2901  0.004976           1
            102829169      3193  0.002628           1
    """
    spec = simulate.spec_for_segment(model_settings, spec_id='DESTINATION_SAMPLE_SPEC',
                                     segment_name=primary_purpose, estimator=estimator)

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling', False) or (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    # copy so we don't mutate the shared constants dict returned by get_model_constants
    locals_dict = config.get_model_constants(model_settings).copy()

    # size_terms of destination zones are purpose-specific, and trips have various purposes
    # so the relevant size_term for each interaction_sample row
    # cannot be determined until after choosers are joined with alternatives
    # (unless we iterate over trip.purpose - which we could, though we are already iterating over trip_num)
    # so, instead, expressions determine row-specific size_term by a call to: size_terms.get(df.alt_dest, df.purpose)
    locals_dict.update({'size_terms': size_term_matrix})
    locals_dict.update(skims)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(choosers=trips,
                                 alternatives=alternatives,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 log_alt_losers=log_alt_losers,
                                 allow_zero_probs=True,
                                 spec=spec,
                                 skims=skims,
                                 locals_d=locals_dict,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    return choices
def atwork_subtour_destination_sample(tours, persons_merged, model_settings, network_los, destination_size_terms, estimator, chunk_size, trace_label):
    """
    Sample candidate destinations for at-work subtours.

    Returns a DataFrame of sampled alternatives (indexed by tour id) with a
    person_id column added for merging with persons in subsequent steps.
    """
    model_spec = simulate.read_model_spec(
        file_name=model_settings['SAMPLE_SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings['SAMPLE_SIZE']
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_zone_id
    # in the choosers and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    dest_column_name = destination_size_terms.index.name
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('workplace_zone_id', dest_column_name)

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
def atwork_subtour_destination_sample(tours, persons_merged, atwork_subtour_destination_sample_spec, skim_dict, destination_size_terms, chunk_size, trace_hh_id):
    """
    Sample candidate destinations for at-work subtours (pipeline-table variant).

    Side effect: registers the sampled choices as the
    'atwork_subtour_destination_sample' pipeline table; returns nothing.
    """
    trace_label = 'atwork_subtour_location_sample'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    # only at-work subtours choose a destination here
    tours = tours[tours.tour_category == 'subtour']

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)

    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    logger.info("Running atwork_subtour_location_sample with %d persons" % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices = interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_col_name,
        spec=atwork_subtour_destination_sample_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # keep chooser ids on the sampled alts for downstream merges
    choices['person_id'] = choosers.person_id
    choices['workplace_taz'] = choosers.workplace_taz

    inject.add_table('atwork_subtour_destination_sample', choices)
def run_destination_sample(
        spec_segment_name,
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size,
        trace_label):
    """
    Sample candidate destinations for tours in the named spec segment.

    Returns the interaction_sample choices DataFrame (indexed by tour id)
    with a person_id column added so later steps can merge with persons.
    """
    # load the sample spec and keep only this segment's utility column
    segment_spec = simulate.read_model_spec(file_name=model_settings['SAMPLE_SPEC'])
    segment_spec = segment_spec[[spec_segment_name]]

    # tours choose their destination, so bring person attributes onto the tours
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    # skims are keyed on the chooser origin column and the alternative TAZ;
    # logit.interaction_dataset suffixes a duplicate chooser column with '_chooser',
    # so compensate when the configured origin column is itself 'TAZ'
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    if origin_col_name == 'TAZ':
        origin_col_name = 'TAZ_chooser'
    skims = skim_dict.wrap(origin_col_name, 'TAZ')

    # skims are exposed to @ expressions under the name "skims"
    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=segment_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
def run_destination_sample(spec_segment_name, tours, persons_merged, model_settings, skim_dict, destination_size_terms, chunk_size, trace_label):
    """
    Sample candidate destinations for tours in the named spec segment.

    Returns the interaction_sample choices DataFrame (indexed by tour id)
    with a person_id column added so later steps can merge with persons.
    """
    spec_path = model_settings['SAMPLE_SPEC']
    full_spec = simulate.read_model_spec(file_name=spec_path)
    # single-column spec for this segment
    spec = full_spec[[spec_segment_name]]

    # merge persons into tours (tours are the choosers)
    merged_choosers = pd.merge(tours, persons_merged,
                               left_on='person_id', right_index=True, how='left')

    # FIXME - MEMORY HACK - only include columns actually used in spec
    wanted_cols = model_settings['SIMULATE_CHOOSER_COLUMNS']
    merged_choosers = merged_choosers[wanted_cols]

    logger.info("running %s with %d tours", trace_label, len(merged_choosers))

    # create wrapper with keys for this lookup; a duplicate chooser column is
    # suffixed with '_chooser' by logit.interaction_dataset, so rename the origin
    # key when it collides with the alternatives' 'TAZ' column
    orig_col = model_settings['CHOOSER_ORIG_COL_NAME']
    orig_col = 'TAZ_chooser' if orig_col == 'TAZ' else orig_col
    skims = skim_dict.wrap(orig_col, 'TAZ')

    # the skims will be available under the name "skims" for any @ expressions
    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(merged_choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=model_settings["SAMPLE_SIZE"],
                                 alt_col_name=model_settings["ALT_DEST_COL_NAME"],
                                 spec=spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = merged_choosers.person_id

    return choices
def trip_destination_sample(
        primary_purpose,
        trips,
        alternatives,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Sample candidate destinations for trips of the given primary_purpose.

    Returns
    -------
    destination_sample: pandas.dataframe
        choices_df from interaction_sample with (up to) sample_size alts for each chooser row
        index (non unique) is trip_id from trips (duplicated for each alt)
        and columns dest_taz, prob, and pick_count

        dest_taz: int
            alt identifier (dest_taz) from alternatives[<alt_col_name>]
        prob: float
            the probability of the chosen alternative
        pick_count : int
            number of duplicate picks for chooser, alt
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_sample')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SAMPLE_SPEC', primary_purpose)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST"]

    logger.info("Running %s with %d trips", trace_label, trips.shape[0])

    # copy so we don't mutate the shared constants dict returned by get_model_constants
    locals_dict = config.get_model_constants(model_settings).copy()
    # expressions look up the purpose-specific size term via size_terms.get(df.alt_dest, df.purpose)
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    # allow_zero_probs=True: trips with no viable destination get no sample rows
    destination_sample = interaction_sample(
        choosers=trips,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        allow_zero_probs=True,
        spec=spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return destination_sample
def workplace_location_sample(persons_merged, workplace_location_sample_spec, workplace_location_settings, skim_dict, destination_size_terms, chunk_size, trace_hh_id):
    """
    build a table of workers * all zones in order to select a sample
    of alternative work locations.

    PERID,  dest_TAZ, rand,            pick_count
    23750,  14,       0.565502716034,  4
    23750,  16,       0.711135838871,  6
    ...
    23751,  12,       0.408038878552,  1
    23751,  14,       0.972732479292,  2

    Side effect: registers the sampled choices as the
    'workplace_location_sample' orca table; returns nothing.
    """
    trace_label = 'workplace_location_sample'

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(workplace_location_settings)

    sample_size = workplace_location_settings["SAMPLE_SIZE"]
    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    logger.info("Running workplace_location_sample with %d persons" % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices = interaction_sample(choosers,
                                 alternatives,
                                 sample_size=sample_size,
                                 alt_col_name=alt_col_name,
                                 spec=workplace_location_sample_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    orca.add_table('workplace_location_sample', choices)
def atwork_subtour_destination_sample(tours, persons_merged, skim_dict, destination_size_terms, chunk_size, trace_hh_id):
    """
    Sample candidate destinations for at-work subtours.

    Returns the sampled alternatives (indexed by tour id) with a person_id
    column added for merging with persons in later steps.
    """
    trace_label = 'atwork_subtour_location_sample'

    model_settings = config.read_model_settings(
        'atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(
        file_name='atwork_subtour_destination_sample.csv')

    # tours are the choosers; attach person attributes
    merged = pd.merge(tours, persons_merged,
                      left_on='person_id', right_index=True)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    merged = merged[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    logger.info("Running atwork_subtour_location_sample with %d tours", len(merged))

    # skims keyed on the chooser's workplace_taz vs the alternative TAZ;
    # exposed to @ expressions under the name "skims"
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(merged,
                                 alternatives=destination_size_terms,
                                 sample_size=model_settings["SAMPLE_SIZE"],
                                 alt_col_name=model_settings["ALT_DEST_COL_NAME"],
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = merged.person_id

    return choices
def atwork_subtour_destination_sample(
        tours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size,
        trace_hh_id):
    """
    Sample candidate destinations for at-work subtours.

    Returns the sampled alternatives (indexed by tour id) with a person_id
    column added so later steps can merge with persons.
    """
    trace_label = 'atwork_subtour_location_sample'

    settings = config.read_model_settings('atwork_subtour_destination.yaml')
    spec = simulate.read_model_spec(file_name='atwork_subtour_destination_sample.csv')

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    keep_cols = settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[keep_cols]

    sample_size = settings["SAMPLE_SIZE"]
    alt_dest_col_name = settings["ALT_DEST_COL_NAME"]

    logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))

    # create wrapper with keys for this lookup - workplace_taz in the choosers,
    # TAZ in the alternatives; available to @ expressions as "skims"
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {'skims': skims}
    constants = config.get_model_constants(settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
def _destination_sample(spec_segment_name, choosers, destination_size_terms, skims, estimator, model_settings, alt_dest_col_name, chunk_size, chunk_tag, trace_label):
    """
    Sample candidate destinations for the given chooser segment.

    Returns the interaction_sample choices DataFrame with a person_id column
    added so later steps can merge with persons.
    """
    model_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                           segment_name=spec_segment_name, estimator=estimator)

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling', False) or (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    # skims are exposed to @ expressions under the name "skims"
    locals_d = {'skims': skims}

    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 log_alt_losers=log_alt_losers,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
def school_location_sample(persons_merged, school_location_sample_spec, school_location_settings, skim_dict, destination_size_terms, chunk_size, trace_hh_id):
    """
    build a table of persons * all zones to select a sample
    of alternative school locations.

    PERID,  dest_TAZ, rand,            pick_count
    23750,  14,       0.565502716034,  4
    23750,  16,       0.711135838871,  6
    ...
    23751,  12,       0.408038878552,  1
    23751,  14,       0.972732479292,  2

    Side effect: registers the sampled choices (with a 'school_type' column)
    as the 'school_location_sample' pipeline table; returns nothing.
    """
    trace_label = 'school_location_sample'

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(school_location_settings)

    sample_size = school_location_settings["SAMPLE_SIZE"]
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    logger.info("Running school_location_simulate with %d persons" % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices_list = []
    # run each school-type segment separately with its own spec column
    for school_type in ['university', 'highschool', 'gradeschool']:

        # expressions can reference the current segment via 'segment'
        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[school_type] > 0]

        logger.info(
            "school_type %s:  %s persons %s alternatives" %
            (school_type, len(choosers_segment), len(alternatives_segment)))

        if len(choosers_segment.index) > 0:

            choices = interaction_sample(
                choosers_segment,
                alternatives_segment,
                sample_size=sample_size,
                alt_col_name=alt_col_name,
                spec=school_location_sample_spec[[school_type]],
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, school_type))

            choices['school_type'] = school_type
            choices_list.append(choices)

    # NOTE(review): if no segment had any choosers, pd.concat([]) would raise
    choices = pd.concat(choices_list)

    inject.add_table('school_location_sample', choices)
def run_location_sample(
        segment_name,
        persons_merged,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size, trace_label):
    """
    select a sample of alternative locations.

    Logsum calculations are expensive, so we build a table of persons * all zones
    and then select a sample subset of potential locations

    The sample subset is generated by making multiple choices (<sample_size> number of choices)
    which results in sample containing up to <sample_size> choices for each choose (e.g. person)
    and a pick_count indicating how many times that choice was selected for that chooser.)

    person_id,  dest_TAZ, rand,            pick_count
    23750,      14,       0.565502716034,  4
    23750,      16,       0.711135838871,  6
    ...
    23751,      12,       0.408038878552,  1
    23751,      14,       0.972732479292,  2
    """
    assert not persons_merged.empty

    model_spec = simulate.read_model_spec(file_name=model_settings['SAMPLE_SPEC'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alternatives = dest_size_terms

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('TAZ_chooser', 'TAZ')

    # 'segment_size' is the segment name, made available to spec expressions
    locals_d = {
        'skims': skims,
        'segment_size': segment_name
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=spec_for_segment(model_spec, segment_name),
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return choices
def run_destination_sample(spec_segment_name, tours, households_merged, model_settings, network_los, destination_size_terms, estimator, chunk_size, trace_label):
    """
    Sample candidate destinations for tours in the named spec segment.

    Returns the interaction_sample choices DataFrame (indexed by tour id)
    with a person_id column added so later steps can merge with persons.
    """
    spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                     segment_name=spec_segment_name, estimator=estimator)

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(tours, households_merged,
                        left_on='household_id', right_index=True, how='left')

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings["SAMPLE_SIZE"]
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    # create wrapper with keys for this lookup - in this case there is a workplace_zone_id
    # in the choosers and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_column_name = destination_size_terms.index.name

    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    if (origin_col_name == dest_column_name):
        origin_col_name = f'{origin_col_name}_chooser'

    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, dest_column_name)

    locals_d = {'skims': skims}

    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
def joint_tour_destination_sample(joint_tours, households_merged, skim_dict, size_term_calculator, chunk_size, trace_hh_id):
    """
    Chooses a sample of destinations from all possible tour destinations by choosing
    <sample_size> times from among destination alternatives.
    Since choice is with replacement, the number of sampled alternative may be smaller
    than <sample_size>, and the pick_count column indicates how many times the sampled
    alternative was chosen.

    Household_id column is added for convenience of merging with households when the
    joint_tour_destination_simulate choice model is run subsequently.

    +------------+-------------+-----------+-------------+-------------+-------------+
    | tour_id    |  alt_dest   |   prob    |  pick_count | tour_type_id| household_id|
    +============+=============+===========+=============+=============+=============+
    | 1605124    +          14 + 0.043873  +         1   +       3     +    160512   |
    +------------+-------------+-----------+-------------+-------------+-------------+
    | 1605124    +          18 + 0.034979  +         2   +       3     +    160512   |
    +------------+-------------+-----------+-------------+-------------+-------------+
    | 1605124    +          16 + 0.105658  +         9   +       3     +    160512   |
    +------------+-------------+-----------+-------------+-------------+-------------+
    | 1605124    +          17 + 0.057670  +         1   +       3     +    160512   |
    +------------+-------------+-----------+-------------+-------------+-------------+

    Parameters
    ----------
    joint_tours: pandas.DataFrame
    households_merged : pandas.DataFrame
    skim_dict
    size_term_calculator
    chunk_size
    trace_hh_id

    Returns
    -------
    choices : pandas.DataFrame
        destination_sample df
    """
    trace_label = 'joint_tour_destination_sample'
    model_settings = config.read_model_settings('joint_tour_destination.yaml')
    # note: reuses the non-mandatory tour destination sample spec
    model_spec = simulate.read_model_spec(
        file_name='non_mandatory_tour_destination_sample.csv')

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(joint_tours, households_merged,
                        left_on='household_id', right_index=True, how='left')

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    sample_size = model_settings["SAMPLE_SIZE"]

    # specify name interaction_sample should give the alternative column (logsums needs to know it)
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('TAZ_chooser', 'TAZ')

    locals_d = {'skims': skims}

    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    logger.info("Running joint_tour_destination_sample with %d joint_tours", len(choosers))

    choices_list = []
    # segment by trip type and pick the right spec for each person type
    # for tour_type, choosers_segment in choosers.groupby('tour_type'):
    for tour_type, tour_type_id in iteritems(TOUR_TYPE_ID):

        choosers_segment = choosers[choosers.tour_type == tour_type]

        if choosers_segment.shape[0] == 0:
            logger.info("%s skipping tour_type %s: no tours", trace_label, tour_type)
            continue

        # alts indexed by taz with one column containing size_term for this tour_type
        alternatives_segment = size_term_calculator.dest_size_terms_df(tour_type)

        # FIXME - no point in considering impossible alternatives (where dest size term is zero)
        alternatives_segment = alternatives_segment[alternatives_segment['size_term'] > 0]

        logger.info(
            "Running segment '%s' of %d joint_tours %d alternatives" %
            (tour_type, len(choosers_segment), len(alternatives_segment)))

        if len(choosers_segment.index) > 0:
            # want named index so tracing knows how to slice
            assert choosers_segment.index.name == 'tour_id'

            choices = interaction_sample(
                choosers_segment,
                alternatives_segment,
                sample_size=sample_size,
                alt_col_name=alt_dest_col_name,
                spec=model_spec[[tour_type]],
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, tour_type))

            choices['tour_type_id'] = tour_type_id
            choices_list.append(choices)

    # NOTE(review): if every segment were empty, pd.concat([]) would raise
    choices = pd.concat(choices_list)

    # - NARROW
    choices['tour_type_id'] = choices['tour_type_id'].astype(np.uint8)

    if trace_hh_id:
        tracing.trace_df(choices,
                         label="joint_tour_destination_sample",
                         transpose=True)

    return choices
def _od_sample(spec_segment_name, choosers, network_los, destination_size_terms, origin_id_col, dest_id_col, skims, estimator, model_settings, alt_od_col_name, chunk_size, chunk_tag, trace_label):
    """
    Sample origin-destination (OD) pair alternatives for each chooser.

    Alternatives are constructed as a cross of origins and destination size
    terms; the OD pair id column is either alt_od_col_name or a generated
    combined id column when alt_od_col_name is None.
    """
    model_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                           segment_name=spec_segment_name, estimator=estimator)
    if alt_od_col_name is None:
        alt_col_name = get_od_id_col(origin_id_col, dest_id_col)
    else:
        alt_col_name = alt_od_col_name

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling', False) or (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives
        # with probs and pick_count
        logger.info(("Estimation mode for %s using unsampled alternatives "
                     "short_circuit_choices") % trace_label)
        sample_size = 0

    # skims are exposed to @ expressions under the name "skims"
    locals_d = {'skims': skims}

    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    origin_filter = model_settings.get('ORIG_FILTER', None)
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    # build the OD alternatives table from destination size terms
    od_alts_df = _create_od_alts_from_dest_size_terms(
        destination_size_terms, spec_segment_name, od_id_col=alt_col_name,
        origin_id_col=origin_id_col, dest_id_col=dest_id_col,
        origin_filter=origin_filter, origin_attr_cols=origin_attr_cols)

    if skims.orig_key == ORIG_TAZ:
        # skims are TAZ-based, so map origin MAZs to their containing TAZ
        od_alts_df[ORIG_TAZ] = map_maz_to_taz(od_alts_df[origin_id_col], network_los)

    elif skims.orig_key not in od_alts_df:
        # NOTE(review): this only logs and falls through to interaction_sample,
        # which would presumably fail downstream - consider raising instead
        logger.error("Alts df is missing origin skim key column.")

    choices = interaction_sample(choosers,
                                 alternatives=od_alts_df,
                                 sample_size=sample_size,
                                 alt_col_name=alt_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    return choices