Example #1
def _location_sample(segment_name, choosers, alternatives, skims, estimator,
                     model_settings, alt_dest_col_name, chunk_size, chunk_tag,
                     trace_label):
    """
    select a sample of alternative locations.

    Logsum calculations are expensive, so we build a table of persons * all zones
    and then select a sample subset of potential locations

    The sample subset is generated by making multiple choices (<sample_size> number of choices)
    which results in a sample containing up to <sample_size> choices for each chooser (e.g. person)
    and a pick_count indicating how many times that choice was selected for that chooser.

    person_id,  dest_zone_id, rand,            pick_count
    23750,      14,           0.565502716034,  4
    23750,      16,           0.711135838871,  6
    ...
    23751,      12,           0.408038878552,  1
    23751,      14,           0.972732479292,  2
    """
    assert not choosers.empty

    logger.info("Running %s with %d persons" %
                (trace_label, len(choosers.index)))

    sample_size = model_settings["SAMPLE_SIZE"]
    if config.setting('disable_destination_sampling',
                      False) or (estimator
                                 and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    locals_d = {'skims': skims, 'segment_size': segment_name}
    constants = config.get_model_constants(model_settings)
    locals_d.update(constants)

    spec = simulate.spec_for_segment(model_settings,
                                     spec_id='SAMPLE_SPEC',
                                     segment_name=segment_name,
                                     estimator=estimator)

    # here since presumably we want this when called for either sample or presample
    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(choosers,
                                 alternatives,
                                 spec=spec,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 log_alt_losers=log_alt_losers,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    return choices
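
The pick_count semantics described in the docstring can be illustrated with plain numpy/pandas. The following is a minimal sketch of sampling with replacement and collapsing duplicate draws into a pick_count column; it illustrates the described behavior only and is not ActivitySim's interaction_sample implementation, and the zone and person ids are made up.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
sample_size = 10
zones = np.arange(1, 26)        # hypothetical destination zone ids
person_ids = [23750, 23751]     # hypothetical chooser ids

rows = []
for person_id in person_ids:
    draws = rng.choice(zones, size=sample_size, replace=True)  # <sample_size> draws
    picked, counts = np.unique(draws, return_counts=True)      # collapse duplicate picks
    rows.append(pd.DataFrame({'person_id': person_id,
                              'dest_zone_id': picked,
                              'pick_count': counts}))

sample = pd.concat(rows).set_index('person_id')
# up to sample_size rows per chooser; pick_count sums to sample_size per chooser
print(sample)
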
Example #2
def _destination_sample(primary_purpose, trips, alternatives, model_settings,
                        size_term_matrix, skims, alt_dest_col_name, estimator,
                        chunk_size, chunk_tag, trace_label):
    """

    Note: trips with no viable destination receive no sample rows
    (because we call interaction_sample with allow_zero_probs=True).
    All other trips will have one or more rows with pick_count summing to sample_size.

    Returns
    -------
    choices : pandas.DataFrame

               alt_dest      prob  pick_count
    trip_id
    102829169      2898  0.002333           1
    102829169      2901  0.004976           1
    102829169      3193  0.002628           1
    """

    spec = simulate.spec_for_segment(model_settings,
                                     spec_id='DESTINATION_SAMPLE_SPEC',
                                     segment_name=primary_purpose,
                                     estimator=estimator)

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling',
                      False) or (estimator
                                 and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    locals_dict = config.get_model_constants(model_settings).copy()

    # size_terms of destination zones are purpose-specific, and trips have various purposes
    # so the relevant size_term for each interaction_sample row
    # cannot be determined until after choosers are joined with alternatives
    # (unless we iterate over trip.purpose - which we could, though we are already iterating over trip_num)
    # so, instead, expressions determine row-specific size_term by a call to: size_terms.get(df.alt_dest, df.purpose)
    locals_dict.update({'size_terms': size_term_matrix})
    locals_dict.update(skims)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(choosers=trips,
                                 alternatives=alternatives,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 log_alt_losers=log_alt_losers,
                                 allow_zero_probs=True,
                                 spec=spec,
                                 skims=skims,
                                 locals_d=locals_dict,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    return choices
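
The comment above refers to a size_terms.get(df.alt_dest, df.purpose) lookup performed inside the sample expressions. A minimal stand-in for that kind of purpose-by-zone lookup is sketched below; the SizeTermLookup class and its data are hypothetical, not ActivitySim's size_term_matrix object.

import numpy as np
import pandas as pd

class SizeTermLookup:
    """Hypothetical purpose-by-zone matrix with a vectorized (zone, purpose) lookup."""

    def __init__(self, df):
        self.df = df                                   # index: zone id, columns: purposes
        self.values = df.to_numpy()
        self.col_pos = {c: i for i, c in enumerate(df.columns)}

    def get(self, zone_ids, purposes):
        rows = self.df.index.get_indexer(zone_ids)
        cols = np.array([self.col_pos[p] for p in purposes])
        return self.values[rows, cols]

size_df = pd.DataFrame({'work': [10.0, 5.0], 'shopping': [2.0, 8.0]},
                       index=pd.Index([2898, 2901], name='zone_id'))
size_terms = SizeTermLookup(size_df)
print(size_terms.get([2901, 2898], ['shopping', 'work']))   # -> [8. 10.]
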
Example #3
def atwork_subtour_destination_sample(tours, persons_merged, model_settings,
                                      network_los, destination_size_terms,
                                      estimator, chunk_size, trace_label):

    model_spec = simulate.read_model_spec(
        file_name=model_settings['SAMPLE_SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings['SAMPLE_SIZE']
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running atwork_subtour_location_sample with %d tours",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_zone_id
    # in the choosers and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    dest_column_name = destination_size_terms.index.name
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('workplace_zone_id', dest_column_name)

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
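
The comment about the skim wrapper ("the skims will be available under the name 'skims' for any @ expressions") can be made concrete with a toy lookup keyed on one chooser column and one alternative column. This is only a sketch of the idea; the real object comes from skim_dict.wrap, and the distance matrix here is invented.

import numpy as np
import pandas as pd

class ToySkimWrapper:
    """Toy O-D lookup keyed on a chooser column and an alternative column."""

    def __init__(self, matrix, orig_key, dest_key):
        self.matrix = matrix            # zone x zone array, zone ids assumed 1-based
        self.orig_key = orig_key
        self.dest_key = dest_key

    def lookup(self, df):
        o = df[self.orig_key].to_numpy() - 1
        d = df[self.dest_key].to_numpy() - 1
        return self.matrix[o, d]

dist = np.array([[0.0, 2.5],
                 [2.5, 0.0]])
skims = ToySkimWrapper(dist, 'workplace_zone_id', 'zone_id')

# rows as they would look after choosers and alternatives are merged
merged = pd.DataFrame({'workplace_zone_id': [1, 2], 'zone_id': [2, 1]})
print(skims.lookup(merged))             # -> [2.5 2.5]
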
Example #4
def atwork_subtour_destination_sample(tours,
                                      persons_merged,
                                      atwork_subtour_destination_sample_spec,
                                      skim_dict,
                                      destination_size_terms,
                                      chunk_size,
                                      trace_hh_id):

    trace_label = 'atwork_subtour_location_sample'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    tours = tours[tours.tour_category == 'subtour']

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)

    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    logger.info("Running atwork_subtour_location_sample with %d persons" % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices = interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_col_name,
        spec=atwork_subtour_destination_sample_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    choices['person_id'] = choosers.person_id
    choices['workplace_taz'] = choosers.workplace_taz

    inject.add_table('atwork_subtour_destination_sample', choices)
Example #5
def run_destination_sample(
        spec_segment_name,
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):

    model_spec_file_name = model_settings['SAMPLE_SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    if origin_col_name == 'TAZ':
        origin_col_name = 'TAZ_chooser'
    skims = skim_dict.wrap(origin_col_name, 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
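
The origin column rename above exists because the chooser and alternative tables both carry a TAZ column, and the interaction merge suffixes the chooser copy. The plain pandas cross merge below (requires pandas >= 1.2) shows the same effect; it is only an analogy for the behavior the comment attributes to logit.interaction_dataset.

import pandas as pd

choosers = pd.DataFrame({'TAZ': [5, 7]}, index=pd.Index([1, 2], name='tour_id'))
alts = pd.DataFrame({'TAZ': [10, 11], 'size_term': [3.0, 4.0]})

# overlapping column names get suffixed; the chooser copy becomes 'TAZ_chooser'
interaction = choosers.reset_index().merge(alts, how='cross',
                                           suffixes=('_chooser', ''))
print(interaction.columns.tolist())   # ['tour_id', 'TAZ_chooser', 'TAZ', 'size_term']
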
Example #6
def run_destination_sample(spec_segment_name, tours, persons_merged,
                           model_settings, skim_dict, destination_size_terms,
                           chunk_size, trace_label):

    model_spec_file_name = model_settings['SAMPLE_SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    if origin_col_name == 'TAZ':
        origin_col_name = 'TAZ_chooser'
    skims = skim_dict.wrap(origin_col_name, 'TAZ')

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
Example #7
def trip_destination_sample(
        primary_purpose,
        trips,
        alternatives,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """

    Returns
    -------
    destination_sample: pandas.DataFrame
        choices_df from interaction_sample with (up to) sample_size alts for each chooser row
        index (non-unique) is trip_id from trips (duplicated for each alt)
        and columns dest_taz, prob, and pick_count

        dest_taz: int
            alt identifier (dest_taz) from alternatives[<alt_col_name>]
        prob: float
            the probability of the chosen alternative
        pick_count : int
            number of duplicate picks for chooser, alt
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_sample')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SAMPLE_SPEC', primary_purpose)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST"]

    logger.info("Running %s with %d trips", trace_label, trips.shape[0])

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    destination_sample = interaction_sample(
        choosers=trips,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        allow_zero_probs=True,
        spec=spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return destination_sample
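
The allow_zero_probs=True flag matters because, as the earlier _destination_sample docstring notes, trips with no viable destination receive no sample rows. The sketch below illustrates that described behavior with made-up utilities; it is not interaction_sample's actual code path.

import numpy as np
import pandas as pd

utils = pd.DataFrame({'alt_1': [-1.0, -np.inf],
                      'alt_2': [0.5, -np.inf]},
                     index=pd.Index([102829169, 102829170], name='trip_id'))

exp_u = np.exp(utils)                 # exp(-inf) == 0, so the second trip has no viable alt
row_sums = exp_u.sum(axis=1)
probs = exp_u.div(row_sums.replace(0.0, np.nan), axis=0)

viable = row_sums > 0
print(probs[viable])                  # only the first trip would receive sample rows
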
Example #8
def workplace_location_sample(persons_merged, workplace_location_sample_spec,
                              workplace_location_settings, skim_dict,
                              destination_size_terms, chunk_size, trace_hh_id):
    """
    build a table of workers * all zones in order to select a sample of alternative work locations.

    PERID,  dest_TAZ, rand,            pick_count
    23750,  14,       0.565502716034,  4
    23750,  16,       0.711135838871,  6
    ...
    23751,  12,       0.408038878552,  1
    23751,  14,       0.972732479292,  2
    """

    trace_label = 'workplace_location_sample'

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(workplace_location_settings)

    sample_size = workplace_location_settings["SAMPLE_SIZE"]
    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    logger.info("Running workplace_location_sample with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices = interaction_sample(choosers,
                                 alternatives,
                                 sample_size=sample_size,
                                 alt_col_name=alt_col_name,
                                 spec=workplace_location_sample_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    orca.add_table('workplace_location_sample', choices)
Example #9
def atwork_subtour_destination_sample(tours, persons_merged, skim_dict,
                                      destination_size_terms, chunk_size,
                                      trace_hh_id):

    trace_label = 'atwork_subtour_location_sample'
    model_settings = config.read_model_settings(
        'atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(
        file_name='atwork_subtour_destination_sample.csv')

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running atwork_subtour_location_sample with %d tours",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
Example #10
def atwork_subtour_destination_sample(
        tours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_location_sample'
    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination_sample.csv')

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
Example #11
def _destination_sample(spec_segment_name, choosers, destination_size_terms,
                        skims, estimator, model_settings, alt_dest_col_name,
                        chunk_size, chunk_tag, trace_label):

    model_spec = simulate.spec_for_segment(model_settings,
                                           spec_id='SAMPLE_SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling',
                      False) or (estimator
                                 and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 log_alt_losers=log_alt_losers,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
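
The FIXME above says that with sample_size set to 0, interaction_sample returns the unsampled complete alternative set with probs and pick_count. A small illustration of that described outcome, with invented utilities rather than the sampler's internal code, looks like this:

import numpy as np
import pandas as pd

alt_utils = pd.Series({2898: 0.2, 2901: 0.9, 3193: -0.4})   # one chooser's utilities
probs = np.exp(alt_utils) / np.exp(alt_utils).sum()

unsampled = pd.DataFrame({'alt_dest': probs.index,
                          'prob': probs.to_numpy(),
                          'pick_count': 1})
print(unsampled)        # complete alternative set, pick_count == 1 for every row
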
Example #12
def school_location_sample(persons_merged, school_location_sample_spec,
                           school_location_settings, skim_dict,
                           destination_size_terms, chunk_size, trace_hh_id):
    """
    build a table of persons * all zones to select a sample of alternative school locations.

    PERID,  dest_TAZ, rand,            pick_count
    23750,  14,       0.565502716034,  4
    23750,  16,       0.711135838871,  6
    ...
    23751,  12,       0.408038878552,  1
    23751,  14,       0.972732479292,  2
    """

    trace_label = 'school_location_sample'

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(school_location_settings)

    sample_size = school_location_settings["SAMPLE_SIZE"]
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    logger.info("Running school_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[school_type] > 0]

        logger.info(
            "school_type %s:  %s persons %s alternatives" %
            (school_type, len(choosers_segment), len(alternatives_segment)))

        if len(choosers_segment.index) > 0:

            choices = interaction_sample(
                choosers_segment,
                alternatives_segment,
                sample_size=sample_size,
                alt_col_name=alt_col_name,
                spec=school_location_sample_spec[[school_type]],
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label,
                                                       school_type))

            choices['school_type'] = school_type
            choices_list.append(choices)

    choices = pd.concat(choices_list)

    inject.add_table('school_location_sample', choices)
Example #13
def run_location_sample(
        segment_name,
        persons_merged,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size, trace_label):
    """
    select a sample of alternative locations.

    Logsum calculations are expensive, so we build a table of persons * all zones
    and then select a sample subset of potential locations

    The sample subset is generated by making multiple choices (<sample_size> number of choices)
    which results in a sample containing up to <sample_size> choices for each chooser (e.g. person)
    and a pick_count indicating how many times that choice was selected for that chooser.

    person_id,  dest_TAZ, rand,            pick_count
    23750,      14,       0.565502716034,  4
    23750,      16,       0.711135838871,  6
    ...
    23751,      12,       0.408038878552,  1
    23751,      14,       0.972732479292,  2
    """
    assert not persons_merged.empty

    model_spec = simulate.read_model_spec(file_name=model_settings['SAMPLE_SPEC'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alternatives = dest_size_terms

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('TAZ_chooser', 'TAZ')

    locals_d = {
        'skims': skims,
        'segment_size': segment_name
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=spec_for_segment(model_spec, segment_name),
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return choices
Example #14
def run_destination_sample(spec_segment_name, tours, households_merged,
                           model_settings, network_los, destination_size_terms,
                           estimator, chunk_size, trace_label):

    spec = simulate.spec_for_segment(model_settings,
                                     spec_id='SAMPLE_SPEC',
                                     segment_name=spec_segment_name,
                                     estimator=estimator)

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(tours,
                        households_merged,
                        left_on='household_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings["SAMPLE_SIZE"]
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    # create wrapper with keys for this lookup - in this case there is a workplace_zone_id
    # in the choosers and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_column_name = destination_size_terms.index.name

    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    if (origin_col_name == dest_column_name):
        origin_col_name = f'{origin_col_name}_chooser'

    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, dest_column_name)

    locals_d = {'skims': skims}

    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
Example #15
def joint_tour_destination_sample(joint_tours, households_merged, skim_dict,
                                  size_term_calculator, chunk_size,
                                  trace_hh_id):
    """
    Chooses a sample of destinations from all possible tour destinations by choosing
    <sample_size> times from among destination alternatives.
    Since choice is with replacement, the number of sampled alternatives may be smaller
    than <sample_size>, and the pick_count column indicates how many times the sampled
    alternative was chosen.

    Household_id column is added for convenience of merging with households when the
    joint_tour_destination_simulate choice model is run subsequently.

    adds 'joint_tour_destination_sample' table to pipeline

    +------------+-------------+-----------+-------------+-------------+-------------+
    | tour_id    |  alt_dest   |   prob    |  pick_count | tour_type_id| household_id|
    +============+=============+===========+=============+=============+=============+
    | 1605124    |          14 | 0.043873  |         1   |          3  |     160512  |
    +------------+-------------+-----------+-------------+-------------+-------------+
    | 1605124    |          18 | 0.034979  |         2   |          3  |     160512  |
    +------------+-------------+-----------+-------------+-------------+-------------+
    | 1605124    |          16 | 0.105658  |         9   |          3  |     160512  |
    +------------+-------------+-----------+-------------+-------------+-------------+
    | 1605124    |          17 | 0.057670  |         1   |          3  |     160512  |
    +------------+-------------+-----------+-------------+-------------+-------------+


    Parameters
    ----------
    joint_tours: pandas.DataFrame
    households_merged : pandas.DataFrame
    skim_dict
    joint_tour_destination_sample_spec
    size_term_calculator
    chunk_size
    trace_hh_id

    Returns
    -------

    choices : pandas.DataFrame
        destination_sample df

    """

    trace_label = 'joint_tour_destination_sample'
    model_settings = config.read_model_settings('joint_tour_destination.yaml')
    model_spec = simulate.read_model_spec(
        file_name='non_mandatory_tour_destination_sample.csv')

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(joint_tours,
                        households_merged,
                        left_on='household_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    sample_size = model_settings["SAMPLE_SIZE"]

    # specify name interaction_sample should give the alternative column (logsums needs to know it)
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('TAZ_chooser', 'TAZ')

    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    logger.info("Running joint_tour_destination_sample with %d joint_tours",
                len(choosers))

    choices_list = []
    # segment by trip type and pick the right spec for each person type
    # for tour_type, choosers_segment in choosers.groupby('tour_type'):
    for tour_type, tour_type_id in iteritems(TOUR_TYPE_ID):

        choosers_segment = choosers[choosers.tour_type == tour_type]

        if choosers_segment.shape[0] == 0:
            logger.info("%s skipping tour_type %s: no tours", trace_label,
                        tour_type)
            continue

        # alts indexed by taz with one column containing size_term for this tour_type
        alternatives_segment = size_term_calculator.dest_size_terms_df(
            tour_type)

        # FIXME - no point in considering impossible alternatives (where dest size term is zero)
        alternatives_segment = alternatives_segment[
            alternatives_segment['size_term'] > 0]

        logger.info(
            "Running segment '%s' of %d joint_tours %d alternatives" %
            (tour_type, len(choosers_segment), len(alternatives_segment)))

        if len(choosers_segment.index) > 0:
            # want named index so tracing knows how to slice
            assert choosers_segment.index.name == 'tour_id'

            choices = interaction_sample(
                choosers_segment,
                alternatives_segment,
                sample_size=sample_size,
                alt_col_name=alt_dest_col_name,
                spec=model_spec[[tour_type]],
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, tour_type))

            choices['tour_type_id'] = tour_type_id

            choices_list.append(choices)

    choices = pd.concat(choices_list)

    # - NARROW
    choices['tour_type_id'] = choices['tour_type_id'].astype(np.uint8)

    if trace_hh_id:
        tracing.trace_df(choices,
                         label="joint_tour_destination_sample",
                         transpose=True)

    return choices
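
The segment loop above (filter choosers by tour_type, select the matching spec column, sample, then concat with a segment label) is a recurring pattern in these examples. The sketch below shows just that control flow with dummy data and a stand-in for the interaction_sample call.

import pandas as pd

spec = pd.DataFrame({'shopping': [1.0, 0.5], 'eatout': [0.8, 0.2]},
                    index=['size_term', 'distance'])          # invented spec
choosers = pd.DataFrame({'tour_type': ['shopping', 'eatout', 'shopping']},
                        index=pd.Index([11, 12, 13], name='tour_id'))

choices_list = []
for tour_type in spec.columns:
    choosers_segment = choosers[choosers.tour_type == tour_type]
    if choosers_segment.empty:
        continue
    # stand-in for interaction_sample(choosers_segment, ..., spec=spec[[tour_type]])
    choices = choosers_segment.assign(alt_dest=14, prob=0.05, pick_count=1)
    choices_list.append(choices)

choices = pd.concat(choices_list)
print(choices)
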
Example #16
def _od_sample(spec_segment_name, choosers, network_los,
               destination_size_terms, origin_id_col, dest_id_col, skims,
               estimator, model_settings, alt_od_col_name, chunk_size,
               chunk_tag, trace_label):

    model_spec = simulate.spec_for_segment(model_settings,
                                           spec_id='SAMPLE_SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)
    if alt_od_col_name is None:
        alt_col_name = get_od_id_col(origin_id_col, dest_id_col)
    else:
        alt_col_name = alt_od_col_name

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling',
                      False) or (estimator
                                 and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives
        # with probs and pick_count
        logger.info(("Estimation mode for %s using unsampled alternatives "
                     "short_circuit_choices") % trace_label)
        sample_size = 0

    locals_d = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    origin_filter = model_settings.get('ORIG_FILTER', None)
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    od_alts_df = _create_od_alts_from_dest_size_terms(
        destination_size_terms,
        spec_segment_name,
        od_id_col=alt_col_name,
        origin_id_col=origin_id_col,
        dest_id_col=dest_id_col,
        origin_filter=origin_filter,
        origin_attr_cols=origin_attr_cols)

    if skims.orig_key == ORIG_TAZ:
        od_alts_df[ORIG_TAZ] = map_maz_to_taz(od_alts_df[origin_id_col],
                                              network_los)

    elif skims.orig_key not in od_alts_df:
        logger.error("Alts df is missing origin skim key column.")

    choices = interaction_sample(choosers,
                                 alternatives=od_alts_df,
                                 sample_size=sample_size,
                                 alt_col_name=alt_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 chunk_tag=chunk_tag,
                                 trace_label=trace_label)

    return choices
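
_create_od_alts_from_dest_size_terms (not shown here) expands destination size terms into combined origin-destination alternatives keyed by an OD id column. A hedged guess at what such a table might look like, built with a plain cross merge and an invented id format rather than the real helper, is:

import pandas as pd

dest_size_terms = pd.DataFrame({'size_term': [3.0, 5.0]},
                               index=pd.Index([101, 102], name='zone_id'))
origins = pd.DataFrame({'origin_id': [1, 2]})

od_alts = origins.merge(dest_size_terms.reset_index(), how='cross')
# hypothetical combined OD identifier; the real od_id_col format may differ
od_alts['od_id'] = od_alts['origin_id'].astype(str) + '_' + od_alts['zone_id'].astype(str)
print(od_alts.set_index('od_id'))
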