Code example #1
def run_trip_purpose_and_destination(
        trips_df,
        tours_merged_df,
        chunk_size,
        trace_hh_id,
        trace_label):

    assert not trips_df.empty

    choices = run_trip_purpose(
        trips_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=tracing.extend_trace_label(trace_label, 'purpose')
    )

    trips_df['purpose'] = choices

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label=tracing.extend_trace_label(trace_label, 'destination'))

    return trips_df
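
A note on the helper these examples share: judging from the dotted labels throughout this page (e.g. 'trip_purpose_and_destination.purpose'), tracing.extend_trace_label appends the extension as a dotted suffix. A minimal sketch of that behavior (an assumption for illustration; the real helper lives in activitysim.core.tracing):

def extend_trace_label(trace_label, extension):
    # sketch: append the extension as a dotted suffix, e.g.
    # extend_trace_label('trip_purpose_and_destination', 'purpose')
    #   -> 'trip_purpose_and_destination.purpose'
    if trace_label:
        trace_label = '%s.%s' % (trace_label, extension)
    return trace_label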
Code example #2
def run_trip_purpose_and_destination(
        trips_df,
        tours_merged_df,
        chunk_size,
        trace_hh_id,
        trace_label):

    assert not trips_df.empty

    choices = run_trip_purpose(
        trips_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=tracing.extend_trace_label(trace_label, 'purpose')
    )

    trips_df['purpose'] = choices

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label=tracing.extend_trace_label(trace_label, 'destination'))

    return trips_df, save_sample_df
Code example #3
def run_trip_scheduling(
        trips,
        tours,
        probs_spec,
        model_settings,
        estimator,
        is_last_iteration,
        chunk_size,
        chunk_tag,
        trace_hh_id,
        trace_label):

    # only non-initial trips require scheduling; the segment handling the first such trip in a tour will use the most space
    # is_outbound_chooser = (trips.trip_num > 1) & trips.outbound & (trips.primary_purpose != 'atwork')
    # is_inbound_chooser = (trips.trip_num < trips.trip_count) & ~trips.outbound & (trips.primary_purpose != 'atwork')
    # num_choosers = (is_inbound_chooser | is_outbound_chooser).sum()

    result_list = []
    for i, trips_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers_by_chunk_id(trips, chunk_size, trace_label, chunk_tag):

        if trips_chunk.outbound.any():
            leg_chunk = trips_chunk[trips_chunk.outbound]
            leg_trace_label = tracing.extend_trace_label(chunk_trace_label, 'outbound')
            choices = \
                schedule_trips_in_leg(
                    outbound=True,
                    trips=leg_chunk,
                    probs_spec=probs_spec,
                    model_settings=model_settings,
                    is_last_iteration=is_last_iteration,
                    trace_hh_id=trace_hh_id,
                    trace_label=leg_trace_label)
            result_list.append(choices)

            chunk.log_df(trace_label, 'result_list', result_list)

        if (~trips_chunk.outbound).any():
            leg_chunk = trips_chunk[~trips_chunk.outbound]
            leg_trace_label = tracing.extend_trace_label(chunk_trace_label, 'inbound')
            choices = \
                schedule_trips_in_leg(
                    outbound=False,
                    trips=leg_chunk,
                    probs_spec=probs_spec,
                    model_settings=model_settings,
                    is_last_iteration=is_last_iteration,
                    trace_hh_id=trace_hh_id,
                    trace_label=leg_trace_label)
            result_list.append(choices)

            chunk.log_df(trace_label, 'result_list', result_list)

    choices = pd.concat(result_list)

    return choices
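
The pattern in this example is split, process, concatenate: chunk.adaptive_chunked_choosers_by_chunk_id yields slices of the chooser table, each leg is scheduled per slice, and pd.concat stitches the results back together. A minimal stand-alone sketch of that pattern in plain pandas (chunked_choosers is a hypothetical stand-in, not the activitysim chunking API):

import pandas as pd

def chunked_choosers(df, rows_per_chunk):
    # hypothetical stand-in: yield fixed-size row slices of the chooser table
    for start in range(0, len(df), rows_per_chunk):
        yield df.iloc[start:start + rows_per_chunk]

trips = pd.DataFrame({'outbound': [True, False, True, False]})
result_list = [trips_chunk[trips_chunk.outbound]    # process each chunk
               for trips_chunk in chunked_choosers(trips, 2)]
choices = pd.concat(result_list)                    # stitch results back together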
Code example #4
def run_trip_scheduling(trips_chunk, tours, probs_spec, model_settings,
                        estimator, is_last_iteration, chunk_size, trace_hh_id,
                        trace_label):

    set_tour_hour(trips_chunk, tours)
    set_stop_num(trips_chunk)

    # only non-initial trips require scheduling; the segment handling the first such trip in a tour will use the most space
    # is_outbound_chooser = (trips.trip_num > 1) & trips.outbound & (trips.primary_purpose != 'atwork')
    # is_inbound_chooser = (trips.trip_num < trips.trip_count) & ~trips.outbound & (trips.primary_purpose != 'atwork')
    # num_choosers = (is_inbound_chooser | is_outbound_chooser).sum()

    result_list = []

    if trips_chunk.outbound.any():
        leg_chunk = trips_chunk[trips_chunk.outbound]
        leg_trace_label = tracing.extend_trace_label(trace_label, 'outbound')
        choices = \
            schedule_trips_in_leg(
                outbound=True,
                trips=leg_chunk,
                probs_spec=probs_spec,
                model_settings=model_settings,
                is_last_iteration=is_last_iteration,
                trace_hh_id=trace_hh_id,
                trace_label=leg_trace_label)
        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

        # departure time of last outbound trips must constrain
        # departure times for initial inbound trips
        update_tour_earliest(trips_chunk, choices)

    if (~trips_chunk.outbound).any():
        leg_chunk = trips_chunk[~trips_chunk.outbound]
        leg_trace_label = tracing.extend_trace_label(trace_label, 'inbound')
        choices = \
            schedule_trips_in_leg(
                outbound=False,
                trips=leg_chunk,
                probs_spec=probs_spec,
                model_settings=model_settings,
                is_last_iteration=is_last_iteration,
                trace_hh_id=trace_hh_id,
                trace_label=leg_trace_label)
        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

    choices = pd.concat(result_list)

    return choices
Code example #5
def run_trip_scheduling(trips, tours, probs_spec, model_settings,
                        last_iteration, chunk_size, trace_hh_id, trace_label):

    set_tour_hour(trips, tours)

    rows_per_chunk, effective_chunk_size = \
        trip_scheduling_rpc(chunk_size, trips, probs_spec, trace_label)

    result_list = []
    for i, num_chunks, trips_chunk in chunk.chunked_choosers_by_chunk_id(
            trips, rows_per_chunk):

        if num_chunks > 1:
            chunk_trace_label = tracing.extend_trace_label(
                trace_label, 'chunk_%s' % i)
            logger.info("%s of %s size %d" %
                        (chunk_trace_label, num_chunks, len(trips_chunk)))
        else:
            chunk_trace_label = trace_label

        leg_trace_label = tracing.extend_trace_label(chunk_trace_label,
                                                     'outbound')
        chunk.log_open(leg_trace_label, chunk_size, effective_chunk_size)
        choices = \
            schedule_trips_in_leg(
                outbound=True,
                trips=trips_chunk[trips_chunk.outbound],
                probs_spec=probs_spec,
                model_settings=model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                trace_label=leg_trace_label)
        result_list.append(choices)
        chunk.log_close(leg_trace_label)

        leg_trace_label = tracing.extend_trace_label(chunk_trace_label,
                                                     'inbound')
        chunk.log_open(leg_trace_label, chunk_size, effective_chunk_size)
        choices = \
            schedule_trips_in_leg(
                outbound=False,
                trips=trips_chunk[~trips_chunk.outbound],
                probs_spec=probs_spec,
                model_settings=model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                trace_label=leg_trace_label)
        result_list.append(choices)
        chunk.log_close(leg_trace_label)

    choices = pd.concat(result_list)

    return choices
Code example #6
File: pathbuilder.py  Project: figo2002/activitysim
    def trace_df(self, df, trace_label, extension):
        assert len(df) > 0
        tracing.trace_df(df,
                         label=tracing.extend_trace_label(
                             trace_label, extension),
                         slicer='NONE',
                         transpose=False)
Code example #7
File: pathbuilder.py  Project: figo2002/activitysim
    def get_tvpb_best_transit_time(self, orig, dest, tod):

        # FIXME lots of pathological knowledge here as we are only called by accessibility directly from expressions

        trace_label = tracing.extend_trace_label(
            'accessibility.tvpb_best_time', tod)
        recipe = 'accessibility'
        path_type = 'WTW'

        with chunk.chunk_log(trace_label):
            result = \
                self.build_virtual_path(recipe, path_type, orig, dest, tod,
                                        demographic_segment=None, want_choices=False,
                                        trace_label=trace_label)

            trace_od = inject.get_injectable("trace_od", None)
            if trace_od:
                filter_targets = (orig == trace_od[0]) & (dest == trace_od[1])
                if filter_targets.any():
                    self.build_virtual_path(recipe,
                                            path_type,
                                            orig,
                                            dest,
                                            tod,
                                            demographic_segment=None,
                                            want_choices=False,
                                            trace_label=trace_label,
                                            filter_targets=filter_targets,
                                            trace=True)

        return result
Code example #8
    def best_paths(self, recipe, path_type, maz_od_df, access_df, egress_df, transit_df, trace_label, trace=False):

        trace_label = tracing.extend_trace_label(trace_label, 'best_paths')

        path_settings = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        max_paths_per_tap_set = path_settings.get('max_paths_per_tap_set', 1)
        max_paths_across_tap_sets = path_settings.get('max_paths_across_tap_sets', 1)

        units = self.units_for_recipe(recipe)
        smaller_is_better = (units in ['time'])

        maz_od_df['seq'] = maz_od_df.index
        # maz_od_df has one row per chooser
        # inner join to add rows for each access, egress, and transit segment combination
        path_df = maz_od_df. \
            merge(access_df, on=['idx', 'omaz'], how='inner'). \
            merge(egress_df, on=['idx', 'dmaz'], how='inner'). \
            merge(transit_df, on=['idx', 'atap', 'btap'], how='inner')

        chunk.log_df(trace_label, "path_df", path_df)

        # transit sets are the transit_df non-join columns
        transit_sets = [c for c in transit_df.columns if c not in ['idx', 'atap', 'btap']]

        if trace:
            # be nice and show both tap_tap set utility and total_set = access + set + egress
            for c in transit_sets:
                path_df[f'total_{c}'] = path_df[c] + path_df['access'] + path_df['egress']
            self.trace_df(path_df, trace_label, 'best_paths.full')
            for c in transit_sets:
                del path_df[f'total_{c}']

        for c in transit_sets:
            path_df[c] = path_df[c] + path_df['access'] + path_df['egress']
        path_df.drop(columns=['access', 'egress'], inplace=True)

        # choose best paths by tap set
        best_paths_list = []
        for c in transit_sets:
            keep = path_df.index.isin(
                path_df[['seq', c]].sort_values(by=c, ascending=smaller_is_better).
                groupby(['seq']).head(max_paths_per_tap_set).index
            )

            best_paths_for_set = path_df[keep].copy()  # copy to avoid SettingWithCopyWarning when adding columns below
            best_paths_for_set['path_set'] = c  # remember the path set
            best_paths_for_set[units] = path_df[keep][c]
            best_paths_for_set.drop(columns=transit_sets, inplace=True)
            best_paths_list.append(best_paths_for_set)

        path_df = pd.concat(best_paths_list)

        # choose best paths overall by seq
        path_df = path_df.sort_values(by=['seq', units], ascending=[True, smaller_is_better])
        path_df = path_df[path_df.index.isin(path_df.groupby(['seq']).head(max_paths_across_tap_sets).index)]

        if trace:
            self.trace_df(path_df, trace_label, 'best_paths')

        return path_df
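
The selection idiom best_paths relies on is sort-then-groupby().head(n): order the rows so the best come first within each chooser ('seq'), then keep the top n per group. A self-contained illustration with made-up numbers:

import pandas as pd

paths = pd.DataFrame({'seq':  [0, 0, 0, 1, 1],
                      'time': [12.0, 9.5, 11.0, 20.0, 18.5]})

# smaller_is_better holds for time units, so an ascending sort puts the best
# row first within each seq; head(1) then keeps one best path per chooser
best = paths.sort_values(by='time', ascending=True).groupby('seq').head(1)
# seq 0 keeps the 9.5 row, seq 1 keeps the 18.5 row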
Code example #9
    def __init__(self, pathbuilder, orig_key, dest_key, tod_key, segment_key,
                 cache_choices, trace_label, tag):

        self.tvpb = pathbuilder
        assert hasattr(pathbuilder, 'get_tvpb_logsum')

        self.orig_key = orig_key
        self.dest_key = dest_key
        self.tod_key = tod_key
        self.segment_key = segment_key
        self.df = None

        self.cache_choices = cache_choices
        self.cache = {} if cache_choices else None

        self.base_trace_label = tracing.extend_trace_label(
            trace_label, tag) or f'tvpb_logsum.{tag}'
        self.trace_label = self.base_trace_label
        self.tag = tag

        self.chunk_overhead = None

        assert isinstance(orig_key, str)
        assert isinstance(dest_key, str)
        assert isinstance(tod_key, str)
        assert isinstance(segment_key, str)
Code example #10
    def get_tvpb_logsum(self, path_type, orig, dest, tod, demographic_segment, want_choices, trace_label=None):

        # assume they have given us a more specific name (since there may be more than one active wrapper)
        trace_label = trace_label or 'get_tvpb_logsum'
        trace_label = tracing.extend_trace_label(trace_label, path_type)

        recipe = 'tour_mode_choice'

        with chunk.chunk_log(trace_label):

            logsum_df = \
                self.build_virtual_path(recipe, path_type, orig, dest, tod, demographic_segment,
                                        want_choices=want_choices, trace_label=trace_label)

            trace_hh_id = inject.get_injectable("trace_hh_id", None)
            if trace_hh_id:
                filter_targets = tracing.trace_targets(orig)
                # choices from preceding run (because random numbers)
                override_choices = logsum_df['path_num'] if want_choices else None
                if filter_targets.any():
                    self.build_virtual_path(recipe, path_type, orig, dest, tod, demographic_segment,
                                            want_choices=want_choices, override_choices=override_choices,
                                            trace_label=trace_label, filter_targets=filter_targets, trace=True)

        return logsum_df
Code example #11
    def __init__(self, model_settings, network_los, trace_label):

        self.model_settings = model_settings
        self.trace_label = tracing.extend_trace_label(trace_label,
                                                      'skim_hotel')

        self.network_los = network_los
        self.zone_system = network_los.zone_system
Code example #12
File: initialize.py  Project: figo2002/activitysim
def annotate_tables(model_settings, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(
            f"{trace_label} - annotate_tables setting is empty - nothing to do!"
        )

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, df)

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                "Setting 'column_map' has been changed to 'rename_columns'. "
                "Support for 'column_map' in annotate_tables will be removed in future versions.",
                FutureWarning)

            logger.info(
                f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}"
            )
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        chunk.log_df(trace_label, tablename, df)

        # - write table to pipeline
        pipeline.replace_table(tablename, df)

        del df
        chunk.log_df(trace_label, tablename, None)
Code example #13
def annotate_jtp(model_settings, trace_label):

    # - annotate persons
    persons = inject.get_table('persons').to_frame()
    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
    pipeline.replace_table("persons", persons)
Code example #14
def trip_destination_simulate(
        primary_purpose,
        trips,
        destination_sample,
        model_settings,
        want_logsums,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Choose destination from destination_sample (with od_logsum and dp_logsum columns added)


    Returns
    -------
    choices - pandas.Series
        destination alt chosen
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_dest_simulate')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SPEC', primary_purpose)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        allow_zero_probs=True, zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='trip_dest')

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(destinations, pd.Series)
        destinations = destinations.to_frame('choice')

    # drop any failed zero_prob destinations
    if (destinations.choice == NO_DESTINATION).any():
        # logger.debug("dropping %s failed destinations", (destinations == NO_DESTINATION).sum())
        destinations = destinations[destinations.choice != NO_DESTINATION]

    return destinations
Code example #15
def location_presample(segment_name, persons_merged, network_los,
                       dest_size_terms, estimator, model_settings, chunk_size,
                       chunk_tag, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'presample')

    logger.info(f"{trace_label} location_presample")

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    assert DEST_TAZ != alt_dest_col_name

    MAZ_size_terms, TAZ_size_terms = aggregate_size_terms(
        dest_size_terms, network_los)

    # convert MAZ zone_id to 'TAZ' in choosers (persons_merged)
    # persons_merged[HOME_TAZ] = persons_merged[HOME_MAZ].map(maz_to_taz)
    assert HOME_MAZ in persons_merged
    assert HOME_TAZ in persons_merged  # 'TAZ' should already be in persons_merged from land_use

    # FIXME - MEMORY HACK - only include columns actually used in spec
    # FIXME we don't actually require that land_use provide a TAZ crosswalk
    # FIXME maybe we should add it for multi-zone (from maz_taz) if missing?
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    chooser_columns = [
        HOME_TAZ if c == HOME_MAZ else c for c in chooser_columns
    ]
    choosers = persons_merged[chooser_columns]

    # create wrapper with keys for this lookup - in this case there is a HOME_TAZ in the choosers
    # and a DEST_TAZ in the alternatives which get merged during interaction;
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_skim_dict('taz')
    skims = skim_dict.wrap(HOME_TAZ, DEST_TAZ)

    taz_sample = _location_sample(segment_name, choosers, TAZ_size_terms,
                                  skims, estimator, model_settings, DEST_TAZ,
                                  chunk_size, chunk_tag, trace_label)

    # print(f"taz_sample\n{taz_sample}")
    #            dest_TAZ      prob  pick_count
    # person_id
    # 55227             7  0.009827           1
    # 55227            10  0.000656           1
    # 55227            18  0.014871           1
    # 55227            20  0.035548           3

    # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total
    maz_choices = tour_destination.choose_MAZ_for_TAZ(taz_sample,
                                                      MAZ_size_terms,
                                                      trace_label)

    assert DEST_MAZ in maz_choices
    maz_choices = maz_choices.rename(columns={DEST_MAZ: alt_dest_col_name})

    return maz_choices
Code example #16
def schedule_tours(
        tours, persons_merged, alts, spec, constants, timetable,
        previous_tour, window_id_col, chunk_size, tour_trace_label):
    """
    chunking wrapper for _schedule_tours

    While interaction_sample_simulate provides chunking support, the merged tours, persons
    dataframe and the tdd_interaction_dataset are very big, so we want to create them inside
    the chunking loop to minimize memory footprint. So we implement the chunking loop here,
    and pass a chunk_size of 0 to interaction_sample_simulate to disable its chunking support.

    """
    # return _schedule_tours(tours, persons_merged, alts, spec, constants, timetable,
    #                        previous_tour, window_id_col, chunk_size, tour_trace_label)

    logger.info("%s schedule_tours running %d tour choices" % (tour_trace_label, len(tours)))

    # persons_merged columns plus 2 previous tour columns
    extra_chooser_columns = persons_merged.shape[1] + 2

    rows_per_chunk = \
        calc_rows_per_chunk(chunk_size, tours, persons_merged, alts, trace_label=tour_trace_label)

    logger.info("chunk_size %s rows_per_chunk %s" % (chunk_size, rows_per_chunk))

    result_list = []
    for i, num_chunks, chooser_chunk \
            in chunk.chunked_choosers(tours, rows_per_chunk):

        logger.info("Running chunk %s of %s size %d" % (i, num_chunks, len(chooser_chunk)))

        chunk_trace_label = tracing.extend_trace_label(tour_trace_label, 'chunk_%s' % i) \
            if num_chunks > 1 else tour_trace_label

        choices = _schedule_tours(chooser_chunk, persons_merged,
                                  alts, spec, constants,
                                  timetable,
                                  previous_tour, window_id_col,
                                  tour_trace_label=chunk_trace_label)

        result_list.append(choices)

        force_garbage_collect()

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) > 1:
        choices = pd.concat(result_list)

    assert len(choices.index) == len(tours.index)

    return choices
Code example #17
def trip_destination_sample(
        primary_purpose,
        trips,
        alternatives,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """

    Returns
    -------
    destination_sample: pandas.DataFrame
        choices_df from interaction_sample with (up to) sample_size alts for each chooser row
        index (non unique) is trip_id from trips (duplicated for each alt)
        and columns dest_taz, prob, and pick_count

        dest_taz: int
            alt identifier (dest_taz) from alternatives[<alt_col_name>]
        prob: float
            the probability of the chosen alternative
        pick_count : int
            number of duplicate picks for chooser, alt
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_sample')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SAMPLE_SPEC', primary_purpose)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST"]

    logger.info("Running %s with %d trips", trace_label, trips.shape[0])

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    destination_sample = interaction_sample(
        choosers=trips,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        allow_zero_probs=True,
        spec=spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return destination_sample
Code example #18
def compute_utilities(network_los,
                      model_settings,
                      choosers,
                      model_constants,
                      trace_label,
                      trace=False,
                      trace_column_names=None):
    """
    Compute utilities
    """
    with chunk.chunk_log('tvpb compute_utilities'):
        trace_label = tracing.extend_trace_label(trace_label, 'compute_utils')

        logger.debug(
            f"{trace_label} Running compute_utilities with {choosers.shape[0]} choosers"
        )

        locals_dict = {'np': np, 'los': network_los}
        locals_dict.update(model_constants)

        # we don't grok coefficients, but allow them to use constants in spec alt columns
        spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        for c in spec.columns:
            if c != simulate.SPEC_LABEL_NAME:
                spec[c] = spec[c].map(
                    lambda s: model_constants.get(s, s)).astype(float)

        with chunk.chunk_log('compute_utilities'):

            # - run preprocessor to annotate choosers
            preprocessor_settings = model_settings.get('PREPROCESSOR')
            if preprocessor_settings:

                # don't want to alter caller's dataframe
                choosers = choosers.copy()

                expressions.assign_columns(
                    df=choosers,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=trace_label)

            utilities = simulate.eval_utilities(
                spec,
                choosers,
                locals_d=locals_dict,
                trace_all_rows=trace,
                trace_label=trace_label,
                trace_column_names=trace_column_names)

    return utilities
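
The coefficient substitution above is plain Series.map with a dict fallback: spec cell values that name a constant resolve through model_constants, numeric literals pass through unchanged, and everything is then cast to float. A small sketch with hypothetical constants:

import pandas as pd

model_constants = {'C_IVT': -0.028, 'C_WAIT': -0.056}   # hypothetical constants
spec_col = pd.Series(['C_IVT', 'C_WAIT', '1.5'])        # mix of names and literals

# named constants resolve through the dict, literals fall through, then cast
coefficients = spec_col.map(lambda s: model_constants.get(s, s)).astype(float)
# -> -0.028, -0.056, 1.5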
Code example #19
def trip_destination_sample(primary_purpose, trips, alternatives,
                            model_settings, size_term_matrix, skim_hotel,
                            estimator, chunk_size, trace_hh_id, trace_label):
    """

    Returns
    -------
    destination_sample: pandas.DataFrame
        choices_df from interaction_sample with (up to) sample_size alts for each chooser row
        index (non unique) is trip_id from trips (duplicated for each alt)
        and columns dest_zone_id, prob, and pick_count

        dest_zone_id: int
            alt identifier from alternatives[<alt_col_name>]
        prob: float
            the probability of the chosen alternative
        pick_count : int
            number of duplicate picks for chooser, alt
    """
    trace_label = tracing.extend_trace_label(trace_label,
                                             'trip_destination_sample')

    assert len(trips) > 0
    assert len(alternatives) > 0

    # by default, enable presampling for multizone systems, unless they disable it in settings file
    network_los = inject.get_injectable('network_los')
    pre_sample_taz = not (network_los.zone_system == los.ONE_ZONE)
    if pre_sample_taz and not config.setting('want_dest_choice_presampling',
                                             True):
        pre_sample_taz = False
        logger.info(f"Disabled destination zone presampling for {trace_label} "
                    f"because 'want_dest_choice_presampling' setting is False")

    if pre_sample_taz:

        logger.info("Running %s trip_destination_presample with %d trips" %
                    (trace_label, len(trips)))

        choices = destination_presample(primary_purpose, trips, alternatives,
                                        model_settings, size_term_matrix,
                                        skim_hotel, network_los, estimator,
                                        chunk_size, trace_hh_id, trace_label)

    else:
        choices = destination_sample(primary_purpose, trips, alternatives,
                                     model_settings, size_term_matrix,
                                     skim_hotel, estimator, chunk_size,
                                     trace_label)

    return choices
Code example #20
def initialize_tours(network_los, households, persons, trace_hh_id):

    trace_label = 'initialize_tours'

    tours = read_input_table("tours")

    # FIXME can't use households_sliced injectable as flag like persons table does in case of resume_after.
    # FIXME could just always slice...
    slice_happened = inject.get_injectable('households_sample_size', 0) > 0
    if slice_happened:
        logger.info("slicing tours %s" % (tours.shape,))
        # keep all persons in the sampled households
        tours = tours[tours.person_id.isin(persons.index)]

    # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above
    model_settings = config.read_model_settings('initialize_tours.yaml', mandatory=True)
    expressions.assign_columns(
        df=tours,
        model_settings=model_settings.get('annotate_tours'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_tours'))

    if not model_settings.get('skip_patch_tour_ids', False):
        tours = patch_tour_ids(tours)
    assert tours.index.name == 'tour_id'

    # replace table function with dataframe
    inject.add_table('tours', tours)

    pipeline.get_rn_generator().add_channel('tours', tours)

    tracing.register_traceable_table('tours', tours)

    logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
    logger.debug(f"{len(households.index.unique())} unique household_ids in households")
    assert not tours.index.duplicated().any()

    tours_without_persons = ~tours.person_id.isin(persons.index)
    if tours_without_persons.any():
        logger.error(f"{tours_without_persons.sum()} tours out of {len(persons)} without persons\n"
                     f"{pd.Series({'person_id': tours_without_persons.index.values})}")
        raise RuntimeError(f"{tours_without_persons.sum()} tours with bad person_id")

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='initialize_tours',
                         warn_if_empty=True)
Code example #21
File: trip_destination.py  Project: UDST/activitysim
def trip_destination_simulate(
        primary_purpose,
        trips,
        destination_sample,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Choose destination from destination_sample (with od_logsum and dp_logsum columns added)


    Returns
    -------
    choices - pandas.Series
        destination alt chosen
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_simulate')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SPEC', primary_purpose)

    alt_dest_col_name = model_settings["ALT_DEST"]

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=spec,
        choice_column=alt_dest_col_name,
        allow_zero_probs=True, zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='trip_dest')

    # drop any failed zero_prob destinations
    if (destinations == NO_DESTINATION).any():
        # logger.debug("dropping %s failed destinations", destinations == NO_DESTINATION).sum()
        destinations = destinations[destinations != NO_DESTINATION]

    return destinations
Code example #22
File: tour_od.py  Project: arunakkin/activitysim
def od_presample(spec_segment_name, choosers, model_settings, network_los,
                 destination_size_terms, estimator, chunk_size, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'presample')
    chunk_tag = 'tour_od.presample'

    logger.info(f"{trace_label} od_presample")

    alt_od_col_name = get_od_id_col(ORIG_MAZ, DEST_TAZ)

    MAZ_size_terms, TAZ_size_terms = aggregate_size_terms(
        destination_size_terms, network_los)

    # create wrapper with keys for this lookup - in this case there is an ORIG_TAZ
    # in the choosers and a DEST_TAZ in the alternatives which get merged during
    # interaction; the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_skim_dict('taz')
    skims = skim_dict.wrap(ORIG_TAZ, DEST_TAZ)

    orig_MAZ_dest_TAZ_sample = _od_sample(spec_segment_name, choosers,
                                          network_los, TAZ_size_terms,
                                          ORIG_MAZ, DEST_TAZ, skims, estimator,
                                          model_settings, alt_od_col_name,
                                          chunk_size, chunk_tag, trace_label)

    orig_MAZ_dest_TAZ_sample[ORIG_MAZ] = orig_MAZ_dest_TAZ_sample[
        alt_od_col_name].str.split('_').str[0].astype(int)
    orig_MAZ_dest_TAZ_sample[DEST_TAZ] = orig_MAZ_dest_TAZ_sample[
        alt_od_col_name].str.split('_').str[1].astype(int)

    # choose a MAZ for each DEST_TAZ choice, choice probability based on
    # MAZ size_term fraction of TAZ total

    maz_choices = choose_MAZ_for_TAZ(orig_MAZ_dest_TAZ_sample,
                                     MAZ_size_terms,
                                     trace_label,
                                     addtl_col_for_unique_key=ORIG_MAZ)

    # outputs
    assert DEST_MAZ in maz_choices

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    chooser_orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    maz_choices = maz_choices.rename(columns={
        DEST_MAZ: alt_dest_col_name,
        ORIG_MAZ: chooser_orig_col_name
    })

    return maz_choices
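
od_presample packs the origin MAZ and destination TAZ into one joint alternative id column and then unpacks it with str.split. A sketch of that round trip, assuming underscore-joined ids like '101_7' (the exact format comes from get_od_id_col, which is not shown here):

import pandas as pd

sample = pd.DataFrame({'omaz_dtaz': ['101_7', '101_10', '205_7']})  # assumed packed ids

# unpack the joint id back into integer components, as od_presample does
sample['omaz'] = sample['omaz_dtaz'].str.split('_').str[0].astype(int)
sample['dtaz'] = sample['omaz_dtaz'].str.split('_').str[1].astype(int)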
Code example #23
def destination_presample(primary_purpose, trips, alternatives, model_settings,
                          size_term_matrix, skim_hotel, network_los, estimator,
                          chunk_size, trace_hh_id, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'presample')
    chunk_tag = 'trip_destination.presample'  # distinguish from trip_destination.sample

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    maz_taz = network_los.maz_taz_df[['MAZ', 'TAZ']].set_index('MAZ').TAZ

    TAZ_size_term_matrix = aggregate_size_term_matrix(size_term_matrix,
                                                      maz_taz)

    TRIP_ORIGIN = model_settings['TRIP_ORIGIN']
    PRIMARY_DEST = model_settings['PRIMARY_DEST']
    trips = trips.copy()

    trips[TRIP_ORIGIN] = trips[TRIP_ORIGIN].map(maz_taz)
    trips[PRIMARY_DEST] = trips[PRIMARY_DEST].map(maz_taz)

    # alternatives is just an empty dataframe indexed by maz with index name <alt_dest_col_name>
    # but logically, we are aggregating so let's do it, as there is no particular gain in being clever
    alternatives = alternatives.groupby(alternatives.index.map(maz_taz)).sum()

    skims = skim_hotel.sample_skims(presample=True)

    taz_sample = _destination_sample(primary_purpose,
                                     trips,
                                     alternatives,
                                     model_settings,
                                     TAZ_size_term_matrix,
                                     skims,
                                     alt_dest_col_name,
                                     estimator,
                                     chunk_size,
                                     chunk_tag=chunk_tag,
                                     trace_label=trace_label)

    # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total
    maz_sample = choose_MAZ_for_TAZ(taz_sample, size_term_matrix, trips,
                                    network_los, alt_dest_col_name,
                                    trace_label)

    assert alt_dest_col_name in maz_sample

    return maz_sample
Code example #24
def parking_destination_simulate(segment_name, trips, destination_sample,
                                 model_settings, skims, chunk_size,
                                 trace_hh_id, trace_label):
    """
    Choose destination from destination_sample (with od_logsum and dp_logsum columns added)


    Returns
    -------
    choices - pandas.Series
        destination alt chosen
    """
    trace_label = tracing.extend_trace_label(trace_label,
                                             'trip_destination_simulate')

    spec = get_spec_for_segment(model_settings, 'SPECIFICATION', segment_name)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update(skims)

    parking_locations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=spec,
        choice_column=alt_dest_col_name,
        want_logsums=False,
        allow_zero_probs=True,
        zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='parking_loc')

    # drop any failed zero_prob destinations
    if (parking_locations == NO_DESTINATION).any():
        logger.debug("dropping %s failed parking locations",
                     (parking_locations == NO_DESTINATION).sum())
        parking_locations = parking_locations[
            parking_locations != NO_DESTINATION]

    return parking_locations
Code example #25
def destination_presample(spec_segment_name, choosers, model_settings,
                          network_los, destination_size_terms, estimator,
                          chunk_size, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'presample')
    chunk_tag = 'tour_destination.presample'

    logger.info(f"{trace_label} location_presample")

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    assert DEST_TAZ != alt_dest_col_name

    MAZ_size_terms, TAZ_size_terms = aggregate_size_terms(
        destination_size_terms, network_los)

    orig_maz = model_settings['CHOOSER_ORIG_COL_NAME']
    assert orig_maz in choosers
    if ORIG_TAZ not in choosers:
        choosers[ORIG_TAZ] = map_maz_to_taz(choosers[orig_maz], network_los)

    # create wrapper with keys for this lookup - in this case there is an ORIG_TAZ in the choosers
    # and a DEST_TAZ in the alternatives which get merged during interaction;
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_skim_dict('taz')
    skims = skim_dict.wrap(ORIG_TAZ, DEST_TAZ)

    taz_sample = _destination_sample(spec_segment_name,
                                     choosers,
                                     TAZ_size_terms,
                                     skims,
                                     estimator,
                                     model_settings,
                                     DEST_TAZ,
                                     chunk_size,
                                     chunk_tag=chunk_tag,
                                     trace_label=trace_label)

    # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total
    maz_choices = choose_MAZ_for_TAZ(taz_sample, MAZ_size_terms, trace_label)

    assert DEST_MAZ in maz_choices
    maz_choices = maz_choices.rename(columns={DEST_MAZ: alt_dest_col_name})

    return maz_choices
Code example #26
def add_null_results(trace_label, mandatory_tour_frequency_settings):
    logger.info("Skipping %s: add_null_results", trace_label)

    persons = inject.get_table('persons').to_frame()
    persons['mandatory_tour_frequency'] = ''

    tours = pd.DataFrame()
    tours['tour_category'] = None
    tours['tour_type'] = None
    tours['person_id'] = None
    tours.index.name = 'tour_id'
    pipeline.replace_table("tours", tours)

    expressions.assign_columns(
        df=persons,
        model_settings=mandatory_tour_frequency_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)
Code example #27
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        model_constants = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df,
                                                chooser_attributes,
                                                trace_label, trace)
            # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

        locals_d = {'los': self.network_los}
        locals_d.update(model_constants)

        assignment_spec = assign.read_assignment_spec(
            file_name=config.config_file_path(tap_tap_settings['SPEC']))

        results, _, _ = assign.assign_variables(assignment_spec, transit_df,
                                                locals_d)
        assert len(results.columns) == 1
        transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(
            f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
        )
        transit_df = transit_df[transit_df.transit > 0]

        transit_df.drop(columns=chooser_attributes.columns, inplace=True)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
Code example #28
def add_null_results(trace_label, mandatory_tour_frequency_settings):
    logger.info("Skipping %s: add_null_results", trace_label)

    persons = inject.get_table('persons').to_frame()
    persons['mandatory_tour_frequency'] = ''

    tours = pd.DataFrame()
    tours['tour_category'] = None
    tours['tour_type'] = None
    tours['person_id'] = None
    tours.index.name = 'tour_id'
    pipeline.replace_table("tours", tours)

    expressions.assign_columns(
        df=persons,
        model_settings=mandatory_tour_frequency_settings.get(
            'annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)
Code example #29
    def all_transit_paths(self, access_df, egress_df, chooser_attributes, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label, 'all_transit_paths')

        # deduped transit_df has one row per chooser for each boarding (btap) and alighting (atap) pair
        transit_df = pd.merge(
            access_df[['idx', 'btap']],
            egress_df[['idx', 'atap']],
            on='idx').drop_duplicates()

        # don't want transit trips that start and stop in same tap
        transit_df = transit_df[transit_df.atap != transit_df.btap]

        for c in list(chooser_attributes.columns):
            transit_df[c] = reindex(chooser_attributes[c], transit_df['idx'])

        transit_df = transit_df.reset_index(drop=True)

        if trace:
            self.trace_df(transit_df, trace_label, 'all_transit_df')

        return transit_df
Code example #30
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    logger.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
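
The backfill step in this example - choices.reindex(persons.index).fillna(no_tours_alt) - extends choices made only for active persons to the whole persons table. A minimal sketch with made-up ids:

import pandas as pd

persons = pd.DataFrame(index=[1, 2, 3, 4])   # all persons
choices = pd.Series({2: 5, 4: 0})            # alts chosen only for persons 2 and 4
no_tours_alt = 0                             # assumed id of the zero-tour alternative

# reindex to the full persons index; non-choosers get the zero-tour alternative
full = choices.reindex(persons.index).fillna(no_tours_alt).astype(int)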
Code example #31
File: cdap.py  Project: UDST/activitysim
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices.cdap_activity
    persons['cdap_rank'] = choices.cdap_rank

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)
    logger.info("cdap crosstabs:\n%s" %
                pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
Code example #32
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    omnibus_coefficient_spec = get_coeffecients_spec(logsum_settings)
    coefficient_spec = omnibus_coefficient_spec[tour_purpose]
    coefficients = assign.evaluate_constants(coefficient_spec, constants=constants)

    locals_dict = {}
    locals_dict.update(coefficients)
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = get_logsum_spec(logsum_settings)
    nest_spec = config.get_logit_model_settings(logsum_settings)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
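The chooser table above is assembled with an index-aligned left join: alt_tdd rows join tours_merged on their shared tour index, and colliding column names from the right side get a '_chooser' suffix. A minimal sketch of that join pattern on toy frames (not the real activitysim tables):

import pandas as pd

# toy stand-ins sharing a tour-id-like index
alt_tdd = pd.DataFrame({'start': [5, 6], 'end': [9, 10]}, index=[101, 102])
tours_merged = pd.DataFrame({'start': [7, 8], 'income': [50, 60]}, index=[101, 102])

# left join on index; the right table's colliding 'start' becomes 'start_chooser'
choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
print(choosers.columns.tolist())  # ['start', 'end', 'start_chooser', 'income']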
Code example #33
File: location_choice.py Project: UDST/activitysim
def run_location_choice(
        persons_merged_df,
        skim_dict, skim_stack,
        spc,
        model_settings,
        chunk_size, trace_hh_id, trace_label
        ):
    """
    Run the three-part location choice algorithm to generate a location choice for each chooser

    Handle the various segments separately and in turn for simplicity of expression files

    Parameters
    ----------
    persons_merged_df : pandas.DataFrame
        persons table merged with households and land_use
    skim_dict : skim.SkimDict
    skim_stack : skim.SkimStack
    spc : ShadowPriceCalculator
        to get size terms
    model_settings : dict
    chunk_size : int
    trace_hh_id : int
    trace_label : str

    Returns
    -------
    pandas.Series
        location choices (zone ids) indexed by persons_merged_df.index
    """

    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # maps segment names to compact (integer) ids
    segment_ids = model_settings['SEGMENT_IDS']

    choices_list = []
    for segment_name, segment_id in iteritems(segment_ids):

        choosers = persons_merged_df[persons_merged_df[chooser_segment_column] == segment_id]

        # size_term and shadow price adjustment - one row per zone
        dest_size_terms = spc.dest_size_terms(segment_name)

        if choosers.shape[0] == 0:
            logger.info("%s skipping segment %s: no choosers", trace_label, segment_name)
            continue

        # - location_sample
        location_sample_df = \
            run_location_sample(
                segment_name,
                choosers,
                skim_dict,
                dest_size_terms,
                model_settings,
                chunk_size,
                tracing.extend_trace_label(trace_label, 'sample.%s' % segment_name))

        # - location_logsums
        location_sample_df = \
            run_location_logsums(
                segment_name,
                choosers,
                skim_dict, skim_stack,
                location_sample_df,
                model_settings,
                chunk_size,
                trace_hh_id,
                tracing.extend_trace_label(trace_label, 'logsums.%s' % segment_name))

        # - location_simulate
        choices = \
            run_location_simulate(
                segment_name,
                choosers,
                location_sample_df,
                skim_dict,
                dest_size_terms,
                model_settings,
                chunk_size,
                tracing.extend_trace_label(trace_label, 'simulate.%s' % segment_name))

        choices_list.append(choices)

        # FIXME - want to do this here?
        del location_sample_df
        force_garbage_collect()

    return pd.concat(choices_list) if len(choices_list) > 0 else pd.Series()
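Each segment is filtered, modeled, and collected, with pd.concat stitching the per-segment choices back together (and an empty Series returned if no segment had choosers). A self-contained sketch of that loop shape, with toy data and a stand-in for the three-part sample/logsums/simulate pipeline:

import pandas as pd

persons = pd.DataFrame({'segment_id': [1, 1, 2], 'home_zone': [10, 20, 30]},
                       index=pd.Index([1001, 1002, 1003], name='person_id'))
segment_ids = {'workers': 1, 'students': 2}  # hypothetical segments

choices_list = []
for segment_name, segment_id in segment_ids.items():
    choosers = persons[persons['segment_id'] == segment_id]
    if choosers.shape[0] == 0:
        continue  # skip segments with no choosers
    # stand-in for run_location_sample / logsums / simulate:
    # everyone simply "chooses" their home zone
    choices_list.append(choosers['home_zone'])

choices = pd.concat(choices_list) if len(choices_list) > 0 else pd.Series()
print(choices)  # zone ids indexed by person_id, all segments combined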
Code example #34
File: cdap.py Project: UDST/activitysim
def run_cdap(
        persons,
        cdap_indiv_spec,
        cdap_interaction_coefficients,
        cdap_fixed_relative_proportions,
        locals_d,
        chunk_size=0, trace_hh_id=None, trace_label=None):
    """
    Choose individual activity patterns for persons.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data. Must contain at least a household ID, household size,
        person type category, and age, plus any columns used in cdap_indiv_spec
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec for individuals without taking any interactions into account.
    cdap_interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    cdap_fixed_relative_proportions : pandas.DataFrame
        Spec to for the relative proportions of each activity (M, N, H)
        to choose activities for additional household members not handled by CDAP
    locals_d : Dict
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
        in either the cdap_indiv_spec or cdap_fixed_relative_proportions expression files
    chunk_size: int
        Chunk size or 0 for no chunking
    trace_hh_id : int
        hh_id to trace or None if no hh tracing
    trace_label : str
        label for tracing or None if no tracing

    Returns
    -------
    choices : pandas.DataFrame

        dataframe is indexed on _persons_index_ and has two columns:

        cdap_activity : str
            activity for that person expressed as 'M', 'N', 'H'
        cdap_rank : int
            activities for persons with cdap_rank <= MAX_HHSIZE are determined by cdap
            'extra' household members activities are assigned by cdap_fixed_relative_proportions
    """

    trace_label = tracing.extend_trace_label(trace_label, 'cdap')

    rows_per_chunk, effective_chunk_size = \
        calc_rows_per_chunk(chunk_size, persons, trace_label=trace_label)

    result_list = []
    # segment by person type and pick the right spec for each person type
    for i, num_chunks, persons_chunk in chunk.chunked_choosers_by_chunk_id(persons, rows_per_chunk):

        logger.info("Running chunk %s of %s with %d persons" % (i, num_chunks, len(persons_chunk)))

        chunk_trace_label = tracing.extend_trace_label(trace_label, 'chunk_%s' % i)

        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)

        choices = _run_cdap(persons_chunk,
                            cdap_indiv_spec,
                            cdap_interaction_coefficients,
                            cdap_fixed_relative_proportions,
                            locals_d,
                            trace_hh_id, chunk_trace_label)

        chunk.log_close(chunk_trace_label)

        result_list.append(choices)

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
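The chunk loop trades peak memory for repeated passes: each persons chunk is modeled independently and the per-chunk choices are concatenated at the end (which, as the FIXME notes, briefly holds the results twice in RAM). A toy sketch of that shape, with a simple positional splitter standing in for chunk.chunked_choosers_by_chunk_id:

import pandas as pd

def chunked(df, rows_per_chunk):
    # hypothetical stand-in for chunk.chunked_choosers_by_chunk_id
    num_chunks = (len(df) + rows_per_chunk - 1) // rows_per_chunk
    for i in range(num_chunks):
        yield i + 1, num_chunks, df.iloc[i * rows_per_chunk:(i + 1) * rows_per_chunk]

persons = pd.DataFrame({'ptype': [1, 2, 3, 4, 5]},
                       index=pd.Index(range(1, 6), name='person_id'))

result_list = []
for i, num_chunks, persons_chunk in chunked(persons, rows_per_chunk=2):
    # stand-in for _run_cdap on the chunk
    choices = persons_chunk['ptype'].map(lambda p: 'M' if p < 3 else 'N')
    result_list.append(choices)

choices = pd.concat(result_list) if len(result_list) > 1 else result_list[0]
print(choices)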
Code example #35
File: tour_destination.py Project: UDST/activitysim
def run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label):

    size_term_calculator = SizeTermCalculator(model_settings['SIZE_TERM_SELECTOR'])

    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # maps segment names to compact (integer) ids
    segments = model_settings['SEGMENTS']

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    choices_list = []
    for segment_name in segments:

        choosers = tours[tours[chooser_segment_column] == segment_name]

        # size_term segment is segment_name
        segment_destination_size_terms = size_term_calculator.dest_size_terms_df(segment_name)

        if choosers.shape[0] == 0:
            logger.info("%s skipping segment %s: no choosers", trace_label, segment_name)
            continue

        # - destination_sample
        spec_segment_name = segment_name  # spec_segment_name is segment_name
        location_sample_df = \
            run_destination_sample(
                spec_segment_name,
                choosers,
                persons_merged,
                model_settings,
                skim_dict,
                segment_destination_size_terms,
                chunk_size,
                tracing.extend_trace_label(trace_label, 'sample.%s' % segment_name))

        # - destination_logsums
        tour_purpose = segment_name  # tour_purpose is segment_name
        location_sample_df = \
            run_destination_logsums(
                tour_purpose,
                persons_merged,
                location_sample_df,
                model_settings,
                skim_dict, skim_stack,
                chunk_size, trace_hh_id,
                tracing.extend_trace_label(trace_label, 'logsums.%s' % segment_name))

        # - destination_simulate
        spec_segment_name = segment_name  # spec_segment_name is segment_name
        choices = \
            run_destination_simulate(
                spec_segment_name,
                choosers,
                persons_merged,
                location_sample_df,
                model_settings,
                skim_dict,
                segment_destination_size_terms,
                chunk_size,
                tracing.extend_trace_label(trace_label, 'simulate.%s' % segment_name))

        choices_list.append(choices)

        # FIXME - want to do this here?
        del location_sample_df
        force_garbage_collect()

    return pd.concat(choices_list) if len(choices_list) > 0 else pd.Series()
Code example #36
File: trip_destination.py Project: UDST/activitysim
def compute_logsums(
        primary_purpose,
        trips,
        destination_sample,
        tours_merged,
        model_settings,
        skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
    for each alternative since we need out-of-direction logsum
    (i.e. origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
        adds od_logsum and dp_logsum columns to trips (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    omnibus_coefficient_spec = \
        assign.read_constant_spec(config.config_file_path(logsum_settings['COEFFS']))

    coefficient_spec = omnibus_coefficient_spec[primary_purpose]

    constants = config.get_model_constants(logsum_settings)
    locals_dict = assign.evaluate_constants(coefficient_spec, constants=constants)
    locals_dict.update(constants)

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST'],
        "odt_skims": skims['odt_skims'],
        "od_skims": skims['od_skims'],
    }
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'))

    # - dp_logsums
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "od_skims": skims['dp_skims'],
    }
    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'))
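The re/set index comment above points at a real pandas pitfall: when the left frame's index has duplicate values (several sampled alternatives per trip), merge with left_index=True against a right-hand column does not preserve that left index. The workaround is to reset the right index to a column, merge on it, then restore it. A small sketch:

import pandas as pd

# destination_sample-like frame: duplicate trip_id index, one row per sampled alt
sample = pd.DataFrame({'alt_dest': [5, 6, 7]},
                      index=pd.Index([10, 10, 11], name='trip_id'))
# trips_merged-like frame: one row per trip
trips = pd.DataFrame({'tour_mode': ['walk', 'drive']},
                     index=pd.Index([10, 11], name='trip_id'))

choosers = pd.merge(sample, trips.reset_index(),
                    left_index=True, right_on='trip_id',
                    how='left', suffixes=('', '_r')).set_index('trip_id')
assert choosers.index.equals(sample.index)  # duplicated left index survives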
Code example #37
File: tour_mode_choice.py Project: UDST/activitysim
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
Code example #38
File: stop_frequency.py Project: UDST/activitysim
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logging.info("%s running segment %s with %s chooser rows" %
                     (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(annotations,
                         label="stop_frequency.annotations",
                         columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
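simple_simulate returns each choice as the positional (0-based) column index into the spec, and pd.Series(spec.columns[choices.values], index=choices.index) converts those positions to alternative names while keeping the chooser index. A toy version of that conversion:

import pandas as pd

# spec columns are the alternatives, e.g. stop-frequency patterns
spec_columns = pd.Index(['0out_0in', '1out_0in', '0out_1in'])

# positional choices as returned by simple_simulate
choices = pd.Series([0, 2, 1], index=pd.Index([101, 102, 103], name='tour_id'))

named = pd.Series(spec_columns[choices.values], index=choices.index)
print(named)  # 101 -> 0out_0in, 102 -> 0out_1in, 103 -> 1out_0in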
Code example #39
File: trip_destination.py Project: UDST/activitysim
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ and purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
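The next_trip_id bookkeeping links each trip to the row that follows it in the sorted trips table: np.roll shifts the index up one position, and .where() zeroes the link for each tour's final trip (which would otherwise point into the next tour, or wrap around). A toy sketch:

import numpy as np
import pandas as pd

trips = pd.DataFrame(
    {'trip_num': [1, 2, 3, 1, 2], 'trip_count': [3, 3, 3, 2, 2]},
    index=pd.Index([11, 12, 13, 21, 22], name='trip_id')).sort_index()

# the successor of each trip is simply the next row of the sorted table ...
trips['next_trip_id'] = np.roll(trips.index, -1)
# ... except each tour's last trip, whose (wrong) link is masked to 0
trips['next_trip_id'] = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)
print(trips)  # 12 -> 13, but trips 13 and 22 get next_trip_id 0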
Code example #40
File: cdap.py Project: UDST/activitysim
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Generate the activity choices for the 'extra' household members who weren't handled by cdap

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is handled like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
         We expect, at least, columns [_hh_id_, _ptype_]
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_ran > MAX_HHSIZE
    choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

    if len(choosers.index) == 0:
        return pd.Series()

    # eval the expression file
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # convert relative proportions to probability
    probs = proportions.div(proportions.sum(axis=1), axis=0)

    # select an activity pattern alternative for each person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) with the chosen alternative represented
    # as the integer (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice from column index to activity name
    choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

    # if DUMP:
    #     tracing.trace_df(proportions, '%s.DUMP.extra_proportions' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(probs, '%s.DUMP.extra_probs' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(choices, '%s.DUMP.extra_choices' % trace_label,
    #                      transpose=False,
    #                      slicer='NONE')

    if trace_hh_id:
        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
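Because the spec values are relative proportions rather than utilities, each row is just normalized to sum to 1.0 and a uniform draw selects a column. A self-contained sketch of that normalize-and-draw step, using numpy directly in place of logit.make_choices:

import numpy as np
import pandas as pd

# relative proportions of each activity per extra household member
proportions = pd.DataFrame({'M': [2.0, 1.0], 'N': [1.0, 1.0], 'H': [1.0, 2.0]},
                           index=pd.Index([7001, 7002], name='person_id'))

# normalize rows to probabilities summing to 1.0 (no exponentiation)
probs = proportions.div(proportions.sum(axis=1), axis=0)

# pick the first column whose cumulative probability exceeds a uniform draw
rands = np.random.default_rng(0).random(len(probs))
idx_choices = (probs.cumsum(axis=1).values < rands[:, None]).sum(axis=1)

# convert column positions to activity names
choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)
print(choices)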
Code example #41
File: trip_mode_choice.py Project: UDST/activitysim
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        alts = model_spec.columns
        choices = choices.map(dict(enumerate(alts)))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
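The locals_dict handed to the expression evaluator is built in layers: purpose-specific coefficients first, then model constants, then the skim wrappers, with later update() calls winning on any key collision. A minimal sketch of how such a layered environment feeds an '@'-style expression (the names, values, and expression here are illustrative, not the real spec):

import pandas as pd

df = pd.DataFrame({'in_vehicle_time': [10.0, 25.0], 'cost': [2.5, 4.0]})

coefficients = {'c_ivt': -0.028, 'c_cost': -0.0015}  # hypothetical values
constants = {'VOT': 12.0}                            # hypothetical constant

locals_dict = {}
locals_dict.update(coefficients)  # coefficients first
locals_dict.update(constants)     # constants override on any key collision

# an '@' expression is evaluated with df plus locals_dict in scope
utility = eval('c_ivt * df.in_vehicle_time + c_cost * df.cost * VOT',
               {'df': df}, locals_dict)
print(utility)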
Code example #42
def atwork_subtour_mode_choice(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec, tour_purpose='atwork', model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
Code example #43
def vectorize_subtour_scheduling(parent_tours, subtours, persons_merged, alts, spec,
                                 model_settings,
                                 chunk_size=0, trace_label=None):
    """
    Like vectorize_tour_scheduling but specifically for atwork subtours

    subtours have a few peculiarities necessitating separate treatment:

    Timetable has to be initialized to set all timeperiods outside parent tour footprint as
    unavailable. So atwork subtour timewindows are limited to the footprint of the parent work
    tour. And the 'parent_tour_id' column of tours is used instead of parent_id as the timetable row_id.

    Parameters
    ----------
    parent_tours : DataFrame
        parent tours of the subtours (because we need to know the tdd of the parent tour to
        assign_subtour_mask of timetable indexed by parent_tour id
    subtours : DataFrame
        atwork subtours to schedule
    persons_merged : DataFrame
        DataFrame of persons containing attributes referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
        (all subtours share same spec regardless of subtour type)
    model_settings : dict
    chunk_size
    trace_label

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the subtours
        DataFrame and the values are the index of the alts DataFrame.
    """
    if not trace_label:
        trace_label = 'vectorize_subtour_scheduling'

    assert len(subtours.index) > 0
    assert 'tour_num' in subtours.columns
    assert 'tour_type' in subtours.columns

    timetable_window_id_col = 'parent_tour_id'
    tour_owner_id_col = 'parent_tour_id'
    segment = None

    # timetable with a window for each parent tour
    parent_tour_windows = tt.create_timetable_windows(parent_tours, alts)
    timetable = tt.TimeTable(parent_tour_windows, alts)

    # mask the periods outside parent tour footprint
    timetable.assign_subtour_mask(parent_tours.tour_id, parent_tours.tdd)

    # print timetable.windows
    """
    [[7 7 7 0 0 0 0 0 0 0 0 7 7 7 7 7 7 7 7 7 7]
     [7 0 0 0 0 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]
     [7 7 7 7 7 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7]
     [7 7 0 0 0 0 0 0 0 7 7 7 7 7 7 7 7 7 7 7 7]]
    """

    choice_list = []

    # keep a series of the most recent tour for each parent tour
    # initialize with the first alternative from alts
    previous_tour_by_parent_tour_id = \
        pd.Series(alts.index[0], index=subtours['parent_tour_id'].unique())

    # tours must be scheduled in increasing trip_num order
    # second trip of type must be in group immediately following first
    # this ought to have been ensured when tours are created (tour_frequency.process_tours)

    for tour_num, nth_tours in subtours.groupby('tour_num', sort=True):

        tour_trace_label = tracing.extend_trace_label(trace_label, 'tour_%s' % (tour_num,))

        # no more than one tour per timetable window per call to schedule_tours
        assert not nth_tours.parent_tour_id.duplicated().any()

        choices = \
            schedule_tours(nth_tours,
                           persons_merged, alts,
                           spec, segment,
                           model_settings,
                           timetable, timetable_window_id_col,
                           previous_tour_by_parent_tour_id, tour_owner_id_col,
                           chunk_size, tour_trace_label)

        choice_list.append(choices)

    choices = pd.concat(choice_list)

    # add the start, end, and duration from tdd_alts
    # assert (alts.index == list(range(alts.shape[0]))).all()
    tdd = pd.DataFrame(data=alts.values[choices.values],
                       columns=alts.columns,
                       index=choices.index)

    # tdd = alts.loc[choices]
    # tdd.index = choices.index

    # include the index of the choice in the tdd alts table
    tdd['tdd'] = choices

    # print "\nfinal timetable.windows\n", timetable.windows
    """
    [[7 7 7 0 0 0 0 2 7 7 4 7 7 7 7 7 7 7 7 7 7]
     [7 0 2 7 4 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]
     [7 7 7 7 7 2 4 0 0 0 0 0 0 0 0 7 7 7 7 7 7]
     [7 7 0 2 7 7 4 0 0 7 7 7 7 7 7 7 7 7 7 7 7]]
    """

    # we don't need to call replace_table() for this nonce timetable
    # because subtours occur during time already scheduled in the persons timetable

    return tdd
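The tdd frame is built by positional fancy-indexing: each choice holds a row position into alts, and alts.values[choices.values] pulls the corresponding start/end/duration rows while the tours index is kept. A toy version (this relies on alts having a 0..n-1 positional layout, as the commented-out assert above suggests):

import pandas as pd

# alts: one row per time-slot alternative
alts = pd.DataFrame({'start': [5, 5, 6], 'end': [9, 10, 10]})
alts['duration'] = alts.end - alts.start

# positional alt choice per subtour
choices = pd.Series([2, 0], index=pd.Index([501, 502], name='tour_id'))

tdd = pd.DataFrame(data=alts.values[choices.values],
                   columns=alts.columns,
                   index=choices.index)
tdd['tdd'] = choices  # keep the chosen alt id alongside start/end/duration
print(tdd)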
Code example #44
File: location_choice.py Project: UDST/activitysim
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    skim_dict : skim.SkimDict
    skim_stack : skim.SkimStack
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME'] and annotations to persons table
    """

    # column containing segment id
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[persons_merged[chooser_filter_column]]

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations

    logging.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    choices = None
    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices = run_location_choice(
            persons_merged_df,
            skim_dict, skim_stack,
            spc,
            model_settings,
            chunk_size, trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        choices_df = choices.to_frame('dest_choice')
        choices_df['segment_id'] = \
            persons_merged_df[chooser_segment_column].reindex(choices_df.index)

        spc.set_choices(choices_df)

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logging.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    tracing.print_summary(dest_choice_column_name, choices, value_counts=True)

    persons_df = persons.to_frame()

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    NO_DEST_TAZ = -1
    persons_df[dest_choice_column_name] = \
        choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df,
                             label=trace_label,
                             warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    return persons_df
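Choices exist only for the filtered chooser subset, so reindexing them onto the full persons index leaves NaN for non-choosers, which is then coded as -1. A small sketch of that backfill:

import pandas as pd

persons_index = pd.Index([1, 2, 3, 4], name='person_id')
# location choices for the two persons who were actually choosers
choices = pd.Series([17, 42], index=pd.Index([2, 4], name='person_id'))

NO_DEST_TAZ = -1
dest_choice = choices.reindex(persons_index).fillna(NO_DEST_TAZ).astype(int)
print(dest_choice)  # persons 1 and 3 get -1, persons 2 and 4 keep their zones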
Code example #45
def vectorize_tour_scheduling(tours,
                              alts,
                              spec,
                              constants={},
                              chunk_size=0,
                              trace_label=None):
    """
    The purpose of this method is fairly straightforward - it takes tours
    and schedules them into time slots.  Alternatives should be specified so
    as to define those time slots (usually with start and end times).

    The difficulty of doing this in Python is that subsequent tours are
    dependent on certain characteristics of previous tours for the same
    person.  This is a problem with Python's vectorization requirement,
    so this method does all the 1st tours, then all the 2nd tours, and so forth.

    This method also adds variables that can be used in the spec which have
    to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        DataFrame of tours containing tour attributes, as well as a person_id
        column to define the nth tour for each person.
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the tours
        DataFrame and the values are the index of the alts DataFrame.
    """

    max_num_trips = tours.groupby('person_id').size().max()

    if np.isnan(max_num_trips):
        s = pd.Series()
        s.index.name = 'tour_id'
        return s

    # because this is Python, we have to vectorize everything by doing the
    # "nth" trip for each person in a for loop (in other words, because each
    # trip is dependent on the time windows left by the previous decision) -
    # hopefully this will work out ok!

    choices = []

    # keep a series of the most recent tour for each person
    previous_tour_by_personid = pd.Series(pd.Series(alts.index).iloc[0],
                                          index=tours.person_id.unique())

    for i in range(max_num_trips):

        # this reset_index / set_index stuff keeps the index as the tours
        # index rather that switching to person_id as the index which is
        # what happens when you groupby person_id
        index_name = tours.index.name or 'index'
        nth_tours = tours.reset_index().\
            groupby('person_id').nth(i).reset_index().set_index(index_name)

        nth_tours.index.name = 'tour_id'

        if trace_label:
            logger.info("%s running %d #%d tour choices" %
                        (trace_label, len(nth_tours), i + 1))

        # tour num can be set by the user, but if it isn't we set it here
        if "tour_num" not in nth_tours:
            nth_tours["tour_num"] = i + 1

        nth_tours = nth_tours.join(
            get_previous_tour_by_tourid(nth_tours.person_id,
                                        previous_tour_by_personid, alts))

        tour_trace_label = tracing.extend_trace_label(trace_label,
                                                      'tour_%s' % i)

        nth_choices = asim.interaction_simulate(nth_tours,
                                                alts.copy(),
                                                spec,
                                                locals_d=constants,
                                                chunk_size=chunk_size,
                                                trace_label=tour_trace_label)

        choices.append(nth_choices)

        previous_tour_by_personid.loc[nth_tours.person_id] = nth_choices.values

    choices = pd.concat(choices)

    # return the concatenated choices
    return choices
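The reset_index / groupby / nth / set_index round trip grabs each person's i-th tour while keeping the tours index (a plain groupby would switch the index to person_id). A toy demonstration (output layout varies slightly across pandas versions, but the tour_id index is restored either way):

import pandas as pd

tours = pd.DataFrame({'person_id': [1, 1, 2]},
                     index=pd.Index([900, 901, 902], name='tour_id'))

i = 1  # second tour for each person
index_name = tours.index.name or 'index'
nth_tours = tours.reset_index().\
    groupby('person_id').nth(i).reset_index().set_index(index_name)
print(nth_tours)  # only person 1 has a 2nd tour: tour 901, indexed by tour_id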
Code example #46
def schedule_tours(
        tours, persons_merged, alts,
        spec, logsum_tour_purpose,
        model_settings,
        timetable, timetable_window_id_col,
        previous_tour, tour_owner_id_col,
        chunk_size, tour_trace_label):
    """
    chunking wrapper for _schedule_tours

    While interaction_sample_simulate provides chunking support, the merged tours, persons
    dataframe and the tdd_interaction_dataset are very big, so we want to create them inside
    the chunking loop to minimize memory footprint. So we implement the chunking loop here,
    and pass a chunk_size of 0 to interaction_sample_simulate to disable its chunking support.

    """

    if not tours.index.is_monotonic_increasing:
        logger.info("schedule_tours %s tours not monotonic_increasing - sorting df")
        tours = tours.sort_index()

    logger.info("%s schedule_tours running %d tour choices" % (tour_trace_label, len(tours)))

    # no more than one tour per timetable_window per call
    if timetable_window_id_col is None:
        assert not tours.index.duplicated().any()
    else:
        assert not tours[timetable_window_id_col].duplicated().any()

    rows_per_chunk, effective_chunk_size = \
        calc_rows_per_chunk(chunk_size, tours, persons_merged, alts, trace_label=tour_trace_label)

    result_list = []
    for i, num_chunks, chooser_chunk \
            in chunk.chunked_choosers(tours, rows_per_chunk):

        logger.info("Running chunk %s of %s size %d" % (i, num_chunks, len(chooser_chunk)))

        chunk_trace_label = tracing.extend_trace_label(tour_trace_label, 'chunk_%s' % i) \
            if num_chunks > 1 else tour_trace_label

        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)
        choices = _schedule_tours(chooser_chunk, persons_merged,
                                  alts, spec, logsum_tour_purpose,
                                  model_settings,
                                  timetable, timetable_window_id_col,
                                  previous_tour, tour_owner_id_col,
                                  tour_trace_label=chunk_trace_label)

        chunk.log_close(chunk_trace_label)

        result_list.append(choices)

        mem.force_garbage_collect()

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) > 1:
        choices = pd.concat(result_list)

    assert len(choices.index) == len(tours.index)

    return choices
Code example #47
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms, chunk_size,
                             trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice logsum
    to select a school_taz from sample alternatives
    """

    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = \
            pd.merge(alts_segment, destination_size_terms,
                     left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=school_location_spec[[school_type]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, school_type),
            trace_choice_name='school_location')

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
Code example #48
def vectorize_tour_scheduling(tours, persons_merged, alts,
                              spec, segment_col,
                              model_settings,
                              chunk_size=0, trace_label=None):
    """
    The purpose of this method is fairly straightforward - it takes tours
    and schedules them into time slots.  Alternatives should be specified so
    as to define those time slots (usually with start and end times).

    schedule_tours adds variables that can be used in the spec which have
    to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        DataFrame of tours containing tour attributes, as well as a person_id
        column to define the nth tour for each person.
    persons_merged : DataFrame
        DataFrame of persons containing attributes referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate
        (or a dict of specs keyed on segment_col values if the model is segmented).
    model_settings : dict

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the tours
        DataFrame and the values are the index of the alts DataFrame.
    timetable : TimeTable
        persons timetable updated with tours (caller should replace_table for it to persist)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'vectorize_tour_scheduling')

    assert len(tours.index) > 0
    assert 'tour_num' in tours.columns
    assert 'tour_type' in tours.columns

    # tours must be scheduled in increasing tour_num order
    # the second tour of a type must be in the group immediately following the first
    # this ought to have been ensured when tours are created (tour_frequency.process_tours)

    timetable = inject.get_injectable("timetable")
    choice_list = []

    # keep a series of the most recent tour for each person
    # initialize with the first alternative in alts
    previous_tour_by_personid = pd.Series(alts.index[0], index=tours.person_id.unique())

    timetable_window_id_col = 'person_id'
    tour_owner_id_col = 'person_id'

    # no more than one tour per person per call to schedule_tours
    # tours must be scheduled in increasing tour_num order
    # the second tour of a type must be in the group immediately following the first
    # scheduling is segmented by tour_type if multiple specs are passed in a dict keyed by tour_type

    for tour_num, nth_tours in tours.groupby('tour_num', sort=True):

        tour_trace_label = tracing.extend_trace_label(trace_label, 'tour_%s' % (tour_num,))

        if isinstance(spec, dict):

            assert segment_col is not None

            for spec_segment in spec:

                segment_trace_label = tracing.extend_trace_label(tour_trace_label, spec_segment)

                in_segment = nth_tours[segment_col] == spec_segment

                if not in_segment.any():
                    logger.info("skipping empty segment %s")
                    continue

                # assume segmentation of spec and logsum coefficients are aligned
                logsum_tour_purpose = spec_segment

                choices = \
                    schedule_tours(nth_tours[in_segment],
                                   persons_merged, alts,
                                   spec[spec_segment], logsum_tour_purpose,
                                   model_settings,
                                   timetable, timetable_window_id_col,
                                   previous_tour_by_personid, tour_owner_id_col,
                                   chunk_size,
                                   segment_trace_label)

                choice_list.append(choices)

        else:

            # an unsegmented spec indicates no logsums
            # caller could use a single-element spec dict if logsum support is desired,
            # but that case is not required for mtctm1
            assert segment_col is None
            logsum_segment = None

            choices = \
                schedule_tours(nth_tours,
                               persons_merged, alts,
                               spec, logsum_segment,
                               model_settings,
                               timetable, timetable_window_id_col,
                               previous_tour_by_personid, tour_owner_id_col,
                               chunk_size,
                               tour_trace_label)

            choice_list.append(choices)

    choices = pd.concat(choice_list)

    # add the start, end, and duration from tdd_alts
    # use numpy fancy indexing instead of (slower) loc[] since alts has a RangeIndex
    tdd = pd.DataFrame(data=alts.values[choices.values],
                       columns=alts.columns,
                       index=choices.index)

    # tdd = alts.loc[choices]
    # tdd.index = choices.index

    # include the index of the choice in the tdd alts table
    tdd['tdd'] = choices

    return tdd, timetable
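
Because alts carries a RangeIndex, the choice values double as row positions, letting NumPy fancy indexing stand in for the slower label-based loc[]. A toy sketch of the equivalence (ids invented):

import pandas as pd

alts = pd.DataFrame({'start': [5, 6, 7], 'end': [9, 10, 11]})    # RangeIndex 0..2
choices = pd.Series([2, 0], index=[201, 202])                    # positions into alts, per tour
tdd = pd.DataFrame(alts.values[choices.values],
                   columns=alts.columns, index=choices.index)
# tour 201 gets start=7, end=11; tour 202 gets start=5, end=9
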
Code example #49
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the chosen index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  The resulting format is
    the same as for non_mandatory_tours except the tour types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
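
simple_simulate returns the positional index of the chosen spec column, which the step then maps back to an alternative name. A small sketch of that conversion, with invented alternative names:

import pandas as pd

spec_columns = pd.Index(['work1', 'school1', 'work_and_school'])
raw = pd.Series([0, 2], index=[101, 102])               # positional choice per person
named = pd.Series(spec_columns[raw.values], index=raw.index)
# person 101 -> 'work1', person 102 -> 'work_and_school'
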
Code example #50
def vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged, alts, spec,
        model_settings,
        chunk_size=0, trace_label=None):
    """
    Like vectorize_tour_scheduling but specifically for joint tours

    joint tours have a few peculiarities necessitating separate treatment:

    Timetable has to be initialized to set all timeperiods...

    Parameters
    ----------
    joint_tours : DataFrame
        DataFrame of joint tours containing tour attributes, as well as a household_id
        column; tour_num defines the nth tour for each household.
    persons_merged : DataFrame
        DataFrame of persons containing attributes referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the tours
        DataFrame and the values are the index of the alts DataFrame.
    persons_timetable : TimeTable
        timetable updated with joint tours (caller should replace_table for it to persist)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'vectorize_joint_tour_scheduling')

    assert len(joint_tours.index) > 0
    assert 'tour_num' in joint_tours.columns
    assert 'tour_type' in joint_tours.columns

    timetable_window_id_col = None
    tour_owner_id_col = 'household_id'
    segment = None

    persons_timetable = inject.get_injectable("timetable")
    choice_list = []

    # keep a series of the most recent tour for each household
    # initialize with the first alternative in alts
    previous_tour_by_householdid = pd.Series(alts.index[0], index=joint_tours.household_id.unique())

    # tours must be scheduled in increasing tour_num order
    # the second tour of a type must be in the group immediately following the first
    # this ought to have been ensured when tours are created (tour_frequency.process_tours)

    # print "participant windows before scheduling\n", \
    #     persons_timetable.slice_windows_by_row_id(joint_tour_participants.person_id)

    for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):

        tour_trace_label = tracing.extend_trace_label(trace_label, 'tour_%s' % (tour_num,))

        # no more than one tour per household per call to schedule_tours
        assert not nth_tours.household_id.duplicated().any()

        nth_participants = \
            joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

        timetable = build_joint_tour_timetables(
            nth_tours, nth_participants,
            persons_timetable, alts)

        choices = \
            schedule_tours(nth_tours,
                           persons_merged, alts,
                           spec, segment,
                           model_settings,
                           timetable, timetable_window_id_col,
                           previous_tour_by_householdid, tour_owner_id_col,
                           chunk_size, tour_trace_label)

        # - update timetables of all joint tour participants
        persons_timetable.assign(
            nth_participants.person_id,
            reindex(choices, nth_participants.tour_id))

        choice_list.append(choices)

    choices = pd.concat(choice_list)

    # add the start, end, and duration from tdd_alts
    # assert (alts.index == list(range(alts.shape[0]))).all()
    tdd = pd.DataFrame(data=alts.values[choices.values],
                       columns=alts.columns,
                       index=choices.index)

    # tdd = alts.loc[choices]
    # tdd.index = choices.index

    # include the index of the choice in the tdd alts table
    tdd['tdd'] = choices

    # print "participant windows after scheduling\n", \
    #     persons_timetable.slice_windows_by_row_id(joint_tour_participants.person_id)

    return tdd, persons_timetable
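
persons_timetable.assign broadcasts each tour's chosen window to every participant by reindexing tour-level choices on participant tour_ids. A hedged pandas sketch of just that broadcast (ids invented; not the TimeTable API):

import pandas as pd

choices = pd.Series([10, 12], index=[501, 502])          # chosen tdd alt per tour_id
participants = pd.DataFrame({'person_id': [1, 2, 2, 3],
                             'tour_id':   [501, 501, 502, 502]})
# every participant in a tour inherits that tour's chosen window
per_person = pd.Series(choices.reindex(participants.tour_id).values,
                       index=participants.person_id)
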
Code example #51
def extend_tour_counts(persons, tour_counts, alternatives, trace_hh_id, trace_label):
    """
    extend tour counts based on a probability table

    counts can only be extended if original count is between 1 and 4
    and tours can only be extended if their count is at the max possible
    (e.g. 2 for escort, 1 otherwise) so escort might be increased to 3 or 4
    and other tour types might be increased to 2 or 3

    Parameters
    ----------
    persons: pandas dataframe
        (need this for join columns)
    tour_counts: pandas dataframe
        one row per person, one column per tour_type
    alternatives
        alternatives from nmtv interaction_simulate
        only need this to know max possible frequency for a tour type
    trace_hh_id
    trace_label

    Returns
    -------
    extended tour_counts


    tour_counts looks like this:
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0

    """

    assert tour_counts.index.name == persons.index.name

    PROBABILITY_COLUMNS = ['0_tours', '1_tours', '2_tours']
    JOIN_COLUMNS = ['ptype', 'has_mandatory_tour', 'has_joint_tour']
    TOUR_TYPE_COL = 'nonmandatory_tour_type'

    probs_spec = extension_probs()
    persons = persons[JOIN_COLUMNS]

    # only extend if there are 1 - 4 non_mandatory tours to start with
    extend_tour_counts = tour_counts.sum(axis=1).between(1, 4)
    if not extend_tour_counts.any():
        return tour_counts

    have_trace_targets = trace_hh_id and tracing.has_trace_targets(extend_tour_counts)

    for i, tour_type in enumerate(alternatives.columns):

        i_tour_type = i + 1  # (probs_spec nonmandatory_tour_type column is 1-based)
        tour_type_trace_label = tracing.extend_trace_label(trace_label, tour_type)

        # - only extend tour if frequency is max possible frequency for this tour type
        tour_type_is_maxed = \
            extend_tour_counts & (tour_counts[tour_type] == alternatives[tour_type].max())
        maxed_tour_count_idx = tour_counts.index[tour_type_is_maxed]

        if len(maxed_tour_count_idx) == 0:
            continue

        # - get extension probs for tour_type
        choosers = pd.merge(
            persons.loc[maxed_tour_count_idx],
            probs_spec[probs_spec[TOUR_TYPE_COL] == i_tour_type],
            on=JOIN_COLUMNS,
            how='left'
        ).set_index(maxed_tour_count_idx)
        assert choosers.index.name == tour_counts.index.name

        # - random choice of extension magnitude based on relative probs
        choices, rands = logit.make_choices(
            choosers[PROBABILITY_COLUMNS],
            trace_label=tour_type_trace_label,
            trace_choosers=choosers)

        # - extend tour_count (0-based prob alternative choice equals magnitude of extension)
        if choices.any():
            tour_counts.loc[choices.index, tour_type] += choices

        if have_trace_targets:
            tracing.trace_df(choices,
                             tracing.extend_trace_label(tour_type_trace_label, 'choices'),
                             columns=[None, 'choice'])
            tracing.trace_df(rands,
                             tracing.extend_trace_label(tour_type_trace_label, 'rands'),
                             columns=[None, 'rand'])

    return tour_counts
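
logit.make_choices draws one alternative per row from the probability columns; conceptually, a uniform draw is compared against the row's cumulative probabilities. A numpy sketch of the idea (not the library implementation):

import numpy as np

probs = np.array([[0.7, 0.2, 0.1],
                  [0.5, 0.3, 0.2]])       # rows sum to 1: P(extend by 0, 1, 2 tours)
rands = np.random.default_rng(0).random(len(probs))
# the choice is the first column whose cumulative probability exceeds the draw
choices = (probs.cumsum(axis=1) < rands[:, None]).sum(axis=1)
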
Code example #52
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample, configs_dir, chunk_size,
                            trace_hh_id):
    """
    add a logsum column to the existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in school_location_sample, and computing the logsum of all the utilities

    +-------+----------+----------------+------------+----------------+
    | PERID | dest_TAZ | rand           | pick_count | logsum (added) |
    +=======+==========+================+============+================+
    | 23750 | 14       | 0.565502716034 | 4          | 1.85659498857  |
    +-------+----------+----------------+------------+----------------+
    | 23750 | 16       | 0.711135838871 | 6          | 1.92315598631  |
    +-------+----------+----------------+------------+----------------+
    | ...   |          |                |            |                |
    +-------+----------+----------------+------------+----------------+
    | 23751 | 12       | 0.408038878552 | 1          | 2.40612135416  |
    +-------+----------+----------------+------------+----------------+
    | 23751 | 14       | 0.972732479292 | 2          | 1.44009018355  |
    +-------+----------+----------------+------------+----------------+
    """

    trace_label = 'school_location_logsums'

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]
    chooser_col_name = 'TAZ'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type']
                                          == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        choosers['in_period'] = skim_time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = skim_time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers,
                        tracing.extend_trace_label(trace_label, school_type),
                        'choosers')

        logsums = compute_logsums(
            choosers, logsums_spec, logsum_settings, skim_dict, skim_stack,
            chooser_col_name, alt_col_name, chunk_size, trace_hh_id,
            tracing.extend_trace_label(trace_label, school_type))

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on left side of merge creating choosers
    inject.add_column("school_location_sample", "mode_choice_logsum", logsums)
Code example #53
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
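
flag_failed_trip_leg_mates propagates failure to the other trips on the same tour leg, since a leg containing one failed trip cannot stand alone. A hedged sketch of that propagation, assuming tour_id/outbound/failed columns (not the helper's actual code):

import pandas as pd

trips = pd.DataFrame({'tour_id':  [1, 1, 1, 2],
                      'outbound': [True, True, False, True],
                      'failed':   [True, False, False, False]})
# if any trip on a (tour, direction) leg failed, fail the whole leg
trips['failed'] = trips.groupby(['tour_id', 'outbound'])['failed'].transform('any')
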
Code example #54
def school_location_sample(persons_merged, school_location_sample_spec,
                           school_location_settings, skim_dict,
                           destination_size_terms, chunk_size, trace_hh_id):
    """
    build a table of persons * all zones to select a sample of alternative school locations.

    PERID,  dest_TAZ, rand,            pick_count
    23750,  14,       0.565502716034,  4
    23750,  16,       0.711135838871,  6
    ...
    23751,  12,       0.408038878552,  1
    23751,  14,       0.972732479292,  2
    """

    trace_label = 'school_location_sample'

    choosers = persons_merged.to_frame()
    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(school_location_settings)

    sample_size = school_location_settings["SAMPLE_SIZE"]
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    logger.info("Running school_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", "TAZ_r")

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]

        # FIXME - no point in considering impossible alternatives
        alternatives_segment = alternatives[alternatives[school_type] > 0]

        logger.info(
            "school_type %s:  %s persons %s alternatives" %
            (school_type, len(choosers_segment), len(alternatives_segment)))

        if len(choosers_segment.index) > 0:

            choices = interaction_sample(
                choosers_segment,
                alternatives_segment,
                sample_size=sample_size,
                alt_col_name=alt_col_name,
                spec=school_location_sample_spec[[school_type]],
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label,
                                                       school_type))

            choices['school_type'] = school_type
            choices_list.append(choices)

    choices = pd.concat(choices_list)

    inject.add_table('school_location_sample', choices)
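
interaction_sample returns each sampled alternative once per chooser, with pick_count recording how many draws landed on it (as in the docstring listing above). A toy sketch of collapsing repeated draws, with made-up ids:

import pandas as pd

draws = pd.DataFrame({'PERID': [23750] * 3, 'dest_TAZ': [14, 14, 16]})
sample = (draws.groupby(['PERID', 'dest_TAZ']).size()
               .rename('pick_count').reset_index())
# PERID 23750: dest_TAZ 14 picked twice, dest_TAZ 16 picked once
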
Code example #55
File: trip_purpose.py  Project: UDST/activitysim
def run_trip_purpose(
        trips_df,
        chunk_size,
        trace_hh_id,
        trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Each intermediate stop on a tour (i.e. each trip other than the last trip,
    outbound or inbound) is assigned a purpose based on an observed frequency
    distribution.

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings = config.read_model_settings('trip_purpose.yaml')
    probs_spec = trip_purpose_probs()

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(
            df=trips_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    rows_per_chunk, effective_chunk_size = \
        trip_purpose_rpc(chunk_size, trips_df, probs_spec, trace_label=trace_label)

    for i, num_chunks, trips_chunk in chunk.chunked_choosers(trips_df, rows_per_chunk):

        logger.info("Running chunk %s of %s size %d", i, num_chunks, len(trips_chunk))

        chunk_trace_label = tracing.extend_trace_label(trace_label, 'chunk_%s' % i) \
            if num_chunks > 1 else trace_label

        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            trace_hh_id,
            trace_label=chunk_trace_label)

        chunk.log_close(chunk_trace_label)

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
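
chunk.chunked_choosers slices the choosers into row blocks so each block's expression evaluation fits in memory, yielding (chunk number, total chunks, frame slice) as consumed above. A minimal generator sketch under that assumption (not the library's implementation):

def chunked_choosers(df, rows_per_chunk):
    # yield successive row slices of df along with chunk bookkeeping
    num_chunks = (len(df) + rows_per_chunk - 1) // rows_per_chunk
    for i in range(num_chunks):
        yield i + 1, num_chunks, df.iloc[i * rows_per_chunk:(i + 1) * rows_per_chunk]
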