Example #1
def read_spec_file(model_settings, segments):
    """Read expressions file from a csv using instructions from model settings.

    Parameters
    ----------
    model_settings: dict, from yaml
    segments : dict, origin zone trip segments

    Returns
    -------
    pandas DataFrame
    """
    spec_file_name = model_settings.get('spec_file_name')
    spec_file_path = config.config_file_path(spec_file_name, mandatory=True)

    logger.info('reading spec file \'%s\'' % spec_file_name)
    cfg = pd.read_csv(spec_file_path, comment='#')

    expected_header = ['description', 'target', 'expression', *segments.keys()]
    if not sorted(cfg.columns.values) == sorted(expected_header):
        raise RuntimeError("Spec file requires header %s" % expected_header)

    cfg.target = cfg.target.str.strip()
    cfg.expression = cfg.expression.str.strip()

    return cfg
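A minimal, runnable sketch of the kind of spec csv read_spec_file expects, using hypothetical segment names and expressions (nothing below comes from an actual ActivitySim config):

import io
import pandas as pd

# hypothetical spec: one extra column per origin-zone segment ('work', 'school')
spec_csv = io.StringIO(
    "description,target,expression,work,school\n"
    "jobs within 30 min,jobs_30,df.employment * (df.auto_time < 30),1,0\n"
    "enrollment within 30 min,enr_30,df.enrollment * (df.auto_time < 30),0,1\n")

cfg = pd.read_csv(spec_csv, comment='#')
expected_header = ['description', 'target', 'expression', 'work', 'school']
assert sorted(cfg.columns.values) == sorted(expected_header)
cfg.target = cfg.target.str.strip()
cfg.expression = cfg.expression.str.strip()
print(cfg[['target', 'expression']])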
Example #2
def tour_mode_choice_coeffecients_spec(model_settings):

    assert 'COEFFS' in model_settings
    coeffs_file_name = model_settings['COEFFS']

    file_path = config.config_file_path(coeffs_file_name)
    return pd.read_csv(file_path, comment='#', index_col='Expression')
Example #3
def tour_mode_choice_coeffecients_spec(model_settings):

    assert 'COEFFS' in model_settings
    coeffs_file_name = model_settings['COEFFS']

    file_path = config.config_file_path(coeffs_file_name)
    return pd.read_csv(file_path, comment='#', index_col='Expression')
Example #4
def compute_accessibility(land_use, accessibility, network_los, chunk_size,
                          trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()
    if len(accessibility_df.columns) > 0:
        logger.warning(
            f"accessibility table is not empty. Columns:{list(accessibility_df.columns)}"
        )
        raise RuntimeError(f"accessibility table is not empty.")

    constants = config.get_model_constants(model_settings)

    # only include the land_use columns needed by spec, as specified by land_use_columns model_setting
    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    logger.info(
        f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones"
    )

    accessibilities_list = []

    for i, chooser_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, trace_label):

        accessibilities = \
            compute_accessibilities_for_zones(chooser_chunk, land_use_df, assignment_spec,
                                              constants, network_los, trace_od, trace_label)
        accessibilities_list.append(accessibilities)

    accessibility_df = pd.concat(accessibilities_list)

    logger.info(
        f"{trace_label} computed accessibilities {accessibility_df.shape}")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
Example #5
def extension_probs():
    f = config.config_file_path('non_mandatory_tour_frequency_extension_probs.csv')
    df = pd.read_csv(f, comment='#')

    # convert cum probs to individual probs
    df['2_tours'] = df['2_tours'] - df['1_tours']
    df['1_tours'] = df['1_tours'] - df['0_tours']

    return df
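A worked illustration of the cumulative-to-individual conversion done above, with made-up cumulative probabilities:

import pandas as pd

# hypothetical cumulative probabilities: P(<=0), P(<=1), P(<=2) extension tours
df = pd.DataFrame({'0_tours': [0.6], '1_tours': [0.9], '2_tours': [1.0]})
df['2_tours'] = df['2_tours'] - df['1_tours']   # P(exactly 2) = 1.0 - 0.9 = 0.1
df['1_tours'] = df['1_tours'] - df['0_tours']   # P(exactly 1) = 0.9 - 0.6 = 0.3
print(df)                                       # '0_tours' already holds P(exactly 0) = 0.6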
Example #6
def extension_probs():
    f = config.config_file_path('non_mandatory_tour_frequency_extension_probs.csv')
    df = pd.read_csv(f, comment='#')

    # convert cum probs to individual probs
    df['2_tours'] = df['2_tours'] - df['1_tours']
    df['1_tours'] = df['1_tours'] - df['0_tours']

    return df
Example #7
def test_bad_coefficients():

    coefficients = pd.read_csv(config.config_file_path('cdap_interaction_coefficients.csv'), comment='#')
    coefficients = cdap.preprocess_interaction_coefficients(coefficients)

    coefficients.loc[2, 'activity'] = 'AA'

    with pytest.raises(RuntimeError) as excinfo:
        coefficients = cdap.preprocess_interaction_coefficients(coefficients)
    assert "Expect only M, N, or H" in str(excinfo.value)
Example #8
    def write_spec(self, model_settings=None, file_name=None, tag='SPEC'):

        if model_settings is not None:
            assert file_name is None
            file_name = model_settings[tag]

        input_path = config.config_file_path(file_name)
        output_path = self.file_path(table_name=tag, file_type='csv')
        shutil.copy(input_path, output_path)
        self.debug("estimate.write_spec: %s" % output_path)
Example #9
def tdd_alts():
    # right now this file just contains the start and end hour
    f = config.config_file_path('tour_departure_and_duration_alternatives.csv')
    df = pd.read_csv(f)

    df['duration'] = df.end - df.start

    # - NARROW
    df = df.astype(np.int8)

    return df
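A quick illustration of what tdd_alts builds from a couple of hypothetical start/end hours:

import numpy as np
import pandas as pd

df = pd.DataFrame({'start': [5, 6], 'end': [9, 20]})   # illustrative alternatives
df['duration'] = df.end - df.start
df = df.astype(np.int8)                                # NARROW: hours fit comfortably in int8
print(df)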
Example #10
def tdd_alts():
    # right now this file just contains the start and end hour
    f = config.config_file_path('tour_departure_and_duration_alternatives.csv')
    df = pd.read_csv(f)

    df['duration'] = df.end - df.start

    # - NARROW
    df = df.astype(np.int8)

    return df
Example #11
    def write_spec(self,
                   model_settings=None,
                   file_name=None,
                   tag='SPEC',
                   bundle_directory=False):

        if model_settings is not None:
            assert file_name is None
            file_name = model_settings[tag]

        input_path = config.config_file_path(file_name)

        table_name = tag  # more readable than full spec file_name
        output_path = self.output_file_path(table_name, 'csv',
                                            bundle_directory)
        shutil.copy(input_path, output_path)
        self.debug("estimate.write_spec: %s" % output_path)
Example #12
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        model_constants = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df,
                                                chooser_attributes,
                                                trace_label, trace)
            # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

        locals_d = {'los': self.network_los}
        locals_d.update(model_constants)

        assignment_spec = assign.read_assignment_spec(
            file_name=config.config_file_path(tap_tap_settings['SPEC']))

        results, _, _ = assign.assign_variables(assignment_spec, transit_df,
                                                locals_d)
        assert len(results.columns) == 1
        transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(
            f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
        )
        transit_df = transit_df[transit_df.transit > 0]

        transit_df.drop(columns=chooser_attributes.columns, inplace=True)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
Example #13
def initialize_tvpb_calc_row_size(choosers, network_los, trace_label):
    """
    row_size calculator for initialize_tvpb
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    model_settings = \
        network_los.setting(f'TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])

    #  one element per chooser column
    sizer.add_elements(len(choosers.columns), 'choosers')

    #  one element per attributes_as_columns attribute
    sizer.add_elements(len(attributes_as_columns), 'attributes_as_columns')

    preprocessor_settings = model_settings.get('PREPROCESSOR')
    if preprocessor_settings:

        preprocessor_spec_name = preprocessor_settings.get('SPEC', None)

        if not preprocessor_spec_name.endswith(".csv"):
            preprocessor_spec_name = f'{preprocessor_spec_name}.csv'
        expressions_spec = assign.read_assignment_spec(
            config.config_file_path(preprocessor_spec_name))

        sizer.add_elements(expressions_spec.shape[0], 'preprocessor')

    #  expression_values for each spec row
    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    sizer.add_elements(spec.shape[0], 'expression_values')

    #  one utility column per spec column
    sizer.add_elements(spec.shape[1], 'utilities')

    row_size = sizer.get_hwm()

    return row_size
Example #14
def test_build_cdap_spec_hhsize2(people, model_settings):

    hhsize = 2
    cdap_indiv_and_hhsize1 = simulate.read_model_spec(
        file_name='cdap_indiv_and_hhsize1.csv')

    interaction_coefficients = pd.read_csv(
        config.config_file_path('cdap_interaction_coefficients.csv'),
        comment='#')
    interaction_coefficients = cdap.preprocess_interaction_coefficients(
        interaction_coefficients)

    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
    cdap.assign_cdap_rank(people, person_type_map)
    indiv_utils = cdap.individual_utilities(people,
                                            cdap_indiv_and_hhsize1,
                                            locals_d=None)

    choosers = cdap.hh_choosers(indiv_utils, hhsize=hhsize)

    spec = cdap.build_cdap_spec(interaction_coefficients,
                                hhsize=hhsize,
                                cache=False)

    vars = simulate.eval_variables(spec.index, choosers)

    utils = simulate.compute_utilities(vars, spec)

    expected = pd.DataFrame(
        [
            [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
            [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
        ],
        index=[3, 4],
        columns=['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM',
                 'NN']).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
Example #15
def read_control_spec(data_filename):

    # read the csv file
    data_file_path = config.config_file_path(data_filename)
    if not os.path.exists(data_file_path):
        raise RuntimeError(
            "initial_seed_balancing - control file not found: %s" %
            (data_file_path, ))

    logger.info("Reading control file %s" % data_file_path)
    control_spec = pd.read_csv(data_file_path, comment='#')

    geographies = setting('geographies')

    if 'geography' not in control_spec.columns:
        raise RuntimeError("missing geography column in controls file")

    for g in control_spec.geography.unique():
        if g not in geographies:
            raise RuntimeError(
                "unknown geography column '%s' in control file" % g)

    return control_spec
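A hedged sketch of a control spec that would pass read_control_spec's geography check; apart from the required 'geography' column, the column names, geographies, and expressions are illustrative only:

import io
import pandas as pd

control_csv = io.StringIO(
    "target,geography,control_field,expression\n"
    "num_hh,TAZ,HHBASE,households.WGTP > 0\n"
    "hh_size_1,TAZ,HHSIZE1,households.NP == 1\n")

control_spec = pd.read_csv(control_csv, comment='#')
geographies = ['REGION', 'PUMA', 'TAZ']   # stand-in for setting('geographies')

assert 'geography' in control_spec.columns
assert all(g in geographies for g in control_spec.geography.unique())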
Example #16
def test_build_cdap_spec_hhsize2(people, model_settings):

    hhsize = 2
    cdap_indiv_and_hhsize1 = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')

    interaction_coefficients = pd.read_csv(config.config_file_path('cdap_interaction_coefficients.csv'), comment='#')
    interaction_coefficients = cdap.preprocess_interaction_coefficients(interaction_coefficients)

    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})

    with chunk.chunk_log('test_build_cdap_spec_hhsize2', base=True):
        cdap.assign_cdap_rank(people, person_type_map)
        indiv_utils = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)

        choosers = cdap.hh_choosers(indiv_utils, hhsize=hhsize)

        spec = cdap.build_cdap_spec(interaction_coefficients, hhsize=hhsize, cache=False)

        # pandas.dot depends on column names of expression_values matching spec index values
        # expressions should have been uniquified when spec was read
        assert spec.index.is_unique

        vars = simulate.eval_variables(spec.index, choosers)
        assert (spec.index.values == vars.columns.values).all()

    # spec = spec.astype(np.float64)

    utils = vars.dot(spec)

    expected = pd.DataFrame([
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
        ],
        index=[3, 4],
        columns=['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
Example #17
def tdd_alt_segments():

    # tour_purpose,time_period,start,end
    # work,EA,3,5
    # work,AM,6,8
    # ...
    # school,PM,15,17
    # school,EV,18,22

    file_path = config.config_file_path(
        'tour_departure_and_duration_segments.csv', mandatory=False)

    if file_path:

        df = pd.read_csv(file_path, comment='#')

        # - NARROW
        df['start'] = df['start'].astype(np.int8)
        df['end'] = df['end'].astype(np.int8)

    else:
        df = None

    return df
Example #18
def size_terms():
    f = config.config_file_path('destination_choice_size_terms.csv')
    return pd.read_csv(f, comment='#', index_col='segment')
Example #19
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id,
                     trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. trip other than the last trip outbound or inbound)
    each trip is assigned a purpose based on an observed frequency distribution

    The distribution should always be segmented by tour purpose and tour direction. By default it is also
    segmented by person type. The join columns can be overwritten using the "probs_join_cols" parameter in
    the model settings. The model will attempt to segment by trip depart time as well if necessary
    and depart time ranges are specified in the probability lookup table.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    probs_join_cols = model_settings.get('probs_join_cols', PROBS_JOIN_COLUMNS)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name),
                             comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'work', 'home'),
                        index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    use_depart_time = model_settings.get('use_depart_time', True)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):
        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            probs_join_cols=probs_join_cols,
            use_depart_time=use_depart_time,
            trace_hh_id=trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, f'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
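A compact sketch of the intermediate-trip lookup that choose_intermediate_trip_purpose performs against PROBS_SPEC: join trips to the probability rows on the join columns, then draw a purpose by inverse CDF. The column names, probabilities, and sampling below are illustrative, not the actual ActivitySim implementation:

import numpy as np
import pandas as pd

probs_spec = pd.DataFrame({
    'primary_purpose': ['work', 'work'],
    'outbound': [True, False],
    'shopping': [0.3, 0.5],     # purpose probabilities for this segment
    'escort': [0.7, 0.5],
})
trips = pd.DataFrame({'primary_purpose': ['work'], 'outbound': [True]}, index=[101])

merged = trips.merge(probs_spec, on=['primary_purpose', 'outbound'], how='left')
purpose_cols = ['shopping', 'escort']
cum = merged[purpose_cols].cumsum(axis=1).iloc[0]
r = np.random.default_rng(0).random()
choice = purpose_cols[int((r >= cum.values).sum())]   # inverse-CDF draw
print(choice)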
Example #20
    def compute_maz_tap_utilities(self, recipe, maz_od_df, chooser_attributes,
                                  leg, mode, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 f'maz_tap_utils.{leg}')

        with chunk.chunk_log(trace_label):

            maz_tap_settings = \
                self.network_los.setting(f'TVPB_SETTINGS.{recipe}.maz_tap_settings.{mode}')
            chooser_columns = maz_tap_settings['CHOOSER_COLUMNS']
            attribute_columns = list(
                chooser_attributes.columns
            ) if chooser_attributes is not None else []
            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')

            if leg == 'access':
                maz_col = 'omaz'
                tap_col = 'btap'
            else:
                maz_col = 'dmaz'
                tap_col = 'atap'

            # maz_to_tap access/egress utilities
            # deduped utilities_df - one row per chooser for each boarding tap (btap) accessible from omaz
            utilities_df = self.network_los.maz_to_tap_dfs[mode]

            utilities_df = utilities_df[chooser_columns]. \
                reset_index(drop=False). \
                rename(columns={'MAZ': maz_col, 'TAP': tap_col})
            utilities_df = pd.merge(maz_od_df[['idx',
                                               maz_col]].drop_duplicates(),
                                    utilities_df,
                                    on=maz_col,
                                    how='inner')
            # add any supplemental chooser attributes (e.g. demographic_segment, tod)
            for c in attribute_columns:
                utilities_df[c] = reindex(chooser_attributes[c],
                                          utilities_df['idx'])

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if self.units_for_recipe(recipe) == 'utility':

                utilities_df[leg] = compute_utilities(
                    self.network_los,
                    maz_tap_settings,
                    utilities_df,
                    model_constants=model_constants,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=['idx', maz_col, tap_col]
                    if trace else None)

                chunk.log_df(trace_label, "utilities_df",
                             utilities_df)  # annotated

            else:

                assignment_spec = \
                    assign.read_assignment_spec(file_name=config.config_file_path(maz_tap_settings['SPEC']))

                results, _, _ = assign.assign_variables(
                    assignment_spec, utilities_df, model_constants)
                assert len(results.columns) == 1
                utilities_df[leg] = results

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if trace:
                self.trace_df(utilities_df, trace_label, 'utilities_df')

            # drop utility computation columns ('tod', 'demographic_segment' and maz_to_tap_df time/distance columns)
            utilities_df.drop(columns=attribute_columns + chooser_columns,
                              inplace=True)

        return utilities_df
Example #21
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'

    model_settings = config.read_model_settings(
        'atwork_subtour_frequency.yaml')
    model_spec = simulate.read_model_spec(
        file_name='atwork_subtour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('atwork_subtour_frequency_alternatives.csv'),
        set_index='alt')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    # merge persons into work_tours
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        assign_columns(df=work_tours,
                       model_settings=preprocessor_settings,
                       trace_label=trace_label)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    tracing.print_summary('atwork_subtour_frequency',
                          choices,
                          value_counts=True)

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
Example #22
def cdap_simulate(persons_merged, persons, households, chunk_size,
                  trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    cdap_indiv_spec = simulate.read_model_spec(
        file_name=model_settings['INDIV_AND_HHSIZE1_SPEC'])

    # Rules and coefficients for generating interaction specs for different household sizes
    cdap_interaction_coefficients = \
        pd.read_csv(config.config_file_path('cdap_interaction_coefficients.csv'), comment='#')
    """
    spec to compute/specify the relative proportions of each activity (M, N, H)
    that should be used to choose activities for additional household members not handled by CDAP
    This spec is handled much like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)
    """
    cdap_fixed_relative_proportions = \
        simulate.read_model_spec(file_name=model_settings['FIXED_RELATIVE_PROPORTIONS_SPEC'])

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients,
                                    hhsize,
                                    cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize),
                        index=True)

    estimator = estimation.manager.begin_estimation('cdap')
    if estimator:
        estimator.write_model_settings(model_settings, 'cdap.yaml')
        estimator.write_spec(model_settings, tag='INDIV_AND_HHSIZE1_SPEC')
        estimator.write_spec(model_settings=model_settings,
                             tag='FIXED_RELATIVE_PROPORTIONS_SPEC')
        estimator.write_table(cdap_interaction_coefficients,
                              'interaction_coefficients',
                              index=False,
                              append=False)
        estimator.write_choosers(persons_merged)
        for hhsize in range(2, cdap.MAX_HHSIZE + 1):
            spec = cdap.get_cached_spec(hhsize)
            estimator.write_table(spec, 'spec_%s' % hhsize, append=False)

    logger.info("Running cdap_simulate with %d persons",
                len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'cdap_activity')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity',
                          persons.cdap_activity,
                          value_counts=True)
    logger.info(
        "cdap crosstabs:\n%s" %
        pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))
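A short illustration of the distinction flagged in the cdap_fixed_relative_proportions comment above: the computed values are normalized directly as proportions, whereas a logit spec would exponentiate them first (values are made up):

import numpy as np

values = np.array([2.0, 1.0, 1.0])                    # computed values for M, N, H

proportions = values / values.sum()                   # fixed relative proportions: 0.5, 0.25, 0.25
logit_probs = np.exp(values) / np.exp(values).sum()   # what a utility spec would do instead
print(proportions, logit_probs)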
Example #23
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    logging.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                 (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
Example #24
def trip_mode_choice(
        trips,
        tours_merged,
        network_los,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFICIENTS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice')

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            print(f"mode {mode} path_type {path_type}")

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[dest_col] = np.nan
                choices_df[dest_col].where(choices_df[mode_column_name] != mode, skim_cache[c], inplace=True)

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #25
def stop_frequency_alts():
    # alt file for building trips even though simulation is simple_simulate not interaction_simulate
    file_path = config.config_file_path('stop_frequency_alternatives.csv')
    df = pd.read_csv(file_path, comment='#')
    df.set_index('alt', inplace=True)
    return df
Example #26
def compute_accessibility(accessibility, network_los, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" %
                (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)

    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    # don't assume they are the same: accessibility may be sliced if we are multiprocessing
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)

    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    logger.info("{trace_label} added {len(results.columns} columns")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Example #27
def compute_logsums(
        primary_purpose,
        trips,
        destination_sample,
        tours_merged,
        model_settings,
        skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
    for each alternative since we need out-of-direction logsum
    (i.e . origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
        adds od_logsum and dp_logsum columns to trips (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    omnibus_coefficient_spec = \
        assign.read_constant_spec(config.config_file_path(logsum_settings['COEFFS']))

    coefficient_spec = omnibus_coefficient_spec[primary_purpose]

    constants = config.get_model_constants(logsum_settings)
    locals_dict = assign.evaluate_constants(coefficient_spec, constants=constants)
    locals_dict.update(constants)

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST'],
        "odt_skims": skims['odt_skims'],
        "od_skims": skims['od_skims'],
    }
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'))

    # - dp_logsums
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "od_skims": skims['dp_skims'],
    }
    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'))
Example #28
def compute_columns(df, model_settings, locals_dict={}, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    locals_dict : dict
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings('%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', None)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    trace_label = tracing.extend_trace_label(trace_label or '',
                                             expressions_spec_name)

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    logger.debug(
        f"{trace_label} compute_columns using expression spec file {expressions_spec_name}"
    )
    expressions_spec = assign.read_assignment_spec(
        config.config_file_path(expressions_spec_name))

    assert expressions_spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % expressions_spec_name

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    # be nice and also give it to them as df?
    tables['df'] = df

    _locals_dict = assign.local_utilities()
    _locals_dict.update(locals_dict)
    _locals_dict.update(tables)

    # FIXME a number of asim model preprocessors want skim_dict - should they request it in model_settings.TABLES?
    _locals_dict.update({
        # 'los': inject.get_injectable('network_los', None),
        'skim_dict': inject.get_injectable('skim_dict', None),
    })

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  _locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results, label=trace_label, slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals,
                          file_name="%s_locals" % trace_label)

    return results
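A hedged example of the model_settings dict shape that compute_columns documents; the table and file names are illustrative:

annotate_settings = {
    'DF': 'persons',                        # df alias (and pipeline table name if df is None)
    'SPEC': 'annotate_persons',             # expressions csv, '.csv' suffix optional
    'TABLES': ['households', 'land_use'],   # pipeline tables exposed as read-only locals
}
# results = compute_columns(df=persons_df, model_settings=annotate_settings,
#                           locals_dict={'CONSTANT': 1}, trace_label='annotate_persons')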
Example #29
def trip_purpose_probs():
    f = config.config_file_path('trip_purpose_probs.csv')
    df = pd.read_csv(f, comment='#')
    return df
Example #30
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, path_info, trace_label,
                             trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            # some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict['los'] = self.network_los
            locals_dict.update(model_constants)

            assignment_spec = assign.read_assignment_spec(
                file_name=config.config_file_path(tap_tap_settings['SPEC']))

            DEDUPE = True
            if DEDUPE:

                # assign uid for reduping
                max_atap = transit_df.atap.max() + 1
                transit_df[
                    'uid'] = transit_df.btap * max_atap + transit_df.atap

                # dedupe
                chooser_attribute_columns = list(chooser_attributes.columns)
                unique_transit_df = \
                    transit_df.loc[~transit_df.uid.duplicated(), ['btap', 'atap', 'uid'] + chooser_attribute_columns]
                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                # assign_variables
                results, _, _ = assign.assign_variables(
                    assignment_spec, unique_transit_df, locals_dict)
                assert len(results.columns) == 1
                unique_transit_df['transit'] = results

                # redupe results back into transit_df
                with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                    transit_df['transit'] = reindex(unique_transit_df.transit,
                                                    transit_df.uid)

                del transit_df['uid']
                del unique_transit_df
                chunk.log_df(trace_label, "transit_df", transit_df)
                chunk.log_df(trace_label, "unique_transit_df", None)

            else:
                results, _, _ = assign.assign_variables(
                    assignment_spec, transit_df, locals_dict)
                assert len(results.columns) == 1
                transit_df['transit'] = results

            # filter out unavailable btap_atap pairs
            logger.debug(
                f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
            )
            transit_df = transit_df[transit_df.transit > 0]

            transit_df.drop(columns=chooser_attributes.columns, inplace=True)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
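The dedupe/redupe pattern above is the key optimization: identical (btap, atap, attribute) rows are collapsed to one row per synthetic uid, the expression evaluation runs once per unique row, and the results are broadcast back onto the full table. A minimal pandas sketch of the same idea, with a hypothetical expensive_fn standing in for assign.assign_variables:

import pandas as pd

def dedupe_evaluate_redupe(transit_df, expensive_fn):
    # encode the (btap, atap) pair as a single synthetic uid
    max_atap = transit_df.atap.max() + 1
    transit_df = transit_df.copy()
    transit_df['uid'] = transit_df.btap * max_atap + transit_df.atap

    # evaluate the expensive function once per unique uid
    unique = transit_df.loc[~transit_df.uid.duplicated()].set_index('uid')
    unique['transit'] = expensive_fn(unique)

    # broadcast ("redupe") the unique results back onto every original row
    transit_df['transit'] = transit_df.uid.map(unique.transit)
    return transit_df.drop(columns='uid')

# toy usage: the "expensive" calculation is just a sum of the two tap ids
taps = pd.DataFrame({'btap': [1, 1, 2, 2], 'atap': [5, 5, 6, 6]})
print(dedupe_evaluate_redupe(taps, lambda df: df.btap + df.atap))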
Example #31
0
def stop_frequency_alts():
    # alt file for building trips even though simulation is simple_simulate not interaction_simulate
    file_path = config.config_file_path('stop_frequency_alternatives.csv')
    df = pd.read_csv(file_path, comment='#')
    df.set_index('alt', inplace=True)
    return df
Example #32
0
def best_transit_path_spec():
    return assign.read_assignment_spec(config.config_file_path('best_transit_path.csv'))
Example #33
0
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
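The arithmetic described in the docstring can be stated without the spec machinery: a destination attraction (e.g. employment) is weighted by a mode-specific decay on round-trip travel time, summed over all destinations for each origin, and then logged. A self-contained numpy sketch with made-up skim and employment values and an assumed dispersion parameter (the real expressions live in accessibility.csv):

import numpy as np

# toy inputs: 3 origin zones x 4 destination zones
round_trip_time = np.array([[10., 20., 30., 40.],
                            [15., 25., 35., 45.],
                            [ 5., 50., 60., 70.]])   # minutes
employment = np.array([100., 500., 250., 50.])       # jobs per destination zone
DISPERSION = -0.05                                    # assumed decay coefficient

# employment discounted by how hard each destination is to reach
weighted = employment * np.exp(DISPERSION * round_trip_time)

# sum over destinations for each origin, then log (the +1 keeps zero accessibility at zero)
accessibility = np.log(weighted.sum(axis=1) + 1)
print(accessibility)   # one value per origin zone, as written into accessibility_df above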
Example #34
0
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) CHOOSE_MOST_INITIAL
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach.

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    For now we are choosing among these approaches with a manifest constant, but this could
    be made a model setting...

    """
    trace_label = "trip_scheduling"

    model_settings = config.read_model_settings('trip_scheduling.yaml')
    assert 'DEPART_ALT_BASE' in model_settings

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = \
        reindex(pd.Series(list(range(tours.shape[0])), tours.index), trips_df.tour_id)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []
    i = 0
    while (i < max_iterations) and not trips_df.empty:

        i += 1
        last_iteration = (i == max_iterations)

        trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
        logger.info("%s scheduling %s trips", trace_label_i, trips_df.shape[0])

        choices = \
            run_trip_scheduling(
                trips_df,
                tours,
                probs_spec,
                model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                chunk_size=chunk_size,
                trace_label=trace_label_i)

        # boolean series of trips whose individual trip scheduling failed
        failed = choices.reindex(trips_df.index).isnull()
        logger.info("%s %s failed", trace_label_i, failed.sum())

        if not last_iteration:
            # boolean series of trips whose leg scheduling failed
            failed_cohorts = failed_trip_cohorts(trips_df, failed)
            trips_df = trips_df[failed_cohorts]
            choices = choices[~failed_cohorts]

        choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)
    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
Example #35
0
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        alts = model_spec.columns
        choices = choices.map(dict(list(zip(list(range(len(alts))), alts))))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
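simple_simulate returns the positional index of the chosen spec column, so the choices.map(...) step above only relabels 0..n-1 with the alternative names. A toy sketch of that relabelling with made-up mode columns and trip ids:

import pandas as pd

# pretend model spec with one utility column per mode alternative
model_spec = pd.DataFrame(columns=['DRIVEALONE', 'SHARED2', 'WALK', 'WALK_TRANSIT'])

# positional choices as returned by the simulate step, indexed by trip_id
choices = pd.Series([0, 3, 2, 0],
                    index=pd.Index([101, 102, 103, 104], name='trip_id'))

# relabel positional indexes with alternative names, as in the loop above
alts = model_spec.columns
choices = choices.map(dict(zip(range(len(alts)), alts)))
print(choices)   # trip_id -> mode name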
Example #36
0
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
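Only persons with cdap_activity == 'M' were choosers, so the chosen frequencies have to be reindexed onto the full persons table, with non-choosers getting an empty string rather than NaN. A minimal sketch of that step with toy person ids:

import pandas as pd

persons = pd.DataFrame({'cdap_activity': ['M', 'N', 'M', 'H']},
                       index=pd.Index([1, 2, 3, 4], name='person_id'))

# choices exist only for the mandatory ('M') persons
choices = pd.Series(['work1', 'work_and_school'],
                    index=pd.Index([1, 3], name='person_id'))

# broadcast to all persons; non-choosers get '' instead of NaN
persons['mandatory_tour_frequency'] = \
    choices.reindex(persons.index).fillna('').astype(str)
print(persons)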
Example #37
0
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings(
        'mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(
        file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'),
        set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons",
                len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index).reindex(
                            persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers, mandatory_tour_frequency_alts=alternatives)

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
Example #38
0
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) choose_most_initial
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach.

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which option is applied is determined by the FAILFIX model setting

    """
    trace_label = "trip_scheduling"
    model_settings_file_name = 'trip_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add columns 'tour_hour', 'earliest', 'latest' to trips
    set_tour_hour(trips_df, tours)

    # trip_scheduling is a probabilistic model and we don't support estimation,
    # but we do need to override choices in estimation mode
    estimator = estimation.manager.begin_estimation('trip_scheduling')
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        chooser_cols_for_estimation = [
            'person_id',
            'household_id',
            'tour_id',
            'trip_num',
            'trip_count',
            'primary_purpose',
            'outbound',
            'earliest',
            'latest',
            'tour_hour',
        ]
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = reindex(
        pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id)

    assert 'DEPART_ALT_BASE' in model_settings
    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []

    for chunk_i, trips_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers_by_chunk_id(
            trips_df, chunk_size, trace_label, trace_label):

        i = 0
        while (i < max_iterations) and not trips_chunk.empty:

            # only chunk log first iteration since memory use declines with each iteration
            with chunk.chunk_log(
                    trace_label) if i == 0 else chunk.chunk_log_skip():

                i += 1
                is_last_iteration = (i == max_iterations)

                trace_label_i = tracing.extend_trace_label(
                    trace_label, "i%s" % i)
                logger.info("%s scheduling %s trips within chunk %s",
                            trace_label_i, trips_chunk.shape[0], chunk_i)

                choices = \
                    run_trip_scheduling(
                        trips_chunk,
                        tours,
                        probs_spec,
                        model_settings,
                        estimator=estimator,
                        is_last_iteration=is_last_iteration,
                        trace_hh_id=trace_hh_id,
                        chunk_size=chunk_size,
                        chunk_tag=trace_label,
                        trace_label=trace_label_i)

                # boolean series of trips whose individual trip scheduling failed
                failed = choices.reindex(trips_chunk.index).isnull()
                logger.info("%s %s failed", trace_label_i, failed.sum())

                if not is_last_iteration:
                    # boolean series of trips whose leg scheduling failed
                    failed_cohorts = failed_trip_cohorts(trips_chunk, failed)
                    trips_chunk = trips_chunk[failed_cohorts]
                    choices = choices[~failed_cohorts]

                choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips',
                                              'depart')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()
        assert not choices.isnull().any()

    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
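The retry loop only re-runs the trips whose scheduling failed, together with their leg-mates, until everything is scheduled or MAX_ITERATIONS is reached. A stripped-down sketch of that control flow, with a hypothetical try_schedule() standing in for run_trip_scheduling and a cohort defined as all trips sharing a tour_id and direction:

import pandas as pd

def schedule_with_retries(trips, try_schedule, max_iterations=3):
    remaining = trips.copy()
    kept = []
    for i in range(1, max_iterations + 1):
        choices = try_schedule(remaining)        # NaN where scheduling failed
        failed = choices.isnull()
        if i == max_iterations:
            kept.append(choices)                 # last pass: keep whatever we got
            break
        # a failed trip invalidates its whole leg (same tour_id and direction)
        in_failed_cohort = failed.groupby(
            [remaining.tour_id, remaining.outbound]).transform('any')
        kept.append(choices[~in_failed_cohort])  # keep the fully scheduled cohorts
        remaining = remaining[in_failed_cohort]  # retry only the failed cohorts
        if remaining.empty:
            break
    return pd.concat(kept).reindex(trips.index)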
Example #39
0
def buffer_zones_spec(buffer_zones_settings):
    spec_path = config.config_file_path(
        buffer_zones_settings['buffer_zones_spec'])
    return buffer.read_buffer_spec(spec_path)
Example #40
0
def cdap_interaction_coefficients():
    """
    Rules and coefficients for generating interaction specs for different household sizes
    """
    f = config.config_file_path('cdap_interaction_coefficients.csv')
    return pd.read_csv(f, comment='#')