Code example #1
File: trip_destination.py  Project: UDST/activitysim
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips" %
                       (trace_label, trips_df.failed.sum()))

    pipeline.replace_table("trips", trips_df)

    print("trips_df\n", trips_df.shape)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
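The failed-trip dump above is a pattern repeated throughout these examples: select rows with a boolean column and write the slice out for inspection. A minimal standalone sketch of the same idea, using toy data and plain pandas to_csv in place of activitysim's tracing.write_csv helper:

import pandas as pd

# toy trips frame with a boolean 'failed' flag (hypothetical data)
trips_df = pd.DataFrame(
    {'tour_id': [1, 1, 2], 'failed': [False, True, False]},
    index=pd.Index([101, 102, 103], name='trip_id'))

if trips_df.failed.any():
    # write only the failed rows, mirroring trips_df[trips_df.failed] above
    trips_df[trips_df.failed].to_csv('trip_destination_failed_trips.csv')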
Code example #2
File: shadow_pricing.py  Project: UDST/activitysim
    def write_trace_files(self, iteration):
        """
        Write trace files for this iteration
        Writes desired_size, modeled_size, and shadow_prices tables

        Trace file names are tagged with model_selector and iteration number
        (e.g. self.desired_size => shadow_price_school_desired_size_1)

        Parameters
        ----------
        iteration: int
            current iteration to tag trace file
        """
        logger.info("write_trace_files iteration %s" % iteration)
        if iteration == 1:
            # write desired_size only on first iteration, as it doesn't change
            tracing.write_csv(self.desired_size,
                              'shadow_price_%s_desired_size' % self.model_selector,
                              transpose=False)

        tracing.write_csv(self.modeled_size,
                          'shadow_price_%s_modeled_size_%s' % (self.model_selector, iteration),
                          transpose=False)

        if self.use_shadow_pricing:
            tracing.write_csv(self.shadow_prices,
                              'shadow_price_%s_shadow_prices_%s' % (self.model_selector, iteration),
                              transpose=False)
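The tagging convention described in the docstring just interpolates model_selector and the iteration number into the file-name strings; expanding them with assumed example values gives:

model_selector = 'school'   # assumed example value
iteration = 2               # assumed example value

# file-name tags produced by the write_csv calls above
print('shadow_price_%s_desired_size' % model_selector)                   # shadow_price_school_desired_size
print('shadow_price_%s_modeled_size_%s' % (model_selector, iteration))   # shadow_price_school_modeled_size_2
print('shadow_price_%s_shadow_prices_%s' % (model_selector, iteration))  # shadow_price_school_shadow_prices_2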
Code example #3
File: trip_scheduling.py  Project: ymaVTA/activitysim
def report_bad_choices(bad_row_map,
                       df,
                       filename,
                       trace_label,
                       trace_choosers=None):
    """

    Parameters
    ----------
    bad_row_map
    df : pandas.DataFrame
        utils or probs dataframe
    trace_choosers : pandas.dataframe
        the choosers df (for interaction_simulate) to facilitate the reporting of hh_id
        because we  can't deduce hh_id from the interaction_dataset which is indexed on index
        values from alternatives df

    """

    df = df[bad_row_map]
    if trace_choosers is None:
        hh_ids = tracing.hh_id_for_chooser(df.index, df)
    else:
        hh_ids = tracing.hh_id_for_chooser(df.index, trace_choosers)
    df['household_id'] = hh_ids

    filename = "%s.%s" % (trace_label, filename)

    logger.info("dumping %s" % filename)
    tracing.write_csv(df, file_name=filename, transpose=False)

    # log the indexes of the first MAX_PRINT offending rows
    MAX_PRINT = 0
    for idx in df.index[:MAX_PRINT].values:

        row_msg = "%s : failed %s = %s (hh_id = %s)" % \
                  (trace_label, df.index.name, idx, df.household_id.loc[idx])

        logger.warning(row_msg)
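A small sketch of the kind of input report_bad_choices expects: a boolean row map marking rows of a probs dataframe that fail some sanity check (toy data, with the actual call shown only as a comment since it depends on the tracing module):

import numpy as np
import pandas as pd

# toy probability frame indexed by trip_id (hypothetical data)
probs = pd.DataFrame({'work': [0.6, 0.2], 'shop': [0.4, 0.3]},
                     index=pd.Index([101, 102], name='trip_id'))

# flag rows whose probabilities do not sum to 1
bad_row_map = ~np.isclose(probs.sum(axis=1), 1.0)
print(probs[bad_row_map])   # the rows report_bad_choices would dump

# report_bad_choices(bad_row_map, probs, filename='bad_probs',
#                    trace_label='trip_scheduling', trace_choosers=None)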
Code example #4
File: link.py  Project: figo2002/bca4abm
def eval_link_spec(link_spec, link_file_names, data_dir,
                   link_file_column_map, link_index_fields,
                   model_settings, trace_tag=None, trace_od=None):

    # accept a single string as well as a dict of {suffix: filename}
    if isinstance(link_file_names, str):
        link_file_names = {"": link_file_names}

    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))
    locals_dict = add_tables_to_locals(data_dir, model_settings, locals_dict)

    results = {}

    for scenario in ['base', 'build']:

        logger.debug("eval_link_spec scenario %s" % scenario)

        link_data_subdir = 'base-data' if scenario == 'base' else 'build-data'

        df_list = []
        for suffix, link_file_name in list(link_file_names.items()):

            df = read_csv_file(data_dir=os.path.join(data_dir, link_data_subdir),
                               file_name=link_file_name,
                               column_map=link_file_column_map)

            if link_index_fields:
                df.set_index(link_index_fields, drop=True, inplace=True)
            if suffix:
                df = df.add_suffix("_" + suffix)

            df_list.append(df)

        links_df = pd.concat(df_list, axis=1)

        # copy index fields into columns
        if link_index_fields:
            links_df = links_df.reset_index().set_index(link_index_fields, drop=False)

        if trace_od:
            od_column = model_settings.get('od_column', None)
            if od_column:
                o, d = trace_od
                trace_rows = (links_df[od_column] == o) | (links_df[od_column] == d)
            else:
                # just dump first row
                trace_rows = (links_df.index == 1)
        else:
            trace_rows = None

        summary, trace_results, trace_assigned_locals = \
            bca.eval_and_sum(link_spec,
                             links_df,
                             locals_dict,
                             df_alias='links',
                             trace_rows=trace_rows)

        results[scenario] = summary

        if trace_results is not None:
            # FIXME: manually setting df.index.name to prevent
            # activitysim.tracing.write_df_csv() from attempting to reset the index.
            # write_df_csv() should be able to handle a multi-index dataframe.
            trace_results.index.name = trace_results.index.names[0]
            tracing.write_csv(trace_results,
                              file_name="%s_results_%s" % (trace_tag, scenario),
                              index_label='index',
                              column_labels=['label', 'link'])

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="%s_locals_%s" % (trace_tag, scenario))

    results = results['build'] - results['base']

    results.reset_index(drop=True, inplace=True)

    return results
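The {suffix: filename} handling at the top of eval_link_spec tags each file's columns with its dict key before concatenating them side by side; a toy illustration with made-up frames in place of read_csv_file:

import pandas as pd

link_file_names = {'am': 'links_am.csv', 'pm': 'links_pm.csv'}   # hypothetical suffix -> filename

df_list = []
for suffix, _link_file_name in link_file_names.items():
    # stand-in for read_csv_file(...); both frames share the same link index
    df = pd.DataFrame({'volume': [10, 20]}, index=pd.Index([1, 2], name='link_id'))
    if suffix:
        df = df.add_suffix('_' + suffix)   # volume -> volume_am / volume_pm
    df_list.append(df)

links_df = pd.concat(df_list, axis=1)
print(links_df.columns.tolist())           # ['volume_am', 'volume_pm']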
Code example #5
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates' trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Code example #6
File: models.py  Project: UDST/activitysim
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left'
    )

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
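The 'choose max utility' step relies on a common pandas idiom: after sorting by utility, tail(1) per group keeps the highest-utility row for each idx. A toy illustration with assumed values:

import pandas as pd

# hypothetical path alternatives: several candidate rows per od pair ('idx')
paths = pd.DataFrame({'idx': [0, 0, 1, 1, 1],
                      'utility': [1.2, 3.4, 0.5, 2.2, 1.9]})

# sort by utility, then keep the last (highest-utility) row of each idx group
best = paths.sort_values(by='utility').groupby('idx').tail(1)
print(best)   # idx 0 -> utility 3.4, idx 1 -> utility 2.2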
Code example #7
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings(
        'trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get(
        'DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." %
                        trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info(
                "trip_destination has already been run. Rerunning failed trips"
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" %
                        trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info(
                    "Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index,
                                    level='trip_id',
                                    inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[
                trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id',
                                    inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates' trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

        #  add trip samples of processed_trips to processed_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips
        # once we discard failed trips, we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Code example #8
def best_transit_path(set_random_seed, network_los, best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(
        od_df.omaz,
        od_df.dmaz,
        ofilter='drive_time',
        dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(right=od_df[['tod']],
                                      left_on='idx',
                                      right_index=True,
                                      how='left')

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s",
                (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz
                           == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {'np': np, 'network_los': network_los}
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility",
                n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(
        by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s",
                           trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="trace_best_transit_path_locals")
Code example #9
def compute_accessibility(accessibility, network_los, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" %
                (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)

    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    # don't assume they are the same: accessibility may be sliced if we are multiprocessing
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)

    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    logger.info("{trace_label} added {len(results.columns} columns")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Code example #10
File: accessibility.py  Project: UDST/activitysim
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    # #bug
    #
    # land_use_df = land_use_df[land_use_df.index % 2 == 1]
    # accessibility_df = accessibility_df[accessibility_df.index.isin(land_use_df.index)].head(5)
    #
    # print "land_use_df", land_use_df.index
    # print "accessibility_df", accessibility_df.index
    # #bug

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
Code example #11
def demographics_processor(persons, persons_merged, demographics_spec,
                           demographics_settings, chunk_size, trace_hh_id):

    # the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    persons_df = persons_merged.to_frame()

    logger.info(
        "Running demographics_processor with %d persons (chunk size = %s)" %
        (len(persons_df), chunk_size))

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(demographics_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = trace_hh_id and persons_df['household_id'] == trace_hh_id

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row in of the choosers dataframe
    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(demographics_spec,
                                  persons_df,
                                  locals_dict,
                                  df_alias='persons',
                                  trace_rows=trace_rows)

    # add assigned columns to persons as they are needed by downstream processors
    persons = persons.to_frame()
    assign_in_place(persons, results)
    pipeline.replace_table("persons", persons)

    # coc groups with counts
    # TODO - should we allow specifying which assigned columns are coc (e.g. in settings?)
    # for now, assume all assigned columns are coc, but this could cramp modelers' style
    # if they want to create additional demographic columns for downstream use that aren't coc
    coc_columns = list(results.columns)

    inject.add_injectable("coc_column_names", coc_columns)

    # - create table with coc columns as indexes and a single column 'persons' with counts
    # index                        persons
    # coc_poverty coc_age
    # False       False            20
    #             True              3
    # True        False             4
    coc_grouped = results.groupby(coc_columns)
    coc_grouped = coc_grouped[coc_columns[0]].count().to_frame(name='persons')

    pipeline.replace_table("coc_results", coc_grouped)

    add_summary_results(coc_grouped)

    if trace_hh_id:

        if trace_results is not None:

            tracing.write_csv(trace_results,
                              file_name="demographics",
                              index_label='person_idx',
                              column_labels=['label', 'person'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="demographics_locals")
Code example #12
def compute_accessibilities_for_zones(accessibility_df, land_use_df,
                                      assignment_spec, constants, network_los,
                                      trace_od, trace_label):

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d orig zones %d dest zones" %
                (trace_label, orig_zone_count, dest_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()
    chunk.log_df(trace_label, "od_df", od_df)

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }
    locals_d.update(constants)

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = network_los.tvpb

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d,
                                  trace_rows=trace_od_rows, trace_label=trace_label, chunk_log=True)

    chunk.log_df(trace_label, "results", results)

    # accessibility_df = accessibility_df.copy()
    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")

    return accessibility_df
Code example #13
def compute_columns(df, model_settings, locals_dict={}, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    locals_dict : dict
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings('%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', None)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    trace_label = tracing.extend_trace_label(trace_label or '',
                                             expressions_spec_name)

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    logger.debug(
        f"{trace_label} compute_columns using expression spec file {expressions_spec_name}"
    )
    expressions_spec = assign.read_assignment_spec(
        config.config_file_path(expressions_spec_name))

    assert expressions_spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % expressions_spec_name

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    # be nice and also give it to them as df?
    tables['df'] = df

    _locals_dict = assign.local_utilities()
    _locals_dict.update(locals_dict)
    _locals_dict.update(tables)

    # FIXME a number of asim model preprocessors want skim_dict - should they request it in model_settings.TABLES?
    _locals_dict.update({
        # 'los': inject.get_injectable('network_los', None),
        'skim_dict': inject.get_injectable('skim_dict', None),
    })

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  _locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results, label=trace_label, slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals,
                          file_name="%s_locals" % trace_label)

    return results
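A hypothetical model_settings value of the kind the docstring describes, whether read from a yaml file in configs_dir or passed in as a dict; the table and spec names here are illustrative only:

# illustrative settings dict (equivalent to a small yaml file in configs_dir);
# the table and spec names are hypothetical
model_settings = {
    'DF': 'persons',                       # df_alias (and pipeline table name if df is None)
    'SPEC': 'annotate_persons',            # expressions file, .csv suffix optional
    'TABLES': ['households', 'land_use'],  # extra pipeline tables exposed as read-only locals
}

# results = compute_columns(df=persons_df, model_settings=model_settings,
#                           trace_label='annotate_persons')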
Code example #14
def physical_activity_processor(trips_with_demographics, persons_merged,
                                physical_activity_trip_spec,
                                physical_activity_person_spec,
                                physical_activity_settings, coc_column_names,
                                settings, chunk_size, trace_hh_id):
    """
    Compute physical benefits

    Physical activity benefits generally accrue if the net physical activity for an individual
    exceeds a certain threshold. We calculate individual physical activity based on trips,
    so we need to compute trip activity and then sum up to the person level to calculate benefits.
    We chunk trips by household id to ensure that all of a person's trips are in the same chunk.
    """

    trips_df = trips_with_demographics.to_frame()
    persons_df = persons_merged.to_frame()
    trace_label = 'physical_activity'

    logger.info(
        "Running physical_activity_processor with %d trips for %d persons " %
        (len(trips_df), len(persons_df)))

    locals_dict = config.get_model_constants(physical_activity_settings)
    locals_dict.update(config.setting('globals'))

    trip_trace_rows = trace_hh_id and trips_df.household_id == trace_hh_id

    rows_per_chunk, effective_chunk_size = \
        physical_activity_rpc(chunk_size, trips_df, persons_df,
                              physical_activity_trip_spec, trace_label)

    logger.info("physical_activity_processor chunk_size %s rows_per_chunk %s" %
                (chunk_size, rows_per_chunk))

    coc_summary = None
    result_list = []

    # iterate over trips df chunked by hh_id
    for i, num_chunks, trips_chunk, trace_rows_chunk \
            in bca.chunked_df_by_chunk_id(trips_df, trip_trace_rows, rows_per_chunk):

        logger.info("%s chunk %s of %s" % (trace_label, i, num_chunks))

        trip_activity, trip_trace_results, trip_trace_assigned_locals = \
            assign.assign_variables(physical_activity_trip_spec,
                                    trips_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='trips',
                                    trace_rows=trace_rows_chunk)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if trip_trace_results is not None:
            tracing.write_csv(trip_trace_results,
                              file_name="physical_activity_trips",
                              index_label='trip_id',
                              column_labels=['label', 'trip'])

            if trip_trace_assigned_locals:
                tracing.write_csv(trip_trace_assigned_locals,
                                  file_name="physical_activity_trips_locals")

        # sum trip activity for each unique person
        trip_activity = trip_activity.groupby(trips_chunk.person_id).sum()

        # merge in persons columns for this chunk
        persons_chunk = pd.merge(trip_activity,
                                 persons_df,
                                 left_index=True,
                                 right_index=True)

        # trace rows array for this chunk
        person_trace_rows = trace_hh_id and persons_chunk[
            'household_id'] == trace_hh_id

        person_activity, person_trace_results, person_trace_assigned_locals = \
            assign.assign_variables(physical_activity_person_spec,
                                    persons_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='persons',
                                    trace_rows=person_trace_rows)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if person_trace_results is not None:
            tracing.write_csv(person_trace_results,
                              file_name="physical_activity_persons",
                              index_label='persons_merged_table_index',
                              column_labels=['label', 'person'])

            if person_trace_assigned_locals:
                tracing.write_csv(person_trace_assigned_locals,
                                  file_name="physical_activity_persons_locals")

        # concat in the coc columns and summarize the chunk by coc
        person_activity = pd.concat(
            [persons_chunk[coc_column_names], person_activity], axis=1)
        coc_summary = person_activity.groupby(coc_column_names).sum()

        result_list.append(coc_summary)

        chunk_trace_label = '%s chunk_%s' % (trace_label, i)
        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(chunk_trace_label, 'trips_chunk', trips_chunk)
        chunk.log_df(chunk_trace_label, 'persons_chunk', persons_chunk)
        chunk.log_close(chunk_trace_label)

    if len(result_list) > 1:

        # (if there was only one chunk, then concat is redundant)
        coc_summary = pd.concat(result_list)

        # squash the accumulated chunk summaries by reapplying group and sum
        coc_summary.reset_index(inplace=True)
        coc_summary = coc_summary.groupby(coc_column_names).sum()

    result_prefix = 'PA_'
    add_result_columns("coc_results", coc_summary, result_prefix)
    add_summary_results(coc_summary,
                        prefix=result_prefix,
                        spec=physical_activity_person_spec)
Code example #15
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info(
                'trip_destination has already been run. Rerunning failed trips'
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' %
                        trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates' trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Code example #16
File: trip_purpose.py  Project: arunakkin/activitysim
def choose_intermediate_trip_purpose(trips, probs_spec, estimator,
                                     probs_join_cols, use_depart_time,
                                     trace_hh_id, trace_label):
    """
    Choose purpose for intermediate trips based on probs_spec,
    which assigns relative weights (summing to 1) to the possible purpose choices

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    non_purpose_cols = probs_join_cols.copy()
    if use_depart_time:
        non_purpose_cols += ['depart_range_start', 'depart_range_end']
    purpose_cols = [c for c in probs_spec.columns if c not in non_purpose_cols]

    num_trips = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    sum_probs = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:, purpose_cols] = probs_spec.loc[:, purpose_cols].div(sum_probs, axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(),
                        probs_spec,
                        on=probs_join_cols,
                        how='left').set_index('trip_id')
    chunk.log_df(trace_label, 'choosers', choosers)

    if use_depart_time:

        # select the matching depart range (this should result in exactly one chooser row per trip)
        chooser_probs = \
            (choosers.start >= choosers['depart_range_start']) & (choosers.start <= choosers['depart_range_end'])

        # if we failed to match a row in probs_spec
        if chooser_probs.sum() < num_trips:

            # this can happen if the spec doesn't have a row matching a trip's probs_join_cols values
            missing_trip_ids = trips.index[
                ~trips.index.isin(choosers.index[chooser_probs])].values
            unmatched_choosers = choosers[choosers.index.isin(
                missing_trip_ids)]
            unmatched_choosers = unmatched_choosers[['person_id', 'start'] +
                                                    non_purpose_cols]

            # join to persons for better diagnostics
            persons = inject.get_table('persons').to_frame()
            persons_cols = [
                'age', 'is_worker', 'is_student', 'is_gradeschool',
                'is_highschool', 'is_university'
            ]
            unmatched_choosers = pd.merge(unmatched_choosers,
                                          persons[[
                                              col for col in persons_cols
                                              if col in persons.columns
                                          ]],
                                          left_on='person_id',
                                          right_index=True,
                                          how='left')

            file_name = '%s.UNMATCHED_PROBS' % trace_label
            logger.error(
                "%s %s of %s intermediate trips could not be matched to probs based on join columns  %s"
                % (trace_label, len(unmatched_choosers), len(choosers),
                   probs_join_cols))
            logger.info("Writing %s unmatched choosers to %s" % (
                len(unmatched_choosers),
                file_name,
            ))
            tracing.write_csv(unmatched_choosers,
                              file_name=file_name,
                              transpose=False)
            raise RuntimeError(
                "Some trips could not be matched to probs based on join columns %s."
                % probs_join_cols)

        # select the matching depart range (this should result in exactly one chooser row per trip)
        choosers = choosers[chooser_probs]

    # choosers should now match trips row for row
    assert choosers.index.identical(trips.index)

    if estimator:
        probs_cols = list(probs_spec.columns)
        print(choosers[probs_cols])
        estimator.write_table(choosers[probs_cols], 'probs', append=True)

    choices, rands = logit.make_choices(choosers[purpose_cols],
                                        trace_label=trace_label,
                                        trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'trip_purpose'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    choices = choices.map(pd.Series(purpose_cols))
    return choices
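
Note: the purpose choice above boils down to row-normalizing the purpose probabilities and making one Monte Carlo draw per trip via logit.make_choices. A minimal, self-contained sketch of that idea (trip ids, purpose names, and probabilities below are invented for illustration; the real model reads them from probs_spec):

import numpy as np
import pandas as pd

# toy purpose probabilities for three trips (made-up values)
probs = pd.DataFrame(
    {'work': [0.2, 0.0, 0.5],
     'shopping': [0.5, 0.6, 0.25],
     'othdiscr': [0.3, 0.4, 0.25]},
    index=pd.Index([1001, 1002, 1003], name='trip_id'))

# normalize so each row sums to 1 (mirrors the .div(sum_probs, axis=0) step above)
probs = probs.div(probs.sum(axis=1), axis=0)

# one uniform draw per trip, then pick the first column whose cumulative prob >= draw
rands = np.random.default_rng(seed=0).random(len(probs))
positions = (probs.cumsum(axis=1).values >= rands[:, None]).argmax(axis=1)

# map column positions back to purpose labels, as choices.map(pd.Series(purpose_cols)) does
choices = pd.Series(np.asarray(probs.columns)[positions], index=probs.index, name='trip_purpose')
print(choices)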
Code example #17
0
def participants_chooser(probs, choosers, spec, trace_label):
    """
    custom alternative to logit.make_choices for simulate.simple_simulate

    Choosing participants for mixed tours is trickier than for adult or child tours because we
    need at least one adult and one child participant in a mixed tour. We call logit.make_choices
    and then check to see if the tour satisfies this requirement, and rechoose for any that
    fail until all are satisfied.

    In principle, this should always succeed eventually, but we fail after MAX_ITERATIONS,
    just in case there is some failure in program logic (we haven't seen this occur).

    Parameters
    ----------
    probs : pandas.DataFrame
        Rows for choosers and columns for the alternatives from which they
        are choosing. Values are expected to be valid probabilities across
        each row, e.g. they should sum to 1.
    choosers : pandas.DataFrame
        simple_simulate choosers df
    spec : pandas.DataFrame
        simple_simulate spec df
        We only need spec so we can know the column index of the 'participate' alternative
        indicating that the participant has been chosen to participate in the tour
    trace_label : str

    Returns
    -------
    choices, rands
        choices, rands as returned by logit.make_choices (in same order as probs)

    """

    assert probs.index.equals(choosers.index)

    # choice is boolean (participate or not)
    model_settings = config.read_model_settings('joint_tour_participation.yaml')

    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = spec.columns.get_loc(choice_col)
    MAX_ITERATIONS = model_settings.get('max_participation_choice_iterations', 5000)

    trace_label = tracing.extend_trace_label(trace_label, 'participants_chooser')

    candidates = choosers.copy()
    choices_list = []
    rands_list = []

    num_tours_remaining = len(candidates.tour_id.unique())
    logger.info('%s %s joint tours to satisfy.', trace_label, num_tours_remaining,)

    iter = 0
    while candidates.shape[0] > 0:

        iter += 1

        if iter > MAX_ITERATIONS:
            logger.warning('%s max iterations exceeded (%s).', trace_label, MAX_ITERATIONS)
            diagnostic_cols = ['tour_id', 'household_id', 'composition', 'adult']
            unsatisfied_candidates = candidates[diagnostic_cols].join(probs)
            tracing.write_csv(unsatisfied_candidates,
                              file_name='%s.UNSATISFIED' % trace_label, transpose=False)
            print(unsatisfied_candidates.head(20))
            assert False

        choices, rands = logit.make_choices(probs, trace_label=trace_label, trace_choosers=choosers)
        participate = (choices == PARTICIPATE_CHOICE)

        # satisfaction indexed by tour_id
        tour_satisfaction = get_tour_satisfaction(candidates, participate)
        num_tours_satisfied_this_iter = tour_satisfaction.sum()

        if num_tours_satisfied_this_iter > 0:

            num_tours_remaining -= num_tours_satisfied_this_iter

            satisfied = reindex(tour_satisfaction, candidates.tour_id)

            choices_list.append(choices[satisfied])
            rands_list.append(rands[satisfied])

            # remove candidates of satisfied tours
            probs = probs[~satisfied]
            candidates = candidates[~satisfied]

        logger.info('%s iteration %s : %s joint tours satisfied %s remaining' %
                    (trace_label, iter, num_tours_satisfied_this_iter, num_tours_remaining,))

    choices = pd.concat(choices_list)
    rands = pd.concat(rands_list)

    # reindex choices and rands to match the original choosers index
    choices = choices.reindex(choosers.index)
    rands = rands.reindex(choosers.index)
    assert choices.index.equals(choosers.index)
    assert rands.index.equals(choosers.index)

    logger.info('%s %s iterations to satisfy all joint tours.', trace_label, iter,)

    return choices, rands
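
Note: for intuition only, a toy sketch of the satisfaction test that drives the rechoosing loop above, assuming the rule for mixed tours is "at least one adult and at least one child participant". The real rule lives in get_tour_satisfaction and also depends on tour composition; the data and helper below are invented.

import pandas as pd

# toy candidates: one row per (tour, person) with an adult flag (made-up data)
candidates = pd.DataFrame({
    'tour_id': [1, 1, 1, 2, 2],
    'adult':   [True, True, False, True, False],
})

# toy participation choices (True means the candidate participates this iteration)
participate = pd.Series([True, False, True, True, False], index=candidates.index)

def toy_tour_satisfaction(candidates, participate):
    # keep only participants, then require at least one adult and one child per tour
    participants = candidates[participate]
    by_tour = participants.groupby('tour_id')['adult']
    satisfied = by_tour.any() & ~by_tour.all()
    # tours with no participants at all are unsatisfied
    return satisfied.reindex(candidates['tour_id'].unique(), fill_value=False)

print(toy_tour_satisfaction(candidates, participate))
# tour 1 is satisfied (one adult, one child); tour 2 is not, so its candidates
# would be kept and redrawn on the next iteration, as in the loop above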
Code example #18
0
def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get(
        'fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    estimator = estimation.manager.begin_estimation('trip_destination')

    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so the trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination',
                      False) and not trips_df.failed.any():
        if (trips_df.trip_num < trips_df.trip_count).sum() == 0:
            raise RuntimeError(
                "can't honor 'testing_fail_trip_destination' setting because there are no intermediate trips"
            )

        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label,
                       trips_df.failed.sum())
        if inject.get_injectable('pipeline_file_prefix', None):
            file_name = f"{trace_label}_failed_trips_{inject.get_injectable('pipeline_file_prefix')}"
        else:
            file_name = f"{trace_label}_failed_trips"
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

    if estimator:
        estimator.end_estimation()
        # no trips should have failed since we overwrite choices and the sample should not include failed trips
        assert not trips_df.failed.any()

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed],
                                    level='trip_id',
                                    inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be none if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
Code example #19
0
def compute_accessibility(settings, accessibility_spec, accessibility_settings,
                          skim_dict, omx_file, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than for automobile or transit.  The minimum accessibility is zero.
    """

    logger.info("Running compute_accessibility")

    constants = config.get_model_constants(accessibility_settings)
    land_use_columns = accessibility_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    zone_count = len(land_use_df.index)

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(land_use_df.index), zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log':
        np.log,
        'exp':
        np.exp,
        'skim_od':
        AccessibilitySkims(skim_dict, omx_file, zone_count),
        'skim_do':
        AccessibilitySkims(skim_dict, omx_file, zone_count, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(accessibility_spec, od_df, locals_d, trace_rows=trace_od_rows)
    accessibility_df = pd.DataFrame(index=land_use.index)
    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (zone_count, zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

        inject.add_column("accessibility", column, accessibility_df[column])

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" %
                           (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            # note that this is not the same as the orca-injected accessibility table
            # FIXME - should we name this differently and also dump the updated accessibility table?
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Code example #20
0
def aggregate_demographics_processor(zone_hhs, aggregate_demographics_spec,
                                     settings, trace_od):
    """

    Parameters
    ----------
    zone_hhs : orca table
        input zone demographics

    """

    trace_label = 'aggregate_demographics'
    model_settings = config.read_model_settings('aggregate_demographics.yaml')

    zone_hhs_df = zone_hhs.to_frame()

    logger.info("Running %s with %d zones" % (
        trace_label,
        len(zone_hhs_df),
    ))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (zone_hhs_df.index == trace_orig) | (zone_hhs_df.index
                                                             == trace_dest)
    else:
        trace_od_rows = None

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = None

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row in of the choosers dataframe
    results, trace_results, trace_assigned_locals = \
        assign.assign_variables(aggregate_demographics_spec,
                                zone_hhs_df,
                                locals_dict,
                                df_alias='hhs',
                                trace_rows=trace_od_rows)

    pipeline.replace_table("zone_demographics", results)

    # expression file can use silos column to designate result targets (e.g. count of households)
    add_aggregate_results(results,
                          aggregate_demographics_spec,
                          source=trace_label)

    if trace_results is not None:

        tracing.write_csv(trace_results,
                          file_name="aggregate_demographics",
                          index_label='zone',
                          column_labels=['label', 'zone'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="aggregate_demographics_locals")
Code example #21
0
File: aggregate_od.py Project: figo2002/bca4abm
def aggregate_od_processor(zone_districts, zones, data_dir, trace_od):

    trace_label = 'aggregate_od'

    logger.info("Running %s" % (trace_label, ))

    model_settings = config.read_model_settings('aggregate_od.yaml')

    spec_file_name = model_settings.get('spec_file_name', 'aggregate_od.csv')
    aggregate_od_spec = bca.read_assignment_spec(spec_file_name)

    zones = zones.to_frame()
    zone_districts = zone_districts.to_frame()
    zone_count = zone_districts.shape[0]

    assert zones.index.equals(zone_districts.index)

    # create OD dataframe in an order compatible with ODSkims
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(zones.index), zone_count),
            'dest': np.tile(np.asanyarray(zones.index), zone_count),
        })

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))
    locals_dict['logger'] = logger

    logger.debug('%s mem before create_skim_locals_dict, %s' % (
        trace_label,
        memory_info(),
    ))

    # - add ODSkims to locals (note: we use local_skims list later to close omx files)
    cache_skims = model_settings.get('cache_skims', False)
    local_skims = create_skim_locals_dict(model_settings, data_dir, zones,
                                          cache_skims)
    locals_dict.update(local_skims)

    # - create_zone_matrices dicts
    locals_dict.update(create_zone_matrices(model_settings, zones))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    logger.debug("%s assigning variables" % (trace_label, ))
    results, trace_results, trace_assigned_locals = \
        assign.assign_variables(aggregate_od_spec,
                                od_df,
                                locals_dict=locals_dict,
                                df_alias='od',
                                trace_rows=trace_od_rows)

    logger.debug('%s mem after assign_variables, %s' % (
        trace_label,
        memory_info(),
    ))

    for local_name, od_skims in local_skims.items():
        logger.debug("closing %s" % local_name)
        od_skims.log_skim_usage()
        od_skims.close()

    # summarize aggregate_od_benefits by orig and dest districts
    logger.debug("%s district summary" % (trace_label, ))
    results['orig'] = np.repeat(np.asanyarray(zone_districts.district),
                                zone_count)
    results['dest'] = np.tile(np.asanyarray(zone_districts.district),
                              zone_count)
    district_summary = results.groupby(['orig', 'dest']).sum()
    pipeline.replace_table('aggregate_od_district_summary', district_summary)

    # attribute aggregate_results benefits to origin zone
    logger.debug("%s zone summary" % (trace_label, ))
    results['orig'] = od_df['orig']
    del results['dest']
    zone_summary = results.groupby(['orig']).sum()
    pipeline.replace_table('aggregate_od_zone_summary', zone_summary)

    add_aggregate_results(zone_summary, aggregate_od_spec, source=trace_label)

    if trace_results is not None:
        tracing.write_csv(trace_results,
                          file_name=trace_label,
                          index_label='index',
                          column_labels=['label', 'od'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="%s_locals" % trace_label,
                              index_label='variable',
                              columns='value')
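
Note: both this processor and compute_accessibility build the full origin-destination cross product with np.repeat / np.tile, so a per-OD result vector of length zone_count**2 can later be reshaped to (zone_count, zone_count) and summed by row. A minimal sketch with made-up zone ids:

import numpy as np
import pandas as pd

zone_ids = np.array([101, 102, 103])
zone_count = len(zone_ids)

# orig varies slowest (repeat), dest varies fastest (tile), so row k corresponds
# to orig zone_ids[k // zone_count] and dest zone_ids[k % zone_count]
od_df = pd.DataFrame({
    'orig': np.repeat(zone_ids, zone_count),
    'dest': np.tile(zone_ids, zone_count),
})
print(od_df)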
Code example #22
0
File: los.py Project: jiaodaxiaozi/activitysim
    def load_data(self):
        """
        Load tables and skims from files specified in network_los settings
        """

        # load maz tables
        if self.zone_system in [TWO_ZONE, THREE_ZONE]:

            # maz
            file_name = self.setting('maz')
            self.maz_taz_df = pd.read_csv(config.data_file_path(file_name, mandatory=True))
            self.maz_taz_df = self.maz_taz_df[['MAZ', 'TAZ']].sort_values(by='MAZ')  # only fields we need

            self.maz_ceiling = self.maz_taz_df.MAZ.max() + 1

            # maz_to_maz_df
            maz_to_maz_tables = self.setting('maz_to_maz.tables')
            maz_to_maz_tables = [maz_to_maz_tables] if isinstance(maz_to_maz_tables, str) else maz_to_maz_tables
            for file_name in maz_to_maz_tables:

                df = pd.read_csv(config.data_file_path(file_name, mandatory=True))

                df['i'] = df.OMAZ * self.maz_ceiling + df.DMAZ
                df.set_index('i', drop=True, inplace=True, verify_integrity=True)
                logger.debug(f"loading maz_to_maz table {file_name} with {len(df)} rows")

                # FIXME - don't really need these columns, but if we do want them,
                #  we would need to merge them in since files may have different numbers of rows
                df.drop(columns=['OMAZ', 'DMAZ'], inplace=True)

                # besides, we only want data columns so we can coerce to same type as skims
                df = df.astype(np.dtype(self.skim_dtype_name))

                if self.maz_to_maz_df is None:
                    self.maz_to_maz_df = df
                else:
                    self.maz_to_maz_df = pd.concat([self.maz_to_maz_df, df], axis=1)

        # load tap tables
        if self.zone_system == THREE_ZONE:

            # tap_df should already have been loaded by load_skim_info because,
            # during multiprocessing, it is required by TapTapUidCalculator to size TVPBCache
            # self.tap_df = pd.read_csv(config.data_file_path(self.setting('tap'), mandatory=True))
            assert self.tap_df is not None

            # maz_to_tap_dfs - different sized sparse arrays with different columns, so we keep them separate
            for mode, maz_to_tap_settings in self.setting('maz_to_tap').items():

                assert 'table' in maz_to_tap_settings, \
                    f"Expected setting maz_to_tap.{mode}.table not found in {LOS_SETTINGS_FILE_NAME}"

                file_name = maz_to_tap_settings['table']
                df = pd.read_csv(config.data_file_path(file_name, mandatory=True))

                # trim tap set
                # if provided, use tap_line_distance_col together with tap_lines table to trim the near tap set
                # to only include the nearest tap to origin when more than one tap serves the same line
                distance_col = maz_to_tap_settings.get('tap_line_distance_col')
                if distance_col:

                    if self.tap_lines_df is None:
                        # load tap_lines on demand (required if they specify tap_line_distance_col)
                        tap_lines_file_name = self.setting('tap_lines', )
                        self.tap_lines_df = pd.read_csv(config.data_file_path(tap_lines_file_name, mandatory=True))

                        # csv file has one row per TAP with space-delimited list of lines served by that TAP
                        #  TAP                                      LINES
                        # 6020  GG_024b_SB GG_068_RT GG_228_WB GG_023X_RT
                        # stack to create dataframe with one column 'line' indexed by TAP with one row per line served
                        #  TAP        line
                        # 6020  GG_024b_SB
                        # 6020   GG_068_RT
                        # 6020   GG_228_WB
                        self.tap_lines_df = \
                            self.tap_lines_df.set_index('TAP').LINES.str.split(expand=True)\
                                .stack().droplevel(1).to_frame('line')

                    old_len = len(df)

                    # NOTE - merge will remove unused taps (not appearing in tap_lines)
                    df = pd.merge(df, self.tap_lines_df, left_on='TAP', right_index=True)

                    # find the nearest TAP to each MAZ that serves the line
                    df = df.sort_values(by=distance_col).drop_duplicates(subset=['MAZ', 'line'])

                    # we don't need to remember which lines are served by which TAPs
                    df = df.drop(columns='line').drop_duplicates(subset=['MAZ', 'TAP']).sort_values(['MAZ', 'TAP'])

                    logger.debug(f"trimmed maz_to_tap table {file_name} from {old_len} to {len(df)} rows")
                    logger.debug(f"maz_to_tap table {file_name} max {distance_col} {df[distance_col].max()}")

                    max_dist = maz_to_tap_settings.get('max_dist', None)
                    if max_dist:
                        old_len = len(df)
                        df = df[df[distance_col] <= max_dist]
                        logger.debug(f"trimmed maz_to_tap table {file_name} from {old_len} to {len(df)} rows "
                                     f"based on max_dist {max_dist}")

                    if TRACE_TRIMMED_MAZ_TO_TAP_TABLES:
                        tracing.write_csv(df, file_name=f"trimmed_{maz_to_tap_settings['table']}", transpose=False)

                df.set_index(['MAZ', 'TAP'], drop=True, inplace=True, verify_integrity=True)
                logger.debug(f"loaded maz_to_tap table {file_name} with {len(df)} rows")

                assert mode not in self.maz_to_tap_dfs
                self.maz_to_tap_dfs[mode] = df

        mem.trace_memory_info('#MEM network_los.load_data before create_skim_dicts')

        # create taz skim dict
        assert 'taz' not in self.skim_dicts
        self.skim_dicts['taz'] = self.create_skim_dict('taz')
        # make sure skim has all taz ids
        # FIXME - weird that there is no list of tazs to check against?

        # create MazSkimDict facade
        if self.zone_system in [TWO_ZONE, THREE_ZONE]:
            # create MazSkimDict facade skim_dict
            # (must have already loaded dependencies: taz skim_dict, maz_to_maz_df, and maz_taz_df)
            assert 'maz' not in self.skim_dicts
            self.skim_dicts['maz'] = self.create_skim_dict('maz')
            # make sure skim has all maz_ids
            assert set(self.maz_taz_df['MAZ'].values).issubset(set(self.skim_dicts['maz'].zone_ids))

        # create tap skim dict
        if self.zone_system == THREE_ZONE:
            assert 'tap' not in self.skim_dicts
            self.skim_dicts['tap'] = self.create_skim_dict('tap')
            # make sure skim has all tap_ids
            assert set(self.tap_df['TAP'].values).issubset(set(self.skim_dicts['tap'].zone_ids))

        mem.trace_memory_info("network_los.load_data after create_skim_dicts")
Code example #23
0
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get('fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so the trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed], level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be none if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
Code example #24
0
def compute_columns(df, model_settings, configs_dir, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    configs_dir
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings(configs_dir, '%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', model_settings_name)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    if trace_label is None:
        trace_label = expressions_spec_name

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    expressions_spec = assign.read_assignment_spec(os.path.join(configs_dir, expressions_spec_name))

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    locals_dict = local_utilities()
    locals_dict.update(tables)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results,
                         label=trace_label,
                         slicer='NONE',
                         warn_if_empty=True)

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
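
Note: a hedged example of the model_settings shape compute_columns expects, whether loaded from a yaml file or passed as a dict; the table and spec names below are invented:

model_settings = {
    'DF': 'persons',                       # df_alias (and pipeline table name if df is None)
    'SPEC': 'annotate_persons',            # expressions csv (.csv suffix optional)
    'TABLES': ['households', 'land_use'],  # pipeline tables exposed as read-only locals
}

# results = compute_columns(df=persons_df, model_settings=model_settings,
#                           configs_dir=configs_dir, trace_label='annotate_persons')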
Code example #25
0
def balance_trips(zones, trace_od):
    """Improve the match between destination zone trip totals
    (given by the DEST_TARGETS in the balance_trips config file)
    and the trip counts calculated during the destination choice step.

    The config file should contain the following parameters:

    dest_zone_trip_targets:
      total: <aggregate destination zone trip counts>

      OR

      <segment_1>: totals for segment 1 (optional)
      <segment_2>: totals for segment 2 (optional)
      <segment_3>: totals for segment 3 (optional)

    (These are optional)
    max_iterations: maximum number of iterations to pass to the balancer
    balance_closure: float precision to stop balancing totals
    input_table: path to CSV to use instead of trips table.

    The config file can also include an orig_zone_trip_targets entry to manually
    specify origin zone totals instead of using the logsums calculated by
    the destination choice step.

    Parameters
    ----------
    zones : DataFrameWrapper
        zone attributes
    trace_od : list or dict


    Returns
    -------
    Nothing. Balances trips table and writes trace tables
    """

    logger.info('running trip balancing step ...')

    model_settings = config.read_model_settings(YAML_FILENAME)

    trips_df = get_trips_df(model_settings)
    trace_rows = trace.trace_filter(trips_df, trace_od)
    tracing.write_csv(trips_df[trace_rows],
                      file_name='trips_unbalanced',
                      transpose=False)

    trips_df = trips_df.melt(
                id_vars=['orig', 'dest'],
                var_name='segment',
                value_name='trips')

    dest_targets = model_settings.get(DEST_TARGETS)
    orig_targets = model_settings.get(ORIG_TARGETS)
    max_iterations = model_settings.get('max_iterations', 50)
    closure = model_settings.get('balance_closure', 0.001)

    aggregates, dimensions = calculate_aggregates(trips_df,
                                                  zones.to_frame(),
                                                  dest_targets,
                                                  orig_targets)

    balancer = Balancer(trips_df.reset_index(),
                        aggregates,
                        dimensions,
                        weight_col='trips',
                        max_iteration=max_iterations,
                        closure=closure)
    balanced_df = balancer.balance()

    balanced_trips = balanced_df.set_index(['orig', 'dest', 'segment'])['trips'].unstack()
    tracing.write_csv(balanced_trips.reset_index()[trace_rows],
                      file_name='trips_balanced',
                      transpose=False)
    pipeline.replace_table('trips', balanced_trips)

    logger.info('finished balancing trips.')
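
Note: for intuition, a toy sketch of the iterative proportional fitting idea behind the Balancer: alternately scale the trip matrix so row (origin) and column (destination) totals match their targets, stopping once the scaling factors are within the closure tolerance. The real Balancer also handles segments and weight columns; the seed trips and targets below are invented.

import numpy as np

trips = np.array([[10., 20.], [30., 40.]])   # seed trips (orig x dest), made-up
orig_targets = np.array([40., 60.])
dest_targets = np.array([50., 50.])
closure, max_iterations = 0.001, 50

for _ in range(max_iterations):
    row_factors = orig_targets / trips.sum(axis=1)
    trips *= row_factors[:, None]
    col_factors = dest_targets / trips.sum(axis=0)
    trips *= col_factors[None, :]
    if np.abs(np.concatenate([row_factors, col_factors]) - 1).max() < closure:
        break

print(trips.round(2), trips.sum(axis=1), trips.sum(axis=0))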