Example #1
def trip_schedule_calc_row_size(choosers, trace_label):
    """
    rows_per_chunk calculator for trip_scheduler
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    chooser_row_size = len(choosers.columns)
    spec_columns = 3

    sizer.add_elements(chooser_row_size + spec_columns, 'choosers')

    row_size = sizer.get_hwm()
    return row_size
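The examples in this listing all follow the same pattern: accumulate the number of per-row elements each intermediate data structure contributes, then report the high-water mark. Below is a minimal, hypothetical sketch of what such an accumulator might look like; it is not ActivitySim's chunk.RowSizeEstimator, just an illustration of the add_elements / drop_elements / get_hwm accounting.

# Hypothetical, simplified sketch of a RowSizeEstimator-style accumulator.
# Not ActivitySim's implementation; for illustration of the pattern only.
class ToyRowSizeEstimator:

    def __init__(self, trace_label):
        self.trace_label = trace_label
        self.elements = {}  # tag -> per-row element count
        self.hwm = 0        # high-water mark of simultaneous per-row elements

    def add_elements(self, count, tag):
        self.elements[tag] = count
        self.hwm = max(self.hwm, sum(self.elements.values()))

    def drop_elements(self, tag):
        # dropping frees the elements but keeps the high-water mark
        self.elements.pop(tag, None)

    def get_hwm(self):
        return self.hwm


sizer = ToyRowSizeEstimator('demo')
sizer.add_elements(10, 'choosers')
sizer.add_elements(4, 'chooser_probs')
sizer.drop_elements('chooser_probs')
print(sizer.get_hwm())  # 14 -> peak simultaneous per-row elements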
Example #2
def trip_purpose_calc_row_size(choosers, spec, trace_label):
    """
    rows_per_chunk calculator for trip_purpose
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    chooser_row_size = len(choosers.columns)
    spec_columns = spec.shape[1] - len(PROBS_JOIN_COLUMNS)

    sizer.add_elements(chooser_row_size + spec_columns, 'choosers')

    row_size = sizer.get_hwm()
    return row_size
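A quick check of the spec_columns arithmetic with a toy probabilities spec (the column names and PROBS_JOIN_COLUMNS value here are assumptions for illustration, not ActivitySim's actual definitions):

import pandas as pd

# assumed join columns and a toy probabilities spec, for illustration only
PROBS_JOIN_COLUMNS = ['primary_purpose', 'outbound', 'person_type']
probs_spec = pd.DataFrame(
    columns=PROBS_JOIN_COLUMNS + ['shopping', 'othmaint', 'eatout'])

# only the probability columns add per-row elements; the join columns
# are already counted as part of the chooser columns
spec_columns = probs_spec.shape[1] - len(PROBS_JOIN_COLUMNS)
print(spec_columns)  # 3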
Example #3
def trip_scheduling_calc_row_size(trips, spec, trace_label):

    sizer = chunk.RowSizeEstimator(trace_label)

    # NOTE we chunk by chunk_id
    # scale row_size by average number of chooser rows per chunk_id
    num_choosers = trips['chunk_id'].max() + 1
    rows_per_chunk_id = len(trips) / num_choosers

    # only non-initial trips require scheduling; the segment handling the
    # first such trip in the tour will use the most space
    outbound_chooser = ((trips.trip_num == 2)
                        & trips.outbound
                        & (trips.primary_purpose != 'atwork'))
    inbound_chooser = ((trips.trip_num == trips.trip_count - 1)
                       & ~trips.outbound
                       & (trips.primary_purpose != 'atwork'))

    # furthermore, inbound and outbound are scheduled independently
    if outbound_chooser.sum() > inbound_chooser.sum():
        is_chooser = outbound_chooser
        logger.debug(
            f"{trace_label} {is_chooser.sum()} outbound_choosers of {len(trips)} require scheduling"
        )
    else:
        is_chooser = inbound_chooser
        logger.debug(
            f"{trace_label} {is_chooser.sum()} inbound_choosers of {len(trips)} require scheduling"
        )

    chooser_fraction = is_chooser.sum() / len(trips)
    logger.debug(f"{trace_label} chooser_fraction {chooser_fraction *100}%")

    chooser_row_size = len(trips.columns) + len(
        spec.columns) - len(PROBS_JOIN_COLUMNS)
    sizer.add_elements(chooser_fraction * chooser_row_size, 'choosers')

    # might be clipped to fewer but this is worst case
    chooser_probs_row_size = len(spec.columns) - len(PROBS_JOIN_COLUMNS)
    sizer.add_elements(chooser_fraction * chooser_probs_row_size,
                       'chooser_probs')

    sizer.add_elements(chooser_fraction, 'choices')
    sizer.add_elements(chooser_fraction, 'rands')
    sizer.add_elements(chooser_fraction, 'failed')

    row_size = sizer.get_hwm()
    row_size = row_size * rows_per_chunk_id

    return row_size
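Since this model chunks by chunk_id rather than by individual trip, the per-trip high-water mark is scaled by the average number of trips per chunk_id. A toy illustration of that scaling (the numbers are made up):

import pandas as pd

# toy trips table: chunk_ids 0..2 covering 7 trips
trips = pd.DataFrame({'chunk_id': [0, 0, 0, 1, 1, 2, 2]})

num_choosers = trips['chunk_id'].max() + 1      # 3
rows_per_chunk_id = len(trips) / num_choosers   # 7 / 3 ~= 2.33

# if the per-trip high-water mark were, say, 20 elements, the row_size
# reported to the chunker (per chunk_id) would be scaled up accordingly
row_size = 20 * rows_per_chunk_id
print(round(row_size, 1))  # 46.7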
Example #4
def accessibility_calc_row_size(accessibility_df, land_use_df, assignment_spec,
                                network_los, trace_label):
    """
    rows_per_chunk calculator for accessibility
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    # for THREE_ZONE systems the tvpb tap skims add per-row overhead that is
    # hard to estimate, so we disable the row_size calculation and return 0
    # (do this first, before any of the row-size estimation below)
    if network_los.zone_system == los.THREE_ZONE:
        # DISABLE_TVPB_OVERHEAD
        logger.debug("disable calc_row_size for THREE_ZONE with tap skims")
        return 0

    land_use_rows = len(land_use_df.index)
    land_use_columns = len(land_use_df.columns)
    od_columns = 2

    # assignment spec has one row per value to assign
    # count number of unique persistent assign_variables targets simultaneously resident during spec eval
    # (since dict overwrites recurring targets, only count unique targets)
    def is_persistent(target):
        return not (assign.is_throwaway(target)
                    or assign.is_temp_scalar(target))

    num_spec_values = len([
        target for target in assignment_spec.target.unique()
        if is_persistent(target)
    ])

    sizer.add_elements(land_use_rows * od_columns, 'od_df')

    # each od_df joins to all land_use zones
    sizer.add_elements(land_use_rows * land_use_columns, 'land_use_choosers')

    # and then we assign_variables to joined land_use from assignment_spec
    sizer.add_elements(land_use_rows * num_spec_values, 'spec_values')

    row_size = sizer.get_hwm()
    return row_size
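The accessibility estimate is dominated by the cross join of each chooser (origin zone) with every land-use zone. A back-of-the-envelope version of the same accounting, with assumed sizes rather than real model data:

# assumed toy sizes, for illustration only
land_use_rows = 1000     # number of zones
land_use_columns = 50    # land-use attributes per zone
od_columns = 2           # orig, dest
num_spec_values = 12     # persistent assign_variables targets

# per accessibility chooser row:
od_df = land_use_rows * od_columns                    # cross-join index columns
land_use_choosers = land_use_rows * land_use_columns  # joined land-use attributes
spec_values = land_use_rows * num_spec_values         # assigned spec values

row_size = od_df + land_use_choosers + spec_values
print(row_size)  # 64000 elements per chooser row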
Example #5
def initialize_tvpb_calc_row_size(choosers, network_los, trace_label):
    """
    rows_per_chunk calculator for initialize_tvpb
    """

    sizer = chunk.RowSizeEstimator(trace_label)

    model_settings = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])

    #  one element per chooser column
    sizer.add_elements(len(choosers.columns), 'choosers')

    #  plus one element for each attribute added as a chooser column
    sizer.add_elements(len(attributes_as_columns), 'attributes_as_columns')

    preprocessor_settings = model_settings.get('PREPROCESSOR')
    if preprocessor_settings:

        preprocessor_spec_name = preprocessor_settings.get('SPEC', None)

        if not preprocessor_spec_name.endswith(".csv"):
            preprocessor_spec_name = f'{preprocessor_spec_name}.csv'
        expressions_spec = assign.read_assignment_spec(
            config.config_file_path(preprocessor_spec_name))

        sizer.add_elements(expressions_spec.shape[0], 'preprocessor')

    #  expression_values for each spec row
    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    sizer.add_elements(spec.shape[0], 'expression_values')

    #  utilities for each alternative (spec column)
    sizer.add_elements(spec.shape[1], 'utilities')

    row_size = sizer.get_hwm()

    return row_size
Example #6
def tour_scheduling_calc_row_size(tours, persons_merged, alternatives, skims,
                                  spec, model_settings, trace_label):

    # the fraction of available timetable alternatives is not consistent across
    # mandatory tours (highest), non_mandatory tours, and atwork subtours (lowest),
    # so no reduction is applied here
    TIMETABLE_AVAILABILITY_REDUCTION_FACTOR = 1
    # pruning of duplicate logsum alternatives appears to be more stable,
    # so assume roughly half survive
    LOGSUM_DUPLICATE_REDUCTION_FACTOR = 0.5

    sizer = chunk.RowSizeEstimator(trace_label)

    # chooser is tours merged with persons_merged
    chooser_row_size = len(tours.columns) + len(persons_merged.columns)

    # e.g. start, end, duration, <chooser_column>
    alt_row_size = alternatives.shape[1] + 1

    # non-available alternatives will be sliced out so this is an over-estimate
    # for atwork subtours this may be a gross over-estimate,
    # but that is presumably ok since chunking is adaptive
    sample_size = len(alternatives) * TIMETABLE_AVAILABILITY_REDUCTION_FACTOR

    sizer.add_elements(chooser_row_size, 'tours')  # tours_merged with persons

    # alt_tdd (tdd_interaction_dataset) is a cross join of choosers with alternatives
    sizer.add_elements((chooser_row_size + alt_row_size) * sample_size,
                       'interaction_df')

    # eval_interaction_utilities is parsimonious and doesn't create a separate column for each partial utility
    sizer.add_elements(
        sample_size,
        'interaction_utilities')  # <- this is probably always the HWM
    sizer.drop_elements('interaction_df')

    sizer.drop_elements('interaction_utilities')

    sizer.add_elements(alt_row_size, 'utilities_df')
    sizer.add_elements(alt_row_size, 'probs')

    if 'LOGSUM_SETTINGS' in model_settings:

        logsum_settings = config.read_model_settings(
            model_settings['LOGSUM_SETTINGS'])
        logsum_spec = simulate.read_model_spec(
            file_name=logsum_settings['SPEC'])
        logsum_nest_spec = config.get_logit_model_settings(logsum_settings)

        if logsum_nest_spec is None:
            # expression_values for each spec row
            # utilities and probs for each alt
            logsum_columns = logsum_spec.shape[0] + (2 * logsum_spec.shape[1])
        else:
            # expression_values for each spec row
            # raw_utilities and base_probabilities for each alt
            # nested_exp_utilities and nested_probabilities for each nest
            # less 1 as nested_probabilities lacks root
            nest_count = logit.count_nests(logsum_nest_spec)
            logsum_columns = logsum_spec.shape[0] + (
                2 * logsum_spec.shape[1]) + (2 * nest_count) - 1

        if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS:
            sizer.add_elements(logsum_columns * sample_size, 'logsum_columns')
        else:
            # if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS is false compute_logsums prunes alt_tdd
            # to only compute logsums for unique (tour_id, out_period, in_period, duration) in alt_tdd
            # which cuts the number of alts by roughly 50% (44% for 100 hh mtctm1 test dataset)
            # grep the log for USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS to check actual % savings
            sizer.add_elements(
                logsum_columns * sample_size * LOGSUM_DUPLICATE_REDUCTION_FACTOR,
                'logsum_columns')

    row_size = sizer.get_hwm()

    if simulate.tvpb_skims(skims):
        # DISABLE_TVPB_OVERHEAD
        logger.debug("disable calc_row_size for THREE_ZONE with tap skims")
        return 0

    return row_size
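The nested-logit branch of the logsum_columns arithmetic can be sanity-checked with made-up spec dimensions (a sketch; the numbers below are assumptions, not taken from a real model):

# assumed toy dimensions, for illustration only
logsum_spec_rows = 60   # expression rows in the logsum spec
logsum_spec_cols = 8    # alternatives (columns) in the logsum spec
nest_count = 4          # nests reported by logit.count_nests

# expression_values for each spec row,
# raw_utilities and base_probabilities for each alt,
# nested_exp_utilities and nested_probabilities for each nest,
# less 1 because nested_probabilities lacks the root nest
logsum_columns = (logsum_spec_rows
                  + 2 * logsum_spec_cols
                  + 2 * nest_count
                  - 1)
print(logsum_columns)  # 83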