def calc_rows_per_chunk(chunk_size, tours, persons_merged, alternatives, trace_label=None):
    """Size one chooser row for tours and hand the chunking decision to chunk.rows_per_chunk.

    Each row of ``tours`` counts as one chooser. The per-chooser row size is the
    sum of three column counts, multiplied by the number of rows in
    ``alternatives``.

    Parameters
    ----------
    chunk_size : passed through unchanged to ``chunk.rows_per_chunk``
        (a zero chunk_size is not special-cased here)
    tours : pandas DataFrame of choosers
    persons_merged : pandas DataFrame; only its column count is used
    alternatives : pandas DataFrame; both its row and column counts are used
    trace_label : str, optional

    Returns
    -------
    Whatever ``chunk.rows_per_chunk`` returns for the computed row size.
    """
    n_choosers = len(tours.index)
    tour_columns = tours.shape[1]

    # every chooser is paired with every row of alternatives
    n_alternatives = alternatives.shape[0]

    # persons_merged columns plus 2 previous tour columns
    person_columns = persons_merged.shape[1] + 2

    # columns of alternatives plus 2 more (original note: skim and join columns)
    alternative_columns = alternatives.shape[1] + 2

    columns_per_pairing = tour_columns + person_columns + alternative_columns

    return chunk.rows_per_chunk(
        chunk_size,
        columns_per_pairing * n_alternatives,
        n_choosers,
        trace_label,
    )
def calc_rows_per_chunk(chunk_size, tours, persons_merged, alternatives, trace_label=None):
    """Compute the per-chooser row size for tours and ask chunk.rows_per_chunk to size chunks.

    Parameters
    ----------
    chunk_size : int
        0 means "not chunking"; in that case the chooser count is returned
        directly and nothing else is inspected.
    tours : pandas DataFrame of choosers (one chooser per row)
    persons_merged : pandas DataFrame; only its column count is used
    alternatives : pandas DataFrame; both its row and column counts are used
    trace_label : str, optional
        prefix for the debug log lines

    Returns
    -------
    ``len(tours.index)`` when ``chunk_size == 0``, otherwise whatever
    ``chunk.rows_per_chunk`` returns.
    """
    num_choosers = len(tours.index)

    # if not chunking, then return num_choosers
    if chunk_size == 0:
        return num_choosers

    chooser_row_size = tours.shape[1]
    sample_size = alternatives.shape[0]

    # persons_merged columns plus 2 previous tour columns
    extra_chooser_columns = persons_merged.shape[1] + 2

    # columns of alternatives plus 2 more (original note: skim and join columns)
    alt_row_size = alternatives.shape[1] + 2

    row_size = (chooser_row_size + extra_chooser_columns + alt_row_size) * sample_size

    # pass the values as logging args so the message is only formatted when
    # DEBUG is actually enabled; the format strings themselves are unchanged
    logger.debug("%s #chunk_calc choosers %s", trace_label, tours.shape)
    logger.debug("%s #chunk_calc extra_chooser_columns %s", trace_label, extra_chooser_columns)
    logger.debug("%s #chunk_calc alternatives %s", trace_label, alternatives.shape)
    logger.debug("%s #chunk_calc alt_row_size %s", trace_label, alt_row_size)

    return chunk.rows_per_chunk(chunk_size, row_size, num_choosers, trace_label)
def trip_scheduling_rpc(chunk_size, choosers, spec, trace_label):
    """rows_per_chunk calculator for trip scheduling, chunked by chunk_id.

    Parameters
    ----------
    chunk_size : passed through unchanged to ``chunk.rows_per_chunk``
        (a zero chunk_size is not special-cased here)
    choosers : pandas DataFrame with a ``chunk_id`` column
    spec : pandas DataFrame; only its column count is used
    trace_label : str

    Returns
    -------
    Whatever ``chunk.rows_per_chunk`` returns for the computed row size.
    """
    # NOTE we chunk chunk_id: the unit handed to rows_per_chunk is a chunk_id,
    # counted as max + 1 (presumably ids are 0-based and contiguous - confirm)
    num_choosers = choosers['chunk_id'].max() + 1

    # extra columns from spec
    extra_columns = spec.shape[1]
    chooser_row_size = choosers.shape[1] + extra_columns

    # scale row_size by average number of chooser rows per chunk_id.
    # Divide by an explicit float, as the other chunk_id-based calculators do,
    # so the average is never truncated by integer division.
    rows_per_chunk_id = choosers.shape[0] / float(num_choosers)
    row_size = rows_per_chunk_id * chooser_row_size

    return chunk.rows_per_chunk(chunk_size, row_size, num_choosers, trace_label)
def calc_rows_per_chunk(chunk_size, tours, persons_merged, alternatives, model_settings,
                        trace_label=None):
    """Compute the per-chooser row size for tours, including optional logsum columns.

    Parameters
    ----------
    chunk_size : passed through unchanged to ``chunk.rows_per_chunk``
        (a zero chunk_size is not special-cased here)
    tours : pandas DataFrame of choosers (one chooser per row)
    persons_merged : pandas DataFrame; only its column count is used
    alternatives : pandas DataFrame; both its row and column counts are used
    model_settings : mapping
        when it contains ``'LOGSUM_SETTINGS'``, the referenced settings and
        spec are read and extra logsum columns are added to the row size
    trace_label : str, optional
        prefix for the debug log lines

    Returns
    -------
    Whatever ``chunk.rows_per_chunk`` returns for the computed row size.
    """
    num_choosers = len(tours.index)

    chooser_row_size = tours.shape[1]
    sample_size = alternatives.shape[0]

    # persons_merged columns plus 2 previous tour columns
    extra_chooser_columns = persons_merged.shape[1] + 2

    # columns of alternatives plus 2 more (original note: skim and join columns)
    alt_row_size = alternatives.shape[1] + 2

    logsum_columns = 0
    if 'LOGSUM_SETTINGS' in model_settings:
        logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
        logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
        logsum_nest_spec = config.get_logit_model_settings(logsum_settings)

        # common to both cases:
        #   expression_values for each spec row
        #   two columns for each alt (utilities and probs, or
        #   raw_utilities and base_probabilities when nested)
        logsum_columns = logsum_spec.shape[0] + (2 * logsum_spec.shape[1])

        if logsum_nest_spec is not None:
            # nested_exp_utilities, nested_probabilities for each nest
            # less 1 as nested_probabilities lacks root
            nest_count = logit.count_nests(logsum_nest_spec)
            logsum_columns = logsum_columns + (2 * nest_count) - 1

    row_size = (chooser_row_size + extra_chooser_columns + alt_row_size + logsum_columns) \
        * sample_size

    # pass the values as logging args so the message is only formatted when
    # DEBUG is actually enabled; the format strings themselves are unchanged
    logger.debug("%s #chunk_calc choosers %s", trace_label, tours.shape)
    logger.debug("%s #chunk_calc extra_chooser_columns %s", trace_label, extra_chooser_columns)
    logger.debug("%s #chunk_calc alternatives %s", trace_label, alternatives.shape)
    logger.debug("%s #chunk_calc alt_row_size %s", trace_label, alt_row_size)
    logger.debug("%s #chunk_calc logsum_columns %s", trace_label, logsum_columns)

    return chunk.rows_per_chunk(chunk_size, row_size, num_choosers, trace_label)
def calc_rows_per_chunk(chunk_size, choosers, trace_label=None):
    """rows_per_chunk calculator for choosers that are chunked by chunk_id.

    Parameters
    ----------
    chunk_size : passed through unchanged to ``chunk.rows_per_chunk``
        (a zero chunk_size is not special-cased here)
    choosers : pandas DataFrame with a ``chunk_id`` column
    trace_label : str, optional

    Returns
    -------
    Whatever ``chunk.rows_per_chunk`` returns for the computed row size.
    """
    # NOTE we chunk chunk_id: the unit handed to rows_per_chunk is a chunk_id,
    # counted as max + 1 (presumably ids are 0-based and contiguous - confirm)
    chunk_id_count = choosers['chunk_id'].max() + 1

    column_count = choosers.shape[1]

    # average number of chooser rows sharing one chunk_id
    mean_rows_per_id = choosers.shape[0] / float(chunk_id_count)

    # cells per chunk_id, truncated to a whole number
    cells_per_id = int(mean_rows_per_id * column_count)

    return chunk.rows_per_chunk(chunk_size, cells_per_id, chunk_id_count, trace_label)
def calc_rows_per_chunk(chunk_size, choosers, trace_label=None):
    """Work out a row size per chunk_id and delegate to chunk.rows_per_chunk.

    Parameters
    ----------
    chunk_size : forwarded as-is to ``chunk.rows_per_chunk``; zero is not
        treated specially in this function
    choosers : pandas DataFrame that must contain a ``chunk_id`` column
    trace_label : str, optional

    Returns
    -------
    The result of ``chunk.rows_per_chunk``.
    """
    # chunking is by chunk_id, so the "chooser" count is the largest
    # chunk_id plus one (assumes ids start at 0 - TODO confirm)
    n_chunk_ids = choosers['chunk_id'].max() + 1

    width = choosers.shape[1]
    height = choosers.shape[0]

    # scale the row width by the mean number of rows per chunk_id, then
    # truncate to an integer
    row_size = int((height / float(n_chunk_ids)) * width)

    return chunk.rows_per_chunk(chunk_size, row_size, n_chunk_ids, trace_label)
def calc_rows_per_chunk(chunk_size, df, spec, extra_columns=0, trace_label=None):
    """Simple rows_per_chunk calculator for chunking calls to assign_variables.

    The main logic, including handling of a missing/zero chunk size, is left
    to ActivitySim's ``chunk.rows_per_chunk``; this function only estimates
    how many columns one row of ``df`` will occupy.

    Parameters
    ----------
    chunk_size : int
    df : pandas DataFrame
    spec : pandas DataFrame
        must have a ``target`` column of strings
    extra_columns : int, optional
    trace_label : str, optional

    Returns
    -------
    num_rows : int
    effective_chunk_size : int
        (as returned by ``chunk.rows_per_chunk``)
    """
    # targets whose name starts with '_' are temporaries
    temp_count = spec.target.str.match('_').sum()
    persistent_count = spec.shape[0] - temp_count

    # temporaries are transient and (we assume) discarded before extra_columns
    # are applied, so the headroom needed is the larger of the two, not the sum
    headroom = max(temp_count, extra_columns)

    columns_per_row = len(df.columns) + persistent_count + headroom

    return chunk.rows_per_chunk(chunk_size, columns_per_row, len(df.index), trace_label)
def physical_activity_rpc(chunk_size, trips_df, persons_df, spec, trace_label=None):
    """rows_per_chunk calculator for physical activity, chunked by chunk_id.

    Parameters
    ----------
    chunk_size : int
        0 means "not chunking"; the number of chunk_ids is then returned
        directly and the other arguments besides ``trips_df`` are not used
    trips_df : pandas DataFrame with a ``chunk_id`` column
    persons_df : pandas DataFrame
    spec : pandas DataFrame with a ``target`` column of strings
    trace_label : str, optional

    Returns
    -------
    The number of chunk_ids when ``chunk_size == 0``, otherwise whatever
    ``chunk.rows_per_chunk`` returns.
    """
    # NOTE we chunk chunk_id: the unit handed to rows_per_chunk is a chunk_id,
    # counted as max + 1 (presumably ids are 0-based and contiguous - confirm)
    chunk_id_count = trips_df['chunk_id'].max() + 1

    # if not chunking, then return the chunk_id count
    if chunk_size == 0:
        return chunk_id_count

    # spec targets starting with '_' are temporaries; they are transient and
    # discarded before persons_df is merged, so only the rest are counted
    temp_count = spec.target.str.match('_').sum()
    trip_columns = trips_df.shape[1] + (spec.shape[0] - temp_count)

    # scale each table's row width by its average number of rows per chunk_id
    divisor = float(chunk_id_count)
    trips_per_id = trips_df.shape[0] / divisor
    person_columns = persons_df.shape[1]
    persons_per_id = persons_df.shape[0] / divisor

    cells_per_id = (trips_per_id * trip_columns) + (persons_per_id * person_columns)

    return chunk.rows_per_chunk(chunk_size, cells_per_id, chunk_id_count, trace_label)
def trip_purpose_rpc(chunk_size, choosers, spec, trace_label):
    """
    rows_per_chunk calculator for trip_purpose

    Each row of ``choosers`` is one chooser; its row size is the number of
    chooser columns plus one extra column per column of ``spec``.
    ``chunk_size`` is forwarded unchanged to ``chunk.rows_per_chunk`` (zero is
    not special-cased here), and that call's result is returned.
    """
    chooser_count = len(choosers.index)

    # chooser columns plus the extra columns from spec
    columns_per_row = len(choosers.columns) + spec.shape[1]

    return chunk.rows_per_chunk(chunk_size, columns_per_row, chooser_count, trace_label)