def elemental_mode(self, mand_prob_l1, veh_segment, mand_prob_append, purpose, trips, seed):
    """
    This function samples an elemental (primary) mode for each record in the requested vehicle segment.

    :param self:
    :param mand_prob_l1: level 1 mode probability dataframe for the mandatory purpose
    :param veh_segment: vehicle segment whose probability columns are sampled
    :param mand_prob_append: production zone, destination zone, and market segment columns to attach
    :param purpose: trip purpose being processed
    :param trips: trips dataframe
    :param seed: randomizer passed to Cheval's sample_from_weights
    :return: mand_mode dataframe with the sampled mode attached
    """

    # null dataframes
    mand_level1 = pd.DataFrame()
    sampled_df = pd.DataFrame()
    mand_l1 = pd.DataFrame()
    collect_df = {}

    # generate the appropriate df for sampling by veh_type
    # and then attach the production, destination, and market segment
    mand_l1 = self.vehtype_prob(mand_prob_l1, veh_segment)  # get the columns of prob that will be sampled
    mand_level1 = common.concat_df(mand_prob_append, mand_l1, 1)  # concat function

    # Now prepare the level1 file for sampling and only keep relevant columns
    mand_level2, df_join = self.prob_df_longtermchoice(trips, purpose, mand_level1, veh_segment)
    mand_level2 = mand_level2.iloc[:, 0:52]

    # sample using Cheval
    sampled_df = pd.DataFrame(sample_from_weights(mand_level2, randomizer=seed, astype='category',
                                                  n_threads=1, n_draws=1)).reset_index()
    sampled_df.columns = ['Production Zone', 'Destination Zone', 'Market Segment', 'Mode']

    # create flag to help select the records in the trips dataframe. Creating the flag allows us to select
    # exactly the same number of rows in the trips dataframe that match the sampled_df in length.
    # Also sort the df to ensure that we don't end up concatenating the wrong o-ds
    sampled_df['flag'] = sampled_df['Production Zone'].astype(str) + sampled_df['Destination Zone'].astype(str) + \
                         sampled_df['Market Segment'].astype(str)
    sampled_df = sampled_df.sort_values(['Production Zone', 'Destination Zone', 'Market Segment'])
    list_un = sampled_df['flag'].unique().tolist()

    # select the records from the trips dataframe that correspond to the sampled df using the flag. Once again
    # sort to ensure proper concatenation
    df_join = df_join.loc[(df_join['flag'].isin(list_un))]
    df_join = df_join.sort_values(['taz_i', 'taz_j', 'market_seg'])

    # concatenate the data. Concatenation is needed because the flag is not yet unique and a merge
    # would result in a larger dataframe than what we started with
    collect_df[veh_segment] = common.concat_df(df_join, sampled_df, 1)
    print("Vehicle Type: %s" % veh_segment, len(df_join), len(mand_level1), len(sampled_df),
          len(mand_l1), len(mand_prob_l1))

    # now make one dataframe across vehicle segments
    mand_mode = pd.concat(collect_df.values(), ignore_index=True)
    mand_mode['PrimaryMode'] = mand_mode['Mode'].map(lambda x: str(x)[10:])

    return mand_mode
def assign_vehtype(self, hh_file, vehtype_file, seed):
    """
    This function takes the household file and the processed vehicle type file and samples a vehicle type
    to attach to each household.

    :param hh_file: households file noted in the control_parameters file
    :param vehtype_file: vehicle type probability file noted in the control_parameters file
    :param seed: randomizer passed to Cheval's sample_from_weights
    :return: hh_file: the household dataframe with the sampled vehicle type attached
    """

    # join the vehicle probabilities to the households so that we can sample from them. It is easy enough
    # to attach the probabilities because we know the market segment of each household.
    hh_vehprob = pd.merge(hh_file, vehtype_file, left_on=['taz', 'market_seg'],
                          right_on=['taz', 'market_seg'], how='left')

    # now unstack and get it ready for Cheval
    hh_vehprob = hh_vehprob.pivot(index='hhid', columns='vtype', values='value')

    # Sample a vehicle type using Cheval
    sample_df = pd.DataFrame(sample_from_weights(hh_vehprob, randomizer=seed, astype='category', n_threads=3))
    sample_df.columns = ['hh_veh_type']
    hh_file = common.concat_df(hh_file, sample_df, 1)

    return hh_file
def assign_vehtype(self, hh_file, vehtype_file, seed):
    """
    This function takes the household file and the processed vehicle type file and samples a vehicle type
    to attach to each household.

    :param hh_file: households file noted in the control_parameters file
    :param vehtype_file: vehicle type probability file noted in the control_parameters file
    :param seed: randomizer passed to Cheval's sample_from_weights
    :return: hh_file: the household dataframe with the sampled vehicle type attached
    """

    # join the vehicle probabilities to the households so that we can sample from them. It is easy enough
    # to attach the probabilities because we know the market segment of each household.
    hh_vehprob = pd.merge(hh_file, vehtype_file, left_on=['taz', 'market_seg'],
                          right_on=['taz', 'market_seg'], how='left')

    # now unstack and get it ready for Cheval
    hh_vehprob = hh_vehprob.pivot(index='hhid', columns='vtype', values='value')

    # Sample a vehicle type using Cheval
    sample_df = pd.DataFrame(sample_from_weights(hh_vehprob, randomizer=seed, astype='category', n_threads=3))
    sample_df.columns = ['hh_veh_type']
    hh_file = concat_df(hh_file, sample_df, 1)

    return hh_file
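
# A minimal sketch (pandas only, hypothetical toy data) of the preparation step that assign_vehtype above
# performs before handing the wide table to Cheval's sample_from_weights: the long-format vehicle-type
# probabilities are merged onto the households by taz and market_seg and then pivoted so that each row is a
# household and each column a vehicle type. The zone numbers and the 'auto'/'no_auto' labels are illustrative.
import pandas as pd

hh_toy = pd.DataFrame({'hhid': [1, 2], 'taz': [100, 101], 'market_seg': [0, 1]})
vehtype_toy = pd.DataFrame({
    'taz': [100, 100, 101, 101],
    'market_seg': [0, 0, 1, 1],
    'vtype': ['auto', 'no_auto', 'auto', 'no_auto'],
    'value': [0.8, 0.2, 0.4, 0.6]
})

hh_vehprob_toy = pd.merge(hh_toy, vehtype_toy, on=['taz', 'market_seg'], how='left')
hh_vehprob_toy = hh_vehprob_toy.pivot(index='hhid', columns='vtype', values='value')
# hh_vehprob_toy now has one row per household and one probability column per vehicle type,
# which is the shape assign_vehtype passes to sample_from_weights.
print(hh_vehprob_toy)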
def identify_peak(self, trips_hhold_df):
    """
    This function tags every record in the trips_hhold dataframe as a peak (1) or off-peak (0) trip.

    :param trips_hhold_df: the trips and household dataframe to be tagged
    :return: trips_hhold_df with a peak_flag column attached
    """

    # The trips_out file contains a peak hour factor column that decides whether a trip is sampled
    # in the peak or off-peak period. In order to discretely select the peak records and vice-versa,
    # a uniform random number generator is run and the values are attached to the trips_out file.
    # If the (1 - peak_factor) value in the record is greater than the random value, then
    # the record is in the off-peak and vice-versa.
    np.random.seed(mprobe_valid.seed)
    random = pd.DataFrame(np.random.uniform(size=len(trips_hhold_df)))
    random.columns = ['rnum']

    # attach the random numbers and calculate peak_flag. A value of 1 in this flag
    # means that this is a peak period trip record.
    trips_hhold_df = common.concat_df(trips_hhold_df, random, 1)
    trips_hhold_df['peak_flag'] = np.where((1 - trips_hhold_df['peak_factor']) > trips_hhold_df['rnum'], 0, 1)
    trips_hhold_df[['peak_flag']] = trips_hhold_df[['peak_flag']].astype('int8')  # save some memory

    mprobe_valid.logger.info("Return the trips and household dataframe combined with each record tagged as to "
                             "whether it starts in the peak (1) or off-peak period (0)")

    return trips_hhold_df
def run(self, trips_hhold_df, peak_consistency):
    """
    This function creates a peak flag for every record in the trips_hhold dataframe. This is needed to ensure
    that MLOGIT produces the correct probabilities by time period and O-D pair. If the peak_consistency flag is
    set to 1 then the function ensures consistency in choosing the peak/off-peak flag for the outbound and
    inbound trips of the mandatory tours. This option also increases the run time by around 35 minutes as every
    row needs to be evaluated.

    :param trips_hhold_df: the trips_hhold df that needs a peak flag
    :param peak_consistency: if set to 1, consistency between the outbound and inbound trips of the mandatory
        tour is maintained
    :return: trips_hhold df with peak flag
    """

    # The trips_out file contains a peak hour factor column that decides whether a trip is sampled
    # in the peak or off-peak period. In order to discretely select the peak records and vice-versa,
    # a uniform random number generator is run and the values are attached to the trips_out file.
    # If the random value in the record is less than or equal to the peak factor, then the record
    # is in the peak and vice-versa.
    np.random.seed(mprobe_valid.seed)
    random = pd.DataFrame(np.random.uniform(size=len(trips_hhold_df)))
    random.columns = ['rnum']

    # attach the random numbers and calculate peak_flag. A value of 1 in this flag
    # means that this is a peak period trip record.
    trips_hhold_df = common.concat_df(trips_hhold_df, random, 1)
    trips_hhold_df['peak_flag'] = np.where(trips_hhold_df['rnum'] <= trips_hhold_df['peak_factor'], 1, 0)
    trips_hhold_df[['peak_flag']] = trips_hhold_df[['peak_flag']].astype('int8')

    mprobe_valid.logger.info("Return the trips and household dataframe combined with each record that has a home "
                             "end in it tagged as to whether it starts in the peak (1) or off-peak period (0). "
                             "The rest of the records are populated with a dummy value of 10 as their time period "
                             "will be determined by the destination choice model of the GGHMV4")

    # GGHMV4 carries out mode choice for the HBW, HBS, and HBU trips at the PA level. This essentially means that
    # the return trip of that tour must also lie in the same peak period that the home based trip was in.
    if peak_consistency == 1:
        mprobe_valid.logger.info("Peak consistency flag set to %s." % peak_consistency)

        # set default values
        loop = 0
        start_peak_flag = 0

        # convert the trips_hhold dataframe to an array and run the peak consistency function. Once completed,
        # convert the array back to a dataframe and set the column names
        trips_hhold_df_array = trips_hhold_df.values
        trips_hhold_df_array = self.peak_consistency(trips_hhold_df_array)
        trips_hhold_df_array = pd.DataFrame(trips_hhold_df_array)
        trips_hhold_df_array.columns = trips_hhold_df.columns

        # reset the trips_hhold_df dataframe
        trips_hhold_df = trips_hhold_df_array

    mprobe_valid.logger.info("Peak flag populated")
    return trips_hhold_df
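
# A small sketch (numpy only, toy values) of the peak flag draw used by identify_peak and run above. Both
# formulations tag a record as peak (1) with probability equal to its peak_factor; they only differ in which
# tail of the uniform draw they compare against. The seed value here is a hypothetical stand-in for
# mprobe_valid.seed.
import numpy as np

np.random.seed(12345)  # hypothetical seed

peak_factor = np.array([0.2, 0.5, 0.9])
rnum = np.random.uniform(size=peak_factor.size)

flag_identify_peak = np.where((1 - peak_factor) > rnum, 0, 1)  # identify_peak formulation
flag_run = np.where(rnum <= peak_factor, 1, 0)                 # run formulation

# Over many draws each record ends up flagged as peak in roughly peak_factor of the simulations.
print(flag_identify_peak, flag_run)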
def run_dest_solver(self, group, chaos_monkey):
    """
    This function runs the destination solver on a chunk (group) of the trips_hhold dataframe.

    :param group: chunk of the trips_hhold dataframe to evaluate
    :param chaos_monkey: randomizer passed to the destination solver
    :return: trips_hhold dataframe with the solved records concatenated back in
    """

    # run destination solver. But first translate the trips_hhold dataframe to a numpy array. This results in a
    # drop in run times. Unlike the peak solver, which saw a drop from 30 mins to 20 seconds, the destination
    # solver sees around a 50% run time saving, to around 2 hours.

    # some housekeeping before running the destination solver function. First, create the flag that will help
    # choose the appropriate non-mandatory matrix to sample from.
    # Bring in the binary files and save them to a dictionary.
    trips_hhold = group

    # control_parameters.logger.info("Prepare the trips_hhold dataframe for the destination solver function")
    trips_hhold['dict_flag'] = trips_hhold['purpose'].astype(str) + '_' + trips_hhold['market_seg'].apply(str) + '_' + \
                               trips_hhold['peak_flag'].apply(str)

    # Second, there are many instances where the person only makes mandatory tours, in which case we don't need to
    # evaluate it. Thus, only keep records where the taz_j has a 0 to run the destination solver. Create a flag to
    # help identify the appropriate records.
    # control_parameters.logger.info("Getting households and person trip records that have more than just "
    #                                "mandatory trips, thereby needing the destination solver.")
    tgr = trips_hhold.iloc[np.where(trips_hhold['taz_j'].values == 0)]. \
        groupby(['hhid', 'pid']). \
        size(). \
        reset_index(name="count")
    tgr['solver_flag'] = 1
    tgr.drop('count', axis=1, inplace=True)

    # transfer the flag information to the trips_hhold while holding it in a temp_df and slicing it to hold the
    # requisite records.
    trips_hhold = pd.merge(trips_hhold, tgr, how='left', on=['hhid', 'pid'])
    trips_hhold['solver_flag'].fillna(0, inplace=True)

    # create temp df to run through the destination solver
    temp_df = trips_hhold
    temp_df = temp_df.iloc[np.where(temp_df['solver_flag'].values == 1)]

    # control_parameters.logger.info(
    #     "A total of %s nonmandatory trips will be assigned an origin and/or destination" % temp_df.shape[0])
    # control_parameters.logger.info("Running the destination solver. Please be patient. There are too many "
    #                                "records and machinations that need to be completed before it all ends.")
    trips_hhold_array = temp_df.values
    trips_hhold_array = self.destination_solver(trips_hhold_array, chaos_monkey)
    trips_hhold_dest_df = pd.DataFrame(trips_hhold_array)
    trips_hhold_dest_df.columns = trips_hhold.columns

    # The records are now concatenated back to the original trips_hhold df, but as a replacement
    # control_parameters.logger.info("Concatenating the records back")
    trips_hhold = trips_hhold.iloc[np.where(trips_hhold['solver_flag'].values == 0)]
    trips_hhold = common.concat_df(trips_hhold, trips_hhold_dest_df, 0)

    return trips_hhold
def egress_prob(self, mand_mode, mand_eg_prob):
    """
    This function samples an egress mode for every record that has an egress zone.

    :param mand_mode: dataframe of trips with a sampled primary mode
    :param mand_eg_prob: egress mode probability file for the mandatory purpose
    :return: mand_mode dataframe with the sampled egress mode attached
    """

    egg_df = mand_mode.loc[mand_mode['EgressZone'] > 0].copy()
    cols = ['Production Zone', 'Destination Zone']
    egg_df[cols] = egg_df[cols].astype(int)

    # groupby and get the number of draws for each unique O-D pair that has an egress zone
    egg_df_gr = egg_df.groupby(['Production Zone', 'Destination Zone', 'PrimaryMode']).size(). \
        reset_index(name='counts')

    # get column names and melt the dataframe on the production and destination zones
    # and then add in columns for defining the primary and egress modes
    melt_df = pd.melt(mand_eg_prob, id_vars=['Production Zone', 'Destination Zone'])
    melt_df['PrimaryMode'] = melt_df['variable'].str[26:]
    melt_df['EgressMode'] = melt_df['variable'].str[21:25]
    melt_df.drop('variable', axis=1, inplace=True)

    # get rid of any non-uniqueness and get it ready for joining
    melt_df = melt_df.pivot_table(index=['Production Zone', 'Destination Zone', 'PrimaryMode'],
                                  columns='EgressMode', values='value').reset_index()

    # The melted df is now joined back to the grouped dataframe so that the grouped df can be expanded by the
    # counts and contains the egress probabilities as well.
    egg_df_gr1 = pd.merge(egg_df_gr, melt_df, on=['Production Zone', 'Destination Zone', 'PrimaryMode'], how='left')
    egg_df_gr1 = egg_df_gr1.loc[np.repeat(egg_df_gr1.index.values, egg_df_gr1['counts'])]

    # Now make the df wide again and ready for sampling. Also, Bill does not explicitly compute bus
    # probabilities, so they are computed by subtracting Uber and Walk from 1.
    egg_df_gr1.set_index(['Production Zone', 'Destination Zone', 'PrimaryMode'], inplace=True)
    egg_df_gr1.drop('counts', axis=1, inplace=True)
    egg_df_gr1['Bus'] = 1 - (egg_df_gr1['Uber'] + egg_df_gr1['Walk'])

    # sample egress mode
    sampled_df_eg = pd.DataFrame(sample_from_weights(egg_df_gr1, randomizer=self.seed, astype='category',
                                                     n_threads=3, n_draws=1)).reset_index()
    egg_df_gr1 = common.concat_df(egg_df_gr1, sampled_df_eg, 1)
    egg_df_gr1.rename(columns={egg_df_gr1.columns[-1]: "EgressMode"}, inplace=True)  # assign egress mode
    cols = [0, 1, 2]
    egg_df_gr1.drop(egg_df_gr1.columns[cols], axis=1, inplace=True)

    # like before, we need a flag to join the information back to the hbw_mode df. We also sort the dfs
    # before concatenating
    egg_df_gr1['egressflag'] = egg_df_gr1['Production Zone'].astype(str) + \
                               egg_df_gr1['Destination Zone'].astype(str) + \
                               egg_df_gr1['PrimaryMode'].astype(str)
    egg_df_gr1 = egg_df_gr1.sort_values(['Production Zone', 'Destination Zone', 'egressflag'])

    # create unique list for selection
    list_un_eg = egg_df_gr1['egressflag'].unique().tolist()

    # get temp dataframe to do the assigning of the egress mode; this will later be integrated with the
    # chunk being processed
    temp_df = mand_mode
    temp_df['egressflag'] = np.where(temp_df['EgressZone'] > 0,
                                     temp_df['Production Zone'].astype(str) +
                                     temp_df['Destination Zone'].astype(str) +
                                     temp_df['PrimaryMode'].astype(str),
                                     np.nan)
    temp_df = temp_df.loc[(temp_df['egressflag'].isin(list_un_eg))].sort_values(
        ['Production Zone', 'Destination Zone', 'egressflag'])

    # concatenate the dfs
    temp_df = common.concat_df(temp_df, egg_df_gr1, 1)

    # remove the egress records from the hbw_mode chunk and replace them with the temp df. One will need to get
    # rid of duplicated columns as well
    mand_mode = mand_mode.loc[mand_mode['egressflag'].isnull()]
    mand_mode = common.concat_df(mand_mode, temp_df, 0)

    return mand_mode
def egress_prob(self, mand_mode, melt_df):
    """
    This function samples an egress mode for every record that has an egress zone.

    :param mand_mode: dataframe of trips with a sampled primary mode
    :param melt_df: melted egress mode probability dataframe keyed on production zone, destination zone,
        and primary mode
    :return: mand_mode dataframe with the sampled egress mode attached
    """

    egg_df = mand_mode[mand_mode['EgressZone'] > 0].copy()
    cols = ['Production Zone', 'Destination Zone']
    egg_df[cols] = egg_df[cols].astype(int)

    # groupby and get the number of draws for each unique O-D pair that has an egress zone
    egg_df_gr = egg_df.groupby(['Production Zone', 'Destination Zone', 'PrimaryMode']).size(). \
        reset_index(name='counts')

    # The melted df is now joined back to the grouped dataframe so that the grouped df can be expanded by the
    # counts and contains the egress probabilities as well.
    egg_df_gr1 = pd.merge(egg_df_gr, melt_df, on=['Production Zone', 'Destination Zone', 'PrimaryMode'], how='left')
    egg_df_gr1 = egg_df_gr1.loc[np.repeat(egg_df_gr1.index.values, egg_df_gr1['counts'])]

    # Now make the df wide again and ready for sampling. Also, Bill does not explicitly compute bus
    # probabilities, so they are computed by subtracting Uber and Walk from 1.
    egg_df_gr1.set_index(['Production Zone', 'Destination Zone', 'PrimaryMode'], inplace=True)
    egg_df_gr1.drop('counts', axis=1, inplace=True)
    egg_df_gr1['Bus'] = 1 - (egg_df_gr1['Uber'] + egg_df_gr1['Walk'])

    # sample egress mode
    sampled_df_eg = pd.DataFrame(sample_from_weights(egg_df_gr1, randomizer=self.prng, astype='category',
                                                     n_threads=3, n_draws=1)).reset_index()
    egg_df_gr1 = common.concat_df(egg_df_gr1, sampled_df_eg, 1)
    egg_df_gr1.rename(columns={egg_df_gr1.columns[-1]: "EgressMode"}, inplace=True)  # assign egress mode
    cols = [0, 1, 2]
    egg_df_gr1.drop(egg_df_gr1.columns[cols], axis=1, inplace=True)

    # like before, we need a flag to join the information back to the hbw_mode df. We also sort the dfs
    # before concatenating
    egg_df_gr1['egressflag'] = egg_df_gr1['Production Zone'].astype(str) + \
                               egg_df_gr1['Destination Zone'].astype(str) + \
                               egg_df_gr1['PrimaryMode'].astype(str)
    egg_df_gr1 = egg_df_gr1.sort_values(['Production Zone', 'Destination Zone', 'egressflag'])

    # create unique list for selection
    list_un_eg = egg_df_gr1['egressflag'].unique().tolist()

    # get temp dataframe to do the assigning of the egress mode; this will later be integrated with the
    # chunk being processed
    temp_df = mand_mode
    temp_df['egressflag'] = np.where(temp_df['EgressZone'] > 0,
                                     temp_df['Production Zone'].astype(str) +
                                     temp_df['Destination Zone'].astype(str) +
                                     temp_df['PrimaryMode'].astype(str),
                                     np.nan)
    temp_df = temp_df.loc[(temp_df['egressflag'].isin(list_un_eg))].sort_values(
        ['Production Zone', 'Destination Zone', 'egressflag'])

    # concatenate the dfs
    temp_df = common.concat_df(temp_df, egg_df_gr1, 1)

    # remove the egress records from the hbw_mode chunk and replace them with the temp df. One will need to
    # get rid of duplicated columns as well
    mand_mode = mand_mode.loc[mand_mode['egressflag'].isnull()]
    mand_mode = common.concat_df(mand_mode, temp_df, 0)

    return mand_mode
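
# A minimal sketch (toy data) of the expand-by-counts trick used in egress_prob above: each unique
# O-D/primary-mode row of probabilities is repeated once per observed trip, so that Cheval samples one
# egress mode per trip record rather than one per unique O-D pair. Zone numbers and probabilities are
# illustrative.
import numpy as np
import pandas as pd

toy = pd.DataFrame({
    'Production Zone': [1, 1, 2],
    'Destination Zone': [10, 11, 10],
    'counts': [3, 1, 2],  # number of trips observed for the O-D pair
    'Uber': [0.10, 0.20, 0.05],
    'Walk': [0.30, 0.40, 0.15]
})

expanded = toy.loc[np.repeat(toy.index.values, toy['counts'])]
expanded = expanded.drop('counts', axis=1)
expanded['Bus'] = 1 - (expanded['Uber'] + expanded['Walk'])  # bus is the residual probability, as above
print(expanded)  # 6 rows: one per trip, the shape handed to sample_from_weights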
def elemental_mode(self, mand_prob_l1, veh_segment, mand_prob_append, purpose, trips):
    """
    This function samples an elemental (primary) mode for each record in the requested vehicle segment.

    :param self:
    :param mand_prob_l1: level 1 mode probability dataframe for the mandatory purpose
    :param veh_segment: vehicle segment whose probability columns are sampled
    :param mand_prob_append: production zone, destination zone, and market segment columns to attach
    :param purpose: trip purpose being processed
    :param trips: trips dataframe
    :return: mand_mode dataframe with the sampled mode attached, or an empty dataframe if there were no
        non-zero probabilities for the vehicle segment
    """

    # null dataframes
    mand_level1 = pd.DataFrame()
    sampled_df = pd.DataFrame()
    mand_l1 = pd.DataFrame()
    collect_df = {}

    # generate the appropriate df for sampling by veh_type
    # and then attach the production, destination, and market segment
    mand_l1 = self.vehtype_prob(mand_prob_l1, veh_segment)  # get the columns of prob by veh type that will be sampled
    mand_level1 = common.concat_df(mand_prob_append, mand_l1, 1)  # concat function

    # Now prepare the file for sampling and only keep relevant columns
    mand_level2, df_join = self.prob_df_longtermchoice(trips, purpose, mand_level1, veh_segment)
    mand_level2 = mand_level2.iloc[:, 0:52]
    mand_level2 = mand_level2.loc[(mand_level2 != 0).any(axis=1)]  # get rid of rows that are zero all the way

    if len(mand_level2) > 0:
        control_parameters.logger.info("Start sampling of the elemental mode for vehicle segment %s. This is "
                                       "initiated provided the binary probability file has records with "
                                       "non-zero probabilities. A total of %s records are in the df" %
                                       (veh_segment, len(mand_level2)))

        # sample using Cheval
        sampled_df = pd.DataFrame(sample_from_weights(mand_level2, randomizer=self.prng, astype='category',
                                                      n_threads=1, n_draws=1)).reset_index()
        sampled_df.columns = ['Production Zone', 'Destination Zone', 'Market Segment', 'Mode']

        # create flag to help select the records in the trips dataframe. Creating the flag allows us to select
        # exactly the same number of rows in the trips dataframe that match the sampled_df in length.
        # Also sort the df to ensure that we don't end up concatenating the wrong o-ds
        sampled_df['flag'] = sampled_df['Production Zone'].astype(str) + sampled_df['Destination Zone'].astype(str) + \
                             sampled_df['Market Segment'].astype(str)
        sampled_df = sampled_df.sort_values(['Production Zone', 'Destination Zone', 'Market Segment'])
        list_un = sampled_df['flag'].unique().tolist()

        # select the records from the trips dataframe that correspond to the sampled df using the flag. Once again
        # sort to ensure proper concatenation
        df_join = df_join.loc[(df_join['flag'].isin(list_un))]
        df_join = df_join.sort_values(['taz_i', 'taz_j', 'market_seg'])

        # concatenate the data. Concatenation is needed because the flag is not yet unique and a merge
        # would result in a larger dataframe than what we started with
        collect_df[veh_segment] = common.concat_df(df_join, sampled_df, 1)

    if len(collect_df) > 0:
        control_parameters.logger.info("Concatenate the dictionary of dataframes by vehicle segment")

        # now make one dataframe across vehicle segments
        mand_mode = pd.concat(collect_df.values(), ignore_index=True)
        mand_mode['PrimaryMode'] = mand_mode['Mode'].map(lambda x: str(x)[10:])
        mand_mode['PrimaryMode'] = mand_mode['PrimaryMode'].astype('category')

        return mand_mode

    # return empty df
    mand_mode = pd.DataFrame()
    control_parameters.logger.info("Returning an empty dataframe because there were no elemental probabilities "
                                   "in the i-j pairs for the vehicle segment %s" % veh_segment)
    return mand_mode
def elemental_mode(self, mand_prob_l1, veh_segment, mand_prob_append, purpose, trips):
    """
    This function samples an elemental (primary) mode for each record in the requested vehicle segment.

    :param self:
    :param mand_prob_l1: level 1 mode probability dataframe for the mandatory purpose
    :param veh_segment: vehicle segment whose probability columns are sampled
    :param mand_prob_append: production zone, destination zone, and market segment columns to attach
    :param purpose: trip purpose being processed
    :param trips: trips dataframe
    :return: mand_mode dataframe with the sampled mode attached, or an empty dataframe if there were no
        non-zero probabilities for the vehicle segment
    """

    # null dataframes
    mand_level1 = pd.DataFrame()
    sampled_df = pd.DataFrame()
    mand_l1 = pd.DataFrame()
    collect_df = {}

    # generate the appropriate df for sampling by veh_type
    # and then attach the production, destination, and market segment
    mand_l1 = self.vehtype_prob(mand_prob_l1, veh_segment)  # get the columns of prob by veh type that will be sampled
    mand_level1 = common.concat_df(mand_prob_append, mand_l1, 1)  # concat function

    # Now prepare the file for sampling and only keep relevant columns
    mand_level2, df_join = self.prob_df_longtermchoice(trips, purpose, mand_level1, veh_segment)
    mand_level2 = mand_level2.iloc[:, 0:52]
    mand_level2 = mand_level2.loc[(mand_level2 != 0).any(axis=1)]  # get rid of rows that are zero all the way

    if len(mand_level2) > 0:
        common.logger.info("Start sampling of the elemental mode for vehicle segment %s. This is initiated "
                           "provided the binary probability file has records with non-zero probabilities."
                           % veh_segment)

        # sample using Cheval
        sampled_df = pd.DataFrame(sample_from_weights(mand_level2, randomizer=self.seed, astype='category',
                                                      n_threads=1, n_draws=1)).reset_index()
        sampled_df.columns = ['Production Zone', 'Destination Zone', 'Market Segment', 'Mode']

        # create flag to help select the records in the trips dataframe. Creating the flag allows us to select
        # exactly the same number of rows in the trips dataframe that match the sampled_df in length.
        # Also sort the df to ensure that we don't end up concatenating the wrong o-ds
        sampled_df['flag'] = sampled_df['Production Zone'].astype(str) + sampled_df['Destination Zone'].astype(str) + \
                             sampled_df['Market Segment'].astype(str)
        sampled_df = sampled_df.sort_values(['Production Zone', 'Destination Zone', 'Market Segment'])
        list_un = sampled_df['flag'].unique().tolist()

        # select the records from the trips dataframe that correspond to the sampled df using the flag. Once again
        # sort to ensure proper concatenation
        df_join = df_join.loc[(df_join['flag'].isin(list_un))]
        df_join = df_join.sort_values(['taz_i', 'taz_j', 'market_seg'])

        # concatenate the data. Concatenation is needed because the flag is not yet unique and a merge
        # would result in a larger dataframe than what we started with
        collect_df[veh_segment] = common.concat_df(df_join, sampled_df, 1)

    if len(collect_df) > 0:
        common.logger.info("Concatenate the dictionary of dataframes by vehicle segment")

        # now make one dataframe across vehicle segments
        mand_mode = pd.concat(collect_df.values(), ignore_index=True)
        mand_mode['PrimaryMode'] = mand_mode['Mode'].map(lambda x: str(x)[10:])

        return mand_mode

    # return empty df
    mand_mode = pd.DataFrame()
    common.logger.info("Returning an empty dataframe because there were no elemental probabilities in the "
                       "i-j pairs for the vehicle segment %s" % veh_segment)
    return mand_mode
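
# A minimal sketch (toy data) of the sort-and-concatenate alignment used in elemental_mode above. Because the
# Production/Destination/Market-Segment flag is not unique, a merge would multiply rows; instead both frames
# are sorted on the same keys so a positional concatenation pairs the right records. pd.concat stands in for
# common.concat_df (whose internals are not shown here); zone numbers and mode labels are illustrative.
import pandas as pd

sampled_toy = pd.DataFrame({
    'Production Zone': [2, 1],
    'Destination Zone': [10, 10],
    'Market Segment': [0, 0],
    'Mode': ['Auto', 'Transit']
})
trips_toy = pd.DataFrame({
    'taz_i': [2, 1],
    'taz_j': [10, 10],
    'market_seg': [0, 0]
})

sampled_toy = sampled_toy.sort_values(['Production Zone', 'Destination Zone', 'Market Segment']).reset_index(drop=True)
trips_toy = trips_toy.sort_values(['taz_i', 'taz_j', 'market_seg']).reset_index(drop=True)

# positional (axis=1) concatenation, standing in for common.concat_df(df_join, sampled_df, 1)
paired = pd.concat([trips_toy, sampled_toy], axis=1)
print(paired)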
def run(self, trips_hhold, nonmandatory_purposes):
    """
    This function runs the destination solver on the trips_hhold dataframe and then batches out the
    non-mandatory trip matrices in the format desired by MLOGIT.

    :param trips_hhold: trips and household dataframe with the peak flag attached
    :param nonmandatory_purposes: list of non-mandatory purposes for which matrices are saved
    :return: trips_hhold dataframe with origins and/or destinations filled in
    """

    # run destination solver. But first translate the trips_hhold dataframe to a numpy array. This results in a
    # drop in run times. Unlike the peak solver, which saw a drop from 30 mins to 20 seconds, the destination
    # solver sees around a 50% run time saving, to around 2 hours.

    # some housekeeping before running the destination solver function. First, create the flag that will help
    # choose the appropriate non-mandatory matrix to sample from.
    mprobe_valid.logger.info("Prepare the trips_hhold dataframe for the destination solver function")
    nrows_trips_hhold = trips_hhold.shape[0]
    trips_hhold['dict_flag'] = trips_hhold['purpose'].astype(str) + '_' + trips_hhold['market_seg'].apply(str) + '_' + \
                               trips_hhold['peak_flag'].apply(str)

    # Second, there are many instances where the person only makes mandatory tours, in which case we don't need to
    # evaluate it. Thus, only keep records where the taz_j has a 0 to run the destination solver. Create a flag to
    # help identify the appropriate records.
    mprobe_valid.logger.info("Getting households and person trip records that have more than just mandatory "
                             "trips, thereby needing the destination solver.")
    tgr = trips_hhold.iloc[np.where(trips_hhold['taz_j'].values == 0)]. \
        groupby(['hhid', 'pid']). \
        size(). \
        reset_index(name="count")
    tgr['solver_flag'] = 1
    tgr.drop('count', axis=1, inplace=True)

    # transfer the flag information to the trips_hhold while holding it in a temp_df and slicing it to hold the
    # requisite records.
    trips_hhold = pd.merge(trips_hhold, tgr, how='left', on=['hhid', 'pid'])
    trips_hhold['solver_flag'].fillna(0, inplace=True)

    # create temp df to run through the destination solver
    temp_df = trips_hhold
    temp_df = temp_df.iloc[np.where(temp_df['solver_flag'].values == 1)]

    # The destination solver is run, but first provide a numpy array. Once run, the numpy array is converted back
    # to a dataframe.
    mprobe_valid.logger.info("Running the destination solver. Please be patient. There are too many records and "
                             "machinations that need to be completed before it all ends.")
    trips_hhold_array = temp_df.values
    trips_hhold_array = self.destination_solver(trips_hhold_array)
    trips_hhold_dest_df = pd.DataFrame(trips_hhold_array)
    trips_hhold_dest_df.columns = trips_hhold.columns

    # The records are now concatenated back to the original trips_hhold df, but as a replacement
    mprobe_valid.logger.info("Concatenating the records back")
    trips_hhold = trips_hhold.iloc[np.where(trips_hhold['solver_flag'].values == 0)]
    trips_hhold = common.concat_df(trips_hhold, trips_hhold_dest_df, 0)
    trips_hhold.sort_values(['hhid', 'pid'], inplace=True, ascending=True)

    # check if the length of the final dataframe (after concatenating) is the same as the original
    if not len(trips_hhold) == nrows_trips_hhold:
        mprobe_valid.logger.info("The number of rows after running the destination solver is different from "
                                 "that of the original dataframe. Something is wrong.")
        exit(0)
    else:
        mprobe_valid.logger.info("Destination solver finished successfully")

    # now batch out the necessary matrices
    mprobe_valid.logger.info("Start saving the matrices in the format desired by Mlogit")
    for purpose in nonmandatory_purposes:
        nonmand_only = trips_hhold.iloc[np.where(trips_hhold['purpose'].values == purpose)]

        # now loop over the peak periods
        for peak in range(0, 2):
            timeperiod_df = nonmand_only.loc[nonmand_only['peak_flag'] == peak]
            timeperiod_df = timeperiod_df.groupby(['taz_i', 'taz_j', 'purpose', 'market_seg']).size(). \
                reset_index(name='freq')

            # now loop over the segments
            for segment in timeperiod_df['market_seg'].unique():
                # create filename and then groupby
                # only keep relevant cols and set a flag
                # Merge the ggh zones and the trip list and convert to wide format
                fname = purpose + "_" + str(segment)
                df_hbw = timeperiod_df.loc[timeperiod_df['market_seg'] == segment]
                df_hbw = df_hbw[['taz_i', 'taz_j']]
                df_hbw['probflag'] = 1

                # Make square dataframe for Fortran
                df_hbw1 = pd.merge(self.ggh2, df_hbw, how="left",
                                   left_on=['ggh_zone_x', 'ggh_zone_y'], right_on=['taz_i', 'taz_j'])
                df_hbw2 = df_hbw1.pivot_table(index='ggh_zone_x', columns='ggh_zone_y', values='probflag',
                                              fill_value=0)
                to_fortran(df_hbw2,
                           os.path.join(mprobe_valid.dirListing_abm,
                                        fname + ' peak_flag ' + str(peak) + '.bin'),
                           n_columns=4000)

    mprobe_valid.logger.info("All matrices saved.")
    return trips_hhold
def run(self, trips_hhold, nonmandatory_purposes, chaos_monkey):
    """
    This function runs the destination solver on the trips_hhold dataframe and then batches out the
    non-mandatory trip matrices in the format desired by MLOGIT.

    :param trips_hhold: trips and household dataframe with the peak flag attached
    :param nonmandatory_purposes: list of non-mandatory purposes for which matrices are saved
    :param chaos_monkey: randomizer passed to the destination solver
    :return: trips_hhold dataframe with origins and/or destinations filled in
    """

    # run destination solver. But first translate the trips_hhold dataframe to a numpy array. This results in a
    # drop in run times. Unlike the peak solver, which saw a drop from 30 mins to 20 seconds, the destination
    # solver sees around a 50% run time saving, to around 2 hours.

    # some housekeeping before running the destination solver function. First, create the flag that will help
    # choose the appropriate non-mandatory matrix to sample from.
    control_parameters.logger.info("Prepare the trips_hhold dataframe for the destination solver function")
    nrows_trips_hhold = trips_hhold.shape[0]
    trips_hhold['dict_flag'] = trips_hhold['purpose'].astype(str) + '_' + trips_hhold['market_seg'].apply(str) + '_' + \
                               trips_hhold['peak_flag'].apply(str)

    # Second, there are many instances where the person only makes mandatory tours, in which case we don't need to
    # evaluate it. Thus, only keep records where the taz_j has a 0 to run the destination solver. Create a flag to
    # help identify the appropriate records.
    control_parameters.logger.info("Getting households and person trip records that have more than just "
                                   "mandatory trips, thereby needing the destination solver.")
    tgr = trips_hhold.iloc[np.where(trips_hhold['taz_j'].values == 0)]. \
        groupby(['hhid', 'pid']). \
        size(). \
        reset_index(name="count")
    tgr['solver_flag'] = 1
    tgr.drop('count', axis=1, inplace=True)

    # transfer the flag information to the trips_hhold while holding it in a temp_df and slicing it to hold the
    # requisite records.
    trips_hhold = pd.merge(trips_hhold, tgr, how='left', on=['hhid', 'pid'])
    trips_hhold['solver_flag'].fillna(0, inplace=True)

    # create temp df to run through the destination solver
    temp_df = trips_hhold
    temp_df = temp_df.iloc[np.where(temp_df['solver_flag'].values == 1)]

    control_parameters.logger.info("A total of %s nonmandatory trips will be assigned an origin and/or "
                                   "destination" % temp_df.shape[0])
    control_parameters.logger.info("Running the destination solver. Please be patient. There are too many "
                                   "records and machinations that need to be completed before it all ends.")
    trips_hhold_array = temp_df.values
    trips_hhold_array = self.destination_solver(trips_hhold_array, chaos_monkey)
    trips_hhold_dest_df = pd.DataFrame(trips_hhold_array)
    trips_hhold_dest_df.columns = trips_hhold.columns

    # The records are now concatenated back to the original trips_hhold df, but as a replacement
    control_parameters.logger.info("Concatenating the records back")
    trips_hhold = trips_hhold.iloc[np.where(trips_hhold['solver_flag'].values == 0)]
    trips_hhold = common.concat_df(trips_hhold, trips_hhold_dest_df, 0)
    trips_hhold.sort_values(['hhid', 'pid'], inplace=True, ascending=True)
    trips_hhold = trips_hhold.astype(dtype={
        "hhid": "int32", "pid": "int8", "tour_id": "int8", "subtour_id": "int8", "trip_id": "int8",
        "activity_i": "category", "activity_j": "category", "taz_i": "int16", "taz_j": "int16",
        "tour_direction": "category", "purpose": "category", "trip_direction": "category",
        "peak_factor": "float64", "taz": "int16", "hhinc": "int32", "dtype": "int8", "hhsize": "int8",
        "nveh": "int8", "auto_suff": "int8", "market_seg": "int8", "rnum": "float64", "peak_flag": "int8",
        "dict_flag": "category", "solver_flag": "float64"
    })

    # now batch out the necessary matrices
    control_parameters.logger.info("Start saving the matrices in the format desired by Mlogit")
    for purpose in nonmandatory_purposes:
        nonmand_only = trips_hhold[trips_hhold['purpose'].values == purpose].copy()

        # set the market segment to 0 as NHB has no market segment and Bill's prob file will have this as 1. We
        # will reset 1 to 0 during mode choice discretization
        if purpose == "NHB":
            nonmand_only['market_seg'] = 0

        # now loop over the peak periods
        for peak in range(0, 2):
            timeperiod_df = nonmand_only[nonmand_only['peak_flag'] == peak].copy()
            timeperiod_df = timeperiod_df.groupby(['taz_i', 'taz_j', 'purpose', 'market_seg']).size(). \
                reset_index(name='freq')

            # now loop over the segments
            for segment in timeperiod_df['market_seg'].unique():
                # create filename and then groupby
                # only keep relevant cols and set a flag
                # Merge the ggh zones and the trip list and convert to wide format
                dataFrameDtype = common.set_dtype_defintions(control_parameters.inputDirListing,
                                                             ev.EarlyValidFiles.getJSONFileList())
                mtx_name = dataFrameDtype[ev.EarlyValidFiles.MATRIX_NAMES]

                # the matrices have to be given a specific filename that corresponds to the control file for MLOGIT
                fname = purpose + "_" + str(segment) + "_" + str(peak)
                for key, value in mtx_name.items():
                    if fname == key:
                        fname_mtx = value
                        control_parameters.logger.info("The %s matrix is being saved" % fname_mtx)

                df_hbw = timeperiod_df[timeperiod_df['market_seg'] == segment].copy()
                df_hbw = df_hbw[['taz_i', 'taz_j']]
                df_hbw['probflag'] = 1

                # Make square dataframe for Fortran
                df_hbw1 = pd.merge(self.ggh2, df_hbw, how="left",
                                   left_on=['ggh_zone_x', 'ggh_zone_y'], right_on=['taz_i', 'taz_j'])
                df_hbw2 = df_hbw1.pivot_table(index='ggh_zone_x', columns='ggh_zone_y', values='probflag',
                                              fill_value=0)
                to_fortran(df_hbw2,
                           os.path.join(control_parameters.dirListing_mlogitmatrices, fname_mtx + '.bin'),
                           n_columns=4000)

    return trips_hhold
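
# A minimal sketch (toy three-zone system) of the square-matrix construction that the run functions above
# perform before writing each binary with to_fortran. It assumes self.ggh2 is the full ggh_zone_x/ggh_zone_y
# cross-product of the zone system: a left merge against that cross-product keeps every zone pair, trips are
# flagged with probflag = 1, and pivot_table(..., fill_value=0) yields the dense zone-by-zone table.
import pandas as pd

zones = [1, 2, 3]
ggh2_toy = pd.MultiIndex.from_product([zones, zones], names=['ggh_zone_x', 'ggh_zone_y']).to_frame(index=False)

trips_toy = pd.DataFrame({'taz_i': [1, 2, 3], 'taz_j': [2, 3, 1], 'probflag': [1, 1, 1]})

square = pd.merge(ggh2_toy, trips_toy, how='left',
                  left_on=['ggh_zone_x', 'ggh_zone_y'], right_on=['taz_i', 'taz_j'])
square = square.pivot_table(index='ggh_zone_x', columns='ggh_zone_y', values='probflag', fill_value=0)
print(square)  # 3 x 3 table of 0/1 flags, the shape handed to to_fortran in the functions above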