def mixer_run(self):
    """Assemble a trip table enriched with household attributes.

    Reads the GGHMV4 household and trip list outputs, tags every household
    with its market segment, compacts column dtypes, and left-joins the
    household attributes (income, dwelling type, size, number of vehicles,
    auto_sufficiency, and the integer market segment) onto the trip records.

    :return: trips dataframe merged with the household attributes
    """
    mprobe_valid.logger.info("Batch in the household and trips list files from a gghm run")

    # shorthand for the file-name constants container
    files = mprobe_valid.EarlyValidFiles_MlogitProbe
    hh = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm, files.HOUSE_HOLDS_OUT))
    trips = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm, files.TRIPS_OUT))

    # tag each household by the market segment it belongs to
    hh = common.market_segment(hh)

    # downcast column dtypes on both frames to reduce memory requirements
    for col, dtype in self.dataFrameDtype[files.DTYPE_TRIPS].items():
        trips[col] = trips[col].astype(dtype)
    for col, dtype in self.dataFrameDtype[files.DTYPE_HOUSEHOLDS].items():
        hh[col] = hh[col].astype(dtype)

    # Left-join the household attributes onto the trips so every trip record
    # carries its household's income, dwelling type, size, vehicle count,
    # auto_sufficiency, and integer market segment.
    return pd.merge(trips, hh, how='left', on='hhid')
def main():
    """Driver for vehicle-type sampling and peak-period mode sampling.

    Sequence:
      step 0: seed/prng comes from control_parameters
      step 1: run the VehicleSampling class
      step 2: run peak (and, eventually, off-peak) mode sampling per purpose
    """
    control_parameters.logger.info("Processing Start")
    prng = control_parameters.prng

    ################################################################################################################
    control_parameters.logger.info("Sample and attach vehicle type to the households")

    # TODO trips need to be set to the nonmand_pairs variable from runner_MProbe
    trips = pd.read_csv(r"C:\Personal\IMM\foo_nonmand_all.csv")

    # bring in the GGHMV4's household list file and attach a market segment to each household
    control_parameters.logger.info("Batch in the household and trips list files from a gghm run")
    hh = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.HOUSEHOLDS_OUT))
    # trips = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.TRIPS_OUT))
    veh_type = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.SAMPLE_VEH_PROPS))

    # attach the market segment of each household
    hh = common.market_segment(hh)
    trips_vehtype = VehicleSampling().run(hh, trips, veh_type, prng)
    # trips_vehtype.iloc[0:500].to_csv(os.path.join(control_parameters.outputDirListing, 'foo_nonmand_veh.csv'), index = False)

    ################################################################################################################
    control_parameters.logger.info("Sample and attach elemental modes to the peak HBW trip records")

    # PEAK ONLY
    trips_vehtype_pk = trips_vehtype.loc[trips_vehtype['peak_flag'] == 1]
    hbw_pk = ModeSampling(prng).run("PEAK", "HBW", trips_vehtype_pk)
    control_parameters.logger.info("HBW peak trips discretized.")

    # Need to set the market segment of the school and univ trips to zero.
    # BUG FIX: the education and NHB masks were previously built on
    # trips_vehtype (the full frame) but applied to trips_vehtype_pk, which
    # misaligns the boolean index; the mask must come from the frame being
    # filtered. A .copy() also prevents SettingWithCopy on the assignment.
    trips_vehtype_pk_edu = trips_vehtype_pk.loc[
        (trips_vehtype_pk['purpose'] == 'HBS') | (trips_vehtype_pk['purpose'] == 'HBU')].copy()
    trips_vehtype_pk_edu['market_seg'] = 0

    # BUG FIX: these variables were previously overwritten with the None
    # returned by the chained DataFrame.to_csv; keep the sampled frame and
    # save it in a separate statement so the results remain usable.
    hbs_pk = ModeSampling().run("PEAK", "HBS", trips_vehtype_pk_edu, prng)
    hbs_pk.to_csv(r"c:/personal/imm/foo_hbspk_test.csv")
    hbu_pk = ModeSampling().run("PEAK", "HBU", trips_vehtype_pk_edu, prng)
    hbu_pk.to_csv(r"c:/personal/imm/foo_hbupk_test.csv")
    control_parameters.logger.info("HBS and HBU peak trips discretized.")

    hbo_pk = ModeSampling().run("PEAK", "HBO", trips_vehtype_pk, prng)
    hbo_pk.to_csv(r"c:/personal/imm/foo_hbopk_test.csv")
    control_parameters.logger.info("HBO peak trips discretized.")

    hbm_pk = ModeSampling().run("PEAK", "HBM", trips_vehtype_pk, prng)
    hbm_pk.to_csv(r"c:/personal/imm/foo_hbmpk_test.csv")
    control_parameters.logger.info("HBM peak trips discretized.")

    trips_vehtype_pk_nhb = trips_vehtype_pk.loc[trips_vehtype_pk['purpose'] == 'NHB'].copy()
    trips_vehtype_pk_nhb['market_seg'] = 0
    nhb_pk = ModeSampling().run("PEAK", "NHB", trips_vehtype_pk_nhb, prng)
    nhb_pk.to_csv(r"c:/personal/imm/foo_nhbpk_test.csv")
    control_parameters.logger.info("NHB peak trips discretized.")

    wbo_pk = ModeSampling().run("PEAK", "WBO", trips_vehtype_pk, prng)
    wbo_pk.to_csv(r"c:/personal/imm/foo_wbopk_test.csv")
    control_parameters.logger.info("WBO peak trips discretized.")

    # common.logger.info("Sample and attach elemental modes to the off-peak HBW trip records")
    # mand_sample_offpk = ModeSampling(seed).run("OFF_PEAK", "HBW", trips_vehtype)
    # final_df = pd.concat([hbw_pk, hbs_pk, hbu_pk, hbo_pk, hbm_pk, nhb_pk, wbo_pk], axis=0).\
    # to_csv(r"c:/personal/imm/foo_mand_test.csv")
    # mand_sample_offpk.iloc[0:25000].to_csv(r"c:/personal/imm/foo.csv")
    ################################################################################################################
    common.logger.info("Processing Ended")
    print("")
def run(self):
    """
    Run the vehicle sampling for the households and transfer the result to trips.

    Reads the GGHMV4 household and trip lists, tags households with a market
    segment, samples a vehicle type for every household via assign_vehtype,
    and merges the hh_veh_type / mseg / vseg / flag columns onto the trips.

    :return: trips dataframe with vehicle-type and market-segment columns attached
    """
    # bring in the GGHMV4's household and trip list files and attach a market segment to each household
    common.logger.info("Batch in the household and trips list files from a gghm run")
    hh = pd.read_csv(os.path.join(control_parameters.dirListing, EarlyValidFiles.HOUSE_HOLDS_OUT))
    trips = pd.read_csv(os.path.join(control_parameters.dirListing, EarlyValidFiles.TRIPS_OUT))
    hh = common.market_segment(hh)  # tag each household by the market segment it belongs to

    # set dtypes for the household and trips dataframe to reduce memory requirements
    for key, value in self.dataFrameDtype[EarlyValidFiles.DTYPE_TRIPS].items():
        trips[key] = trips[key].astype(value)
    for key, value in self.dataFrameDtype[EarlyValidFiles.DTYPE_HOUSEHOLDS].items():
        hh[key] = hh[key].astype(value)

    # bring in Vehicle Type file. Bryce's file has a certain structure (wide format) that Bill prefers for MLOGIT.
    veh_type = pd.read_csv(os.path.join(control_parameters.dirListing, EarlyValidFiles.SAMPLE_VEH_PROPS))
    veh_type.rename(columns={'ggh_zone': 'taz'}, inplace=True)
    veh_type = pd.melt(veh_type, id_vars=['taz']).sort_values(['taz', 'variable'])

    # The 'variable' column is '<mseg1>_<mseg2>_<vtype1>_<vtype2>' (e.g.
    # 'suff_low_trad_auto'): the first two tokens form the market segment,
    # the last two the vehicle type. Split once instead of once per column.
    tokens = veh_type['variable'].str.split('_')
    veh_type['mseg'] = tokens.str[0] + '_' + tokens.str[1]

    # add in integer based market segment category (same codes as the rest of the model)
    market_seg_codes = {
        'nocar_low': 0, 'nocar_high': 1,
        'insuff_low': 2, 'insuff_high': 3,
        'suff_low': 4, 'suff_high': 5
    }
    veh_type['market_seg'] = veh_type['mseg'].map(market_seg_codes).astype('int8')

    # extract the vehicle type and drop the helper column
    veh_type['vtype'] = tokens.str[2] + '_' + tokens.str[3]
    veh_type.drop(['mseg'], inplace=True, axis=1)

    ################################################################################################################
    # RUN VEHICLE SAMPLING
    # invoke the vehicle sampling method to attach a vehicle type to every household
    common.logger.info("Add vehicle type to every household")
    hh = self.assign_vehtype(hh, veh_type, self.seed)

    # only keep the hhid and veh_type column
    hh = hh[['hhid', 'hh_veh_type']]

    # dictionary of market segment key and values
    market_seg_def = {
        0: 'nocar_low', 1: "nocar_high", 2: "insuff_low",
        3: "insuff_high", 4: "suff_low", 5: "suff_high"
    }
    # dictionary of veh segment key and values
    veh_seg_def = {'trad_auto': 0, "trad_uber": 1, "av_auto": 2, "av_uber": 3}

    # transfer the veh_type and market_segment by household id to the trips table.
    # Add in a descriptor for the market segment to make it easy to understand
    trips = pd.merge(trips, hh, on='hhid', how='left')

    # map the information and add flag
    trips['mseg'] = trips['market_seg'].map(market_seg_def)
    trips['vseg'] = trips['hh_veh_type'].map(veh_seg_def)
    trips['flag'] = trips['taz_i'].astype(str) + trips['taz_j'].astype(str) + trips['market_seg'].astype(str)

    # BUG FIX: the two adjacent string literals previously concatenated to
    # "...This dataframe isnow ready..."; a separating space has been added.
    common.logger.info(
        "Vehicle type information transferred to every trip record via household. This dataframe is "
        "now ready for mode sampling.")
    return trips
def main():
    """End-to-end IMM runner.

    Validates inputs, runs the MLOGIT probes (mandatory and non-mandatory),
    samples a vehicle type per household, and discretizes modes for every
    trip purpose in both the peak and off-peak periods.
    """
    ################################################################################################################
    control_parameters.logger.info("Bring in the GLOBAL parameters")
    prng = control_parameters.prng
    chaos_monkey = control_parameters.chaos_monkey

    ################################################################################################################
    control_parameters.logger.info("Undertake EARLY VALIDATION")
    # check if files exist
    valid = ev.EarlyValidFiles.validation()

    ################################################################################################################
    control_parameters.logger.info(
        "Batch in the household and trips file from a GGHM run and ATTACH MARKET SEGMENTS"
    )
    hh = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.HOUSEHOLDS_OUT))
    trips = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.TRIPS_OUT))
    hh = common.market_segment(hh)  # tag each household by the market segment it belongs to

    ################################################################################################################
    # BUG FIX: the adjacent literals previously concatenated to "...OFF-PEAKflag."
    control_parameters.logger.info(
        "Merge the household information to the trips file and also create the PEAK/OFF-PEAK "
        "flag.")
    hh, trips, trips_hhold = TripsHhold().run(trips, hh)
    trips_hhold = PeakOffpeak().run(trips_hhold, 1)

    ################################################################################################################
    control_parameters.logger.info(
        "Run MLOGIT PROBE FOR MANDATORY TRIP PURPOSE and save O-D pairs that MLOGIT "
        "needs to produce probabilities for.")
    mand_purposes = ['HBW', 'HBS', 'HBU']
    education = ['HBS', 'HBU']
    # mand_prob_pairs = MandatoryFortranOd().run(trips_hhold, mand_purposes, education)

    ################################################################################################################
    control_parameters.logger.info(
        "Run MLOGIT PROBE FOR NON-MANDATORY trip purposes. This will add an origin and/or "
        "destination to all non-mandatory trip records.")
    nonmandatory_purposes = ['HBO', 'HBM', 'WBO', 'NHB', 'HBE']
    trips_hhold = trips_hhold[(trips_hhold['hhid'] > 0) & (trips_hhold['hhid'] <= 100000)].copy()
    trips_hhold['parallel_flag'] = trips_hhold.groupby('hhid').ngroup()

    # batch in the Other trip purpose trip matrices and create slices of the trips_hhold dataframe given the number
    # of cores being used
    all_other, ggh2 = common.batchin_binaryfiles()
    grp_list = NonMandatoryFortranOd(all_other).slice_dataframe_forparallel(trips_hhold, cpu_cores)

    # Create partial function for Pool
    func_partial = partial(NonMandatoryFortranOd(all_other).run_dest_solver, chaos_monkey=chaos_monkey)

    control_parameters.logger.info("Start parallel processing")
    # BUG FIX: Pool.__exit__ already terminates the workers, so the previous
    # close()/join() calls after the with-block were redundant; let the
    # context manager own the pool's lifetime.
    with Pool(cpu_cores) as pool:
        result_list = pool.map(func_partial, grp_list)
    control_parameters.logger.info("Multiprocessing completed")

    # One dataframe and reorder and set dtypes
    trips_hhold = pd.concat(result_list)
    trips_hhold = NonMandatoryFortranOd(all_other).reorder_setdtype(trips_hhold)

    # save the matrices for MLOGIT
    # NonMandatoryFortranOd(all_other).save_matrices(nonmandatory_purposes, trips_hhold, ggh2)

    ################################################################################################################
    control_parameters.logger.info(
        "Start running MLOGIT. This will take close to 20 hours so sit back and relax."
    )
    # launch_mlogit.LaunchingMlogit().runner_mlogit()

    ################################################################################################################
    # BUG FIX: message previously read "...needed for the mode probabilities
    # are segmented by vehicle type." (ungrammatical, and missing a space).
    control_parameters.logger.info(
        "Sample and attach VEHICLE TYPE to the households. This is needed because the mode "
        "probabilities are segmented by vehicle type.")
    veh_type = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.SAMPLE_VEH_PROPS))
    trips_hhold = VehicleSampling().run(hh, trips_hhold, veh_type, prng)

    ###################################################################################################################
    control_parameters.logger.info("Sample and attach MODES to the PEAK trip records")

    # HBE trips are set aside before mode sampling
    hbe = trips_hhold[trips_hhold['purpose'] == "HBE"].copy()
    trips_hhold = trips_hhold[trips_hhold['purpose'] != "HBE"].copy()
    print("A total of %s trips will be assigned a mode" % trips_hhold.shape[0])

    #####################################################
    # PEAK ONLY
    trips_vehtype_pk = trips_hhold[trips_hhold['peak_flag'] == 1].copy()
    hbw_pk = ModeSampling(prng).run("PEAK", "HBW", trips_vehtype_pk)
    control_parameters.logger.info("HBW peak trips discretized.")

    # Need to set the market segment of the school and univ trips to zero
    trips_vehtype_pk_edu = trips_vehtype_pk[(trips_vehtype_pk['purpose'] == 'HBS') |
                                            (trips_vehtype_pk['purpose'] == 'HBU')].copy()
    trips_vehtype_pk_edu['market_seg'] = 0
    hbs_pk = ModeSampling(prng).run("PEAK", "HBS", trips_vehtype_pk_edu)
    hbu_pk = ModeSampling(prng).run("PEAK", "HBU", trips_vehtype_pk_edu)
    control_parameters.logger.info("HBS and HBU peak trips discretized.")

    hbo_pk = ModeSampling(prng).run("PEAK", "HBO", trips_vehtype_pk)
    control_parameters.logger.info("HBO peak trips discretized.")
    hbm_pk = ModeSampling(prng).run("PEAK", "HBM", trips_vehtype_pk)
    control_parameters.logger.info("HBM peak trips discretized.")

    trips_vehtype_pk_nhb = trips_vehtype_pk[trips_vehtype_pk['purpose'] == 'NHB'].copy()
    trips_vehtype_pk_nhb['market_seg'] = 0
    nhb_pk = ModeSampling(prng).run("PEAK", "NHB", trips_vehtype_pk_nhb)
    control_parameters.logger.info("NHB peak trips discretized.")

    wbo_pk = ModeSampling(prng).run("PEAK", "WBO", trips_vehtype_pk)
    control_parameters.logger.info("WBO peak trips discretized.")

    all_peak_mc_discretized = pd.concat([hbw_pk, hbs_pk, hbu_pk, hbo_pk, hbm_pk, wbo_pk, nhb_pk], axis=0)
    control_parameters.logger.info("Mode Choice discretized for the PEAK period")

    #####################################################
    # OFFPEAK ONLY
    trips_vehtype_offpk = trips_hhold[trips_hhold['peak_flag'] == 0].copy()
    hbw_offpk = ModeSampling(prng).run("OFF_PEAK", "HBW", trips_vehtype_offpk)
    control_parameters.logger.info("HBW off peak trips discretized.")

    # Need to set the market segment of the school and univ trips to zero
    trips_vehtype_offpk_edu = trips_vehtype_offpk[(trips_vehtype_offpk['purpose'] == 'HBS') |
                                                  (trips_vehtype_offpk['purpose'] == 'HBU')].copy()
    trips_vehtype_offpk_edu['market_seg'] = 0
    hbs_offpk = ModeSampling(prng).run("OFF_PEAK", "HBS", trips_vehtype_offpk_edu)
    hbu_offpk = ModeSampling(prng).run("OFF_PEAK", "HBU", trips_vehtype_offpk_edu)
    control_parameters.logger.info("HBS and HBU off peak trips discretized.")

    hbo_offpk = ModeSampling(prng).run("OFF_PEAK", "HBO", trips_vehtype_offpk)
    control_parameters.logger.info("HBO off peak trips discretized.")
    hbm_offpk = ModeSampling(prng).run("OFF_PEAK", "HBM", trips_vehtype_offpk)
    control_parameters.logger.info("HBM off peak trips discretized.")

    trips_vehtype_offpk_nhb = trips_vehtype_offpk[trips_vehtype_offpk['purpose'] == 'NHB'].copy()
    trips_vehtype_offpk_nhb['market_seg'] = 0
    # BUG FIX: the period string was previously misspelled "OFf_PEAK"
    nhb_offpk = ModeSampling(prng).run("OFF_PEAK", "NHB", trips_vehtype_offpk_nhb)
    control_parameters.logger.info("NHB off peak trips discretized.")

    wbo_offpk = ModeSampling(prng).run("OFF_PEAK", "WBO", trips_vehtype_offpk)
    control_parameters.logger.info("WBO off peak trips discretized.")

    all_offpeak_mc_discretized = pd.concat(
        [hbw_offpk, hbs_offpk, hbu_offpk, hbo_offpk, hbm_offpk, wbo_offpk, nhb_offpk], axis=0)
    control_parameters.logger.info("Mode Choice discretized for the OFF PEAK period")

    all_discretized = pd.concat([all_peak_mc_discretized, all_offpeak_mc_discretized], axis=0)
    all_discretized.to_csv(r"c:\\personal\\imm\\outputs\\foo_all_Aug29.csv")
    control_parameters.logger.info("Processing Ended")
    print("")
def run(self, mand_purposes, education):
    """Build and save the MLOGIT probe matrices for the mandatory trip purposes.

    For every purpose, peak flag (0/1), and market segment observed in the
    merged trips/household table, writes a square zone-by-zone 0/1 matrix in
    Fortran binary format marking the O-D pairs MLOGIT must evaluate.

    :param mand_purposes: list of mandatory purposes to evaluate (e.g. ['HBW', 'HBS', 'HBU'])
    :param education: subset of purposes that carry no market segmentation (e.g. ['HBS', 'HBU'])
    :return: None; one .bin file is written per purpose/segment/peak flag
    """
    # bring in the GGHMV4's household and trip list files and attach a market segment to each household
    mprobe_valid.logger.info("Batch in the household and trips list files from a gghm run")
    hh = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm,
                                  mprobe_valid.EarlyValidFiles_MlogitProbe.HOUSE_HOLDS_OUT))
    trips = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm,
                                     mprobe_valid.EarlyValidFiles_MlogitProbe.TRIPS_OUT))
    hh = common.market_segment(hh)  # tag each household by the market segment it belongs to

    # set dtypes for the household and trips dataframe to reduce memory requirements
    for key, value in self.dataFrameDtype[mprobe_valid.EarlyValidFiles_MlogitProbe.DTYPE_TRIPS].items():
        trips[key] = trips[key].astype(value)
    for key, value in self.dataFrameDtype[mprobe_valid.EarlyValidFiles_MlogitProbe.DTYPE_HOUSEHOLDS].items():
        hh[key] = hh[key].astype(value)

    # Merge the hholds info to the trips. By doing so, we can bring in a bunch of household attributes
    # including income, dwelling type, size, number of vehicles, and auto_sufficiency. Add in an integer
    # definition for one of six market segments.
    trips_hhold = pd.merge(trips, hh, how='left', left_on='hhid', right_on='hhid')
    trips_hhold = self.identify_peak(trips_hhold)

    # batch in ggh zone numbers and add in two columns for i and j zones
    ggh = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm,
                                   mprobe_valid.EarlyValidFiles_MlogitProbe.GGH_EQUIV))
    ggh['key'] = 0

    # BUG FIX: ggh1 was only an alias of ggh despite the "make a copy" comment;
    # take an explicit copy before building the square zone-pair matrix.
    ggh1 = ggh.copy()
    ggh2 = pd.merge(ggh1, ggh, how='left', on='key')

    # generate the matrices desired by MLOGIT
    mprobe_valid.logger.info("Start evaluating the mandatory purposes")
    for purpose in mand_purposes:
        mprobe_valid.logger.info("Evaluating the %s purpose" % purpose)

        # BUG FIX: copy the slice so the market_seg assignment below cannot
        # raise SettingWithCopyWarning or silently mutate trips_hhold. The
        # duplicated if/else selection is also collapsed into one statement.
        mand_only = trips_hhold.loc[trips_hhold['purpose'] == purpose].copy()
        if purpose in education:
            # school and university purposes have no market segmentation; default to segment 0
            mand_only['market_seg'] = 0

        # now loop over the peak periods
        for peak in range(0, 2):
            mprobe_valid.logger.info("Start evaluating the peak_flag %s" % peak)
            timeperiod_df = mand_only.loc[mand_only['peak_flag'] == peak]
            timeperiod_df = timeperiod_df.groupby(
                ['taz_i', 'taz_j', 'purpose', 'market_seg']).size().reset_index(name='freq')

            # now loop over the segments
            for segment in timeperiod_df['market_seg'].unique():
                mprobe_valid.logger.info("Start evaluating the segment %s" % segment)

                # create filename, keep the relevant cols, and set a probe flag
                fname = purpose + "_" + str(segment)
                df_hbw = timeperiod_df.loc[timeperiod_df['market_seg'] == segment]
                df_hbw = df_hbw[['taz_i', 'taz_j']].copy()
                df_hbw['probflag'] = 1

                # this merge is not strictly necessary, but it guarantees alignment with
                # the equivalency file used in the TRESO code
                df_hbw1 = pd.merge(ggh2, df_hbw, how="left",
                                   left_on=['ggh_zone_x', 'ggh_zone_y'],
                                   right_on=['taz_i', 'taz_j'])
                df_hbw2 = df_hbw1.pivot_table(index='ggh_zone_x', columns='ggh_zone_y',
                                              values='probflag', fill_value=0)

                mprobe_valid.logger.info("Saving file to the requisite Fortran format")
                to_fortran(df_hbw2,
                           os.path.join(mprobe_valid.dirListing_abm,
                                        fname + ' peak_flag ' + str(peak) + '.bin'),
                           n_columns=4000)
def run(self):
    """
    Run the vehicle sampling for the households and transfer the result to the trips.

    Reads the GGHMV4 household and trip lists, tags households with a market
    segment, samples a vehicle type for every household via assign_vehtype,
    and merges the hh_veh_type / mseg / vseg / flag columns onto the trips.

    :return: trips dataframe with vehicle-type and market-segment columns attached
    """
    # bring in the GGHMV4's household and trip list files and attach a market segment to each household
    common.logger.info("Batch in the household and trips list files from a gghm run")
    hh = pd.read_csv(os.path.join(control_parameters.dirListing, EarlyValidFiles.HOUSE_HOLDS_OUT))
    trips = pd.read_csv(os.path.join(control_parameters.dirListing, EarlyValidFiles.TRIPS_OUT))
    hh = common.market_segment(hh)  # tag each household by the market segment it belongs to

    # set dtypes for the household and trips dataframe to reduce memory requirements
    for key, value in self.dataFrameDtype[EarlyValidFiles.DTYPE_TRIPS].items():
        trips[key] = trips[key].astype(value)
    for key, value in self.dataFrameDtype[EarlyValidFiles.DTYPE_HOUSEHOLDS].items():
        hh[key] = hh[key].astype(value)

    # bring in Vehicle Type file. Bryce's file has a certain structure (wide format) that Bill prefers for MLOGIT.
    veh_type = pd.read_csv(os.path.join(control_parameters.dirListing, EarlyValidFiles.SAMPLE_VEH_PROPS))
    veh_type.rename(columns={'ggh_zone': 'taz'}, inplace=True)
    # melt to long format: one row per (taz, segment/vehicle-type combination)
    veh_type = pd.melt(veh_type, id_vars=['taz']).sort_values(['taz', 'variable'])

    # attach the market segment to the veh_type. Also add in an integer based market seg.
    # The vehicle type is part of the variable column but starts after the second underscore
    # thus it is stripped out into a column of its own.
    veh_type['mseg1'] = veh_type['variable'].str.split('_').str[0]
    veh_type['mseg2'] = veh_type['variable'].str.split('_').str[1]
    veh_type['mseg'] = veh_type['mseg1'] + '_' + veh_type['mseg2']

    # add in integer based market segment category
    veh_type.loc[(veh_type['mseg'] == 'nocar_low'), 'market_seg'] = 0
    veh_type.loc[(veh_type['mseg'] == 'nocar_high'), 'market_seg'] = 1
    veh_type.loc[(veh_type['mseg'] == 'insuff_low'), 'market_seg'] = 2
    veh_type.loc[(veh_type['mseg'] == 'insuff_high'), 'market_seg'] = 3
    veh_type.loc[(veh_type['mseg'] == 'suff_low'), 'market_seg'] = 4
    veh_type.loc[(veh_type['mseg'] == 'suff_high'), 'market_seg'] = 5
    veh_type['market_seg'] = veh_type['market_seg'].astype('int8')

    # extract the vehicle type and drop unncessary columns
    veh_type['vtype1'] = veh_type['variable'].str.split('_').str[2]
    veh_type['vtype2'] = veh_type['variable'].str.split('_').str[3]
    veh_type['vtype'] = veh_type['vtype1'] + '_' + veh_type['vtype2']
    columns = ['mseg1', 'mseg2', 'mseg', 'vtype1', 'vtype2']
    veh_type.drop(columns, inplace=True, axis=1)

    ################################################################################################################
    # RUN VEHICLE SAMPLING
    # invoke the class and run the vehicle sampling method.
    common.logger.info("Add vehicle type to every household")
    hh = self.assign_vehtype(hh, veh_type, self.seed)

    # only keep the hhid and veh_type column
    hh = hh[['hhid', 'hh_veh_type']]

    # dictionary of market and vehicle segment key and values
    market_seg_def = {
        0: 'nocar_low',
        1: "nocar_high",
        2: "insuff_low",
        3: "insuff_high",
        4: "suff_low",
        5: "suff_high"
    }
    # dictionary of veh segment key and values
    veh_seg_def = {'trad_auto': 0, "trad_uber": 1, "av_auto": 2, "av_uber": 3}

    # transfer the veh_type and market_segment by household id to the trips table.
    # Add in a descriptor for the market segment to make it easy to understand
    trips = pd.merge(trips, hh, on='hhid', how='left')

    # map the information and add flag
    trips['mseg'] = trips['market_seg'].map(market_seg_def)
    trips['vseg'] = trips['hh_veh_type'].map(veh_seg_def)
    # flag concatenates taz_i, taz_j and market_seg with no separator; presumably
    # used as a lookup key downstream — TODO confirm ambiguity is acceptable
    trips['flag'] = trips['taz_i'].astype(str) + trips['taz_j'].astype(str) + trips['market_seg'].astype(str)

    # NOTE(review): the two adjacent string literals below concatenate without a
    # space, so the logged text reads "...This dataframe isnow ready..." —
    # looks like a missing trailing space; confirm before relying on this message.
    common.logger.info("Vehicle type information transferred to every trip record via household. This dataframe is"
                       "now ready for mode sampling.")
    return trips