Example #1
    def mixer_run(self):

        # bring in the GGHMV4's household and trip list files and attach a market segment to each household
        mprobe_valid.logger.info("Batch in the household and trips list files from a gghm run")
        hh = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm, mprobe_valid.EarlyValidFiles_MlogitProbe.HOUSE_HOLDS_OUT))
        trips = pd.read_csv(os.path.join(mprobe_valid.dirListing_abm, mprobe_valid.EarlyValidFiles_MlogitProbe.TRIPS_OUT))
        hh = common.market_segment(hh)  # tag each household by the market segment it belongs to

        # set dtypes for the household and trips dataframe to reduce memory requirements
        for key, value in self.dataFrameDtype[mprobe_valid.EarlyValidFiles_MlogitProbe.DTYPE_TRIPS].items():
            trips[key] = trips[key].astype(value)

        for key, value in self.dataFrameDtype[mprobe_valid.EarlyValidFiles_MlogitProbe.DTYPE_HOUSEHOLDS].items():
            hh[key] = hh[key].astype(value)

        # Merge the hholds info to the trips. By doing so, we can bring in a bunch of household attributes
        # including income, dwelling type, size, number of vehicles, and auto_sufficiency. Add in an integer
        # definition for one of six market segments.
        trips_hhold = pd.merge(trips, hh, how='left', left_on='hhid', right_on='hhid')

        return trips_hhold
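The dtype loops above cast one column at a time from a per-file dtype dictionary. As a minimal, self-contained sketch of the same idea (the column names and dtypes below are hypothetical stand-ins, not the actual DTYPE_TRIPS contents), pandas can also take the whole mapping in a single astype call:

import pandas as pd

# hypothetical dtype mapping in the spirit of DTYPE_TRIPS / DTYPE_HOUSEHOLDS
trip_dtypes = {'hhid': 'int32', 'taz_i': 'int16', 'taz_j': 'int16', 'purpose': 'category'}

trips = pd.DataFrame({'hhid': [1, 1, 2],
                      'taz_i': [101, 102, 103],
                      'taz_j': [201, 202, 203],
                      'purpose': ['HBW', 'HBO', 'NHB']})
trips = trips.astype(trip_dtypes)   # one call instead of a per-column loop
print(trips.dtypes)                 # confirms the downcast dtypes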
Example #2
def main():

    control_parameters.logger.info("Processing Start")
    # the sequence in which the various classes are called is the following:
    # step 0: set seed from the control parameters.py
    # step 1: call and run VehicleSampling class
    # step 2: run peak and then off-peak mode sampling for each trip purpose
    prng = control_parameters.prng

    ####################################################################################################################
    control_parameters.logger.info(
        "Sample and attach vehicle type to the households")

    # TODO trips need to be set to the nonmand_pairs variable from runner_MProbe
    trips = pd.read_csv(r"C:\Personal\IMM\foo_nonmand_all.csv")

    # bring in the GGHMV4's household and trip list files and attach a market segment to each household
    control_parameters.logger.info(
        "Batch in the household and trips list files from a gghm run")
    hh = pd.read_csv(
        os.path.join(control_parameters.inputDirListing,
                     ev.EarlyValidFiles.HOUSEHOLDS_OUT))
    # trips = pd.read_csv(os.path.join(control_parameters.inputDirListing, ev.EarlyValidFiles.TRIPS_OUT))
    veh_type = pd.read_csv(
        os.path.join(control_parameters.inputDirListing,
                     ev.EarlyValidFiles.SAMPLE_VEH_PROPS))

    # attach the market segment of each household
    hh = common.market_segment(hh)

    trips_vehtype = VehicleSampling().run(hh, trips, veh_type, prng)
    # trips_vehtype.iloc[0:500].to_csv(os.path.join(control_parameters.outputDirListing, 'foo_nonmand_veh.csv'), index = False)

    # ####################################################################################################################
    control_parameters.logger.info(
        "Sample and attach elemental modes to the peak HBW trip records")

    # PEAK ONLY

    trips_vehtype_pk = trips_vehtype.loc[trips_vehtype['peak_flag'] == 1]

    hbw_pk = ModeSampling(prng).run("PEAK", "HBW", trips_vehtype_pk)
    control_parameters.logger.info("HBW peak trips discretized.")

    # Need to set the market segment of the school and univ trips to zero
    trips_vehtype_pk_edu = trips_vehtype_pk.loc[
        (trips_vehtype_pk['purpose'] == 'HBS') |
        (trips_vehtype_pk['purpose'] == 'HBU')].copy()
    trips_vehtype_pk_edu['market_seg'] = 0
    hbs_pk = ModeSampling(prng).run("PEAK", "HBS", trips_vehtype_pk_edu)
    hbs_pk.to_csv(r"c:/personal/imm/foo_hbspk_test.csv")
    hbu_pk = ModeSampling(prng).run("PEAK", "HBU", trips_vehtype_pk_edu)
    hbu_pk.to_csv(r"c:/personal/imm/foo_hbupk_test.csv")
    control_parameters.logger.info("HBS and HBU peak trips discretized.")

    hbo_pk = ModeSampling(prng).run("PEAK", "HBO", trips_vehtype_pk)
    hbo_pk.to_csv(r"c:/personal/imm/foo_hbopk_test.csv")
    control_parameters.logger.info("HBO peak trips discretized.")

    hbm_pk = ModeSampling(prng).run("PEAK", "HBM", trips_vehtype_pk)
    hbm_pk.to_csv(r"c:/personal/imm/foo_hbmpk_test.csv")
    control_parameters.logger.info("HBM peak trips discretized.")

    trips_vehtype_pk_nhb = trips_vehtype_pk.loc[
        trips_vehtype_pk['purpose'] == 'NHB'].copy()
    trips_vehtype_pk_nhb['market_seg'] = 0
    nhb_pk = ModeSampling(prng).run("PEAK", "NHB", trips_vehtype_pk_nhb)
    nhb_pk.to_csv(r"c:/personal/imm/foo_nhbpk_test.csv")
    control_parameters.logger.info("NHB peak trips discretized.")

    wbo_pk = ModeSampling(prng).run("PEAK", "WBO", trips_vehtype_pk)
    wbo_pk.to_csv(r"c:/personal/imm/foo_wbopk_test.csv")
    control_parameters.logger.info("WBO peak trips discretized.")

    # common.logger.info("Sample and attach elemental modes to the off-peak HBW trip records")
    # mand_sample_offpk = ModeSampling(seed).run("OFF_PEAK", "HBW", trips_vehtype)

    # final_df = pd.concat([hbw_pk, hbs_pk, hbu_pk, hbo_pk, hbm_pk, nhb_pk, wbo_pk], axis=0).\
    #     to_csv(r"c:/personal/imm/foo_mand_test.csv")
    # mand_sample_offpk.iloc[0:25000].to_csv(r"c:/personal/imm/foo.csv")

    ####################################################################################################################

    common.logger.info("Processing Ended")

    print("")
Example #3
    def run(self):
        """
        This function runs the vehicle sampling for the households


        :return:
        """

        # bring in the GGHMV4's household and trip list files and attach a market segment to each household
        common.logger.info(
            "Batch in the household and trips list files from a gghm run")
        hh = pd.read_csv(
            os.path.join(control_parameters.dirListing,
                         EarlyValidFiles.HOUSE_HOLDS_OUT))
        trips = pd.read_csv(
            os.path.join(control_parameters.dirListing,
                         EarlyValidFiles.TRIPS_OUT))
        hh = common.market_segment(
            hh)  # tag each household by the market segment it belongs to

        # set dtypes for the household and trips dataframe to reduce memory requirements
        for key, value in self.dataFrameDtype[
                EarlyValidFiles.DTYPE_TRIPS].items():
            trips[key] = trips[key].astype(value)

        for key, value in self.dataFrameDtype[
                EarlyValidFiles.DTYPE_HOUSEHOLDS].items():
            hh[key] = hh[key].astype(value)

        # bring in Vehicle Type file. Bryce's file has a certain structure (wide format) that Bill prefers for MLOGIT.
        veh_type = pd.read_csv(
            os.path.join(control_parameters.dirListing,
                         EarlyValidFiles.SAMPLE_VEH_PROPS))
        veh_type.rename(columns={'ggh_zone': 'taz'}, inplace=True)
        veh_type = pd.melt(veh_type,
                           id_vars=['taz']).sort_values(['taz', 'variable'])

        # attach the market segment to the veh_type. Also add in an integer based market seg.
        # The vehicle type is part of the variable column but starts after the second underscore
        # thus it is stripped out into a column of its own.
        veh_type['mseg1'] = veh_type['variable'].str.split('_').str[0]
        veh_type['mseg2'] = veh_type['variable'].str.split('_').str[1]
        veh_type['mseg'] = veh_type['mseg1'] + '_' + veh_type['mseg2']

        # add in integer based market segment category
        veh_type.loc[(veh_type['mseg'] == 'nocar_low'), 'market_seg'] = 0
        veh_type.loc[(veh_type['mseg'] == 'nocar_high'), 'market_seg'] = 1
        veh_type.loc[(veh_type['mseg'] == 'insuff_low'), 'market_seg'] = 2
        veh_type.loc[(veh_type['mseg'] == 'insuff_high'), 'market_seg'] = 3
        veh_type.loc[(veh_type['mseg'] == 'suff_low'), 'market_seg'] = 4
        veh_type.loc[(veh_type['mseg'] == 'suff_high'), 'market_seg'] = 5
        veh_type['market_seg'] = veh_type['market_seg'].astype('int8')

        # extract the vehicle type and drop unnecessary columns
        veh_type['vtype1'] = veh_type['variable'].str.split('_').str[2]
        veh_type['vtype2'] = veh_type['variable'].str.split('_').str[3]
        veh_type['vtype'] = veh_type['vtype1'] + '_' + veh_type['vtype2']
        columns = ['mseg1', 'mseg2', 'mseg', 'vtype1', 'vtype2']
        veh_type.drop(columns, inplace=True, axis=1)

        ################################################################################################################
        # RUN VEHICLE SAMPLING

        # invoke the class and run the vehicle sampling method.
        common.logger.info("Add vehicle type to every household")
        hh = self.assign_vehtype(hh, veh_type, self.seed)

        # only keep the hhid and veh_type column
        hh = hh[['hhid', 'hh_veh_type']]

        # dictionary of market and vehicle segment key and values
        market_seg_def = {
            0: 'nocar_low',
            1: "nocar_high",
            2: "insuff_low",
            3: "insuff_high",
            4: "suff_low",
            5: "suff_high"
        }

        # dictionary of veh segment key and values
        veh_seg_def = {
            'trad_auto': 0,
            "trad_uber": 1,
            "av_auto": 2,
            "av_uber": 3
        }

        # transfer the veh_type and market_segment by household id to the trips table.
        # Add in a descriptor for the market segment to make it easy to understand
        trips = pd.merge(trips, hh, on='hhid', how='left')

        # map the information and add flag
        trips['mseg'] = trips['market_seg'].map(market_seg_def)
        trips['vseg'] = trips['hh_veh_type'].map(veh_seg_def)
        trips['flag'] = trips['taz_i'].astype(str) + trips['taz_j'].astype(
            str) + trips['market_seg'].astype(str)

        common.logger.info(
            "Vehicle type information transferred to every trip record via household. This dataframe is"
            "now ready for mode sampling.")
        return trips
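The wide-to-long reshaping of the vehicle-type proportions is the crux of this method: pd.melt turns one column per segment/vehicle combination into rows, and the underscore-delimited column name is then split into a market segment and a vehicle type. A toy sketch with two hypothetical columns (the real SAMPLE_VEH_PROPS file has one column per segment/vehicle pair):

import pandas as pd

# toy wide-format proportions; the column names mimic the <mseg>_<vtype> convention
veh = pd.DataFrame({'ggh_zone': [1001, 1002],
                    'nocar_low_trad_auto': [0.6, 0.5],
                    'nocar_low_av_auto': [0.4, 0.5]})
veh = veh.rename(columns={'ggh_zone': 'taz'})
veh = pd.melt(veh, id_vars=['taz']).sort_values(['taz', 'variable'])

# the first two underscore-separated tokens are the market segment, the rest the vehicle type
parts = veh['variable'].str.split('_')
veh['mseg'] = parts.str[0] + '_' + parts.str[1]
veh['vtype'] = parts.str[2] + '_' + parts.str[3]
print(veh[['taz', 'mseg', 'vtype', 'value']])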
Example #4
def main():

    ####################################################################################################################
    control_parameters.logger.info("Bring in the GLOBAL parameters")
    prng = control_parameters.prng
    chaos_monkey = control_parameters.chaos_monkey

    ####################################################################################################################
    control_parameters.logger.info("Undertake EARLY VALIDATION")
    # check if files exist
    valid = ev.EarlyValidFiles.validation()

    ####################################################################################################################
    control_parameters.logger.info(
        "Batch in the household and trips file from a GGHM run and ATTACH MARKET SEGMENTS"
    )
    hh = pd.read_csv(
        os.path.join(control_parameters.inputDirListing,
                     ev.EarlyValidFiles.HOUSEHOLDS_OUT))
    trips = pd.read_csv(
        os.path.join(control_parameters.inputDirListing,
                     ev.EarlyValidFiles.TRIPS_OUT))
    hh = common.market_segment(
        hh)  # tag each household by the market segment it belongs to

    ####################################################################################################################
    control_parameters.logger.info(
        "Merge the household information to the trips file and also create the PEAK/OFF-PEAK"
        "flag.")
    hh, trips, trips_hhold = TripsHhold().run(trips, hh)
    trips_hhold = PeakOffpeak().run(trips_hhold, 1)

    ####################################################################################################################
    control_parameters.logger.info(
        "Run MLOGIT PROBE FOR MANDATORY TRIP PURPOSE and save O-D pairs that MLOGIT "
        "needs to produce probabilities for.")
    mand_purposes = ['HBW', 'HBS', 'HBU']
    education = ['HBS', 'HBU']
    # mand_prob_pairs = MandatoryFortranOd().run(trips_hhold, mand_purposes, education)

    ####################################################################################################################
    control_parameters.logger.info(
        "Run MLOGIT PROBE FOR NON-MANDATORY trip purposes. This will add an origin and/or "
        "destination to all non-mandatory trip records.")
    nonmandatory_purposes = ['HBO', 'HBM', 'WBO', 'NHB', 'HBE']
    trips_hhold = trips_hhold[(trips_hhold['hhid'] > 0)
                              & (trips_hhold['hhid'] <= 100000)].copy()
    trips_hhold['parallel_flag'] = trips_hhold.groupby('hhid').ngroup()

    # batch in the Other trip purpose trip matrices and create slices of the trips_hhold dataframe given the number of
    # cores being used
    all_other, ggh2 = common.batchin_binaryfiles()
    grp_list = NonMandatoryFortranOd(all_other).slice_dataframe_forparallel(
        trips_hhold, cpu_cores)

    # Create partial function for Pool
    func_partial = partial(NonMandatoryFortranOd(all_other).run_dest_solver,
                           chaos_monkey=chaos_monkey)

    control_parameters.logger.info("Start parallel processing")

    result_list = []
    with Pool(cpu_cores) as pool:
        result_list = pool.map(func_partial, grp_list)
        control_parameters.logger.info("Multiprocessing completed")

    # One dataframe and reorder and set dtypes
    trips_hhold = pd.concat(result_list)
    trips_hhold = NonMandatoryFortranOd(all_other).reorder_setdtype(
        trips_hhold)

    # save the matrices for MLOGIT
    # NonMandatoryFortranOd(all_other).save_matrices(nonmandatory_purposes, trips_hhold, ggh2)
    ####################################################################################################################

    control_parameters.logger.info(
        "Start running MLOGIT. This will take close to 20 hours so sit back and relax."
    )
    # launch_mlogit.LaunchingMlogit().runner_mlogit()

    ####################################################################################################################
    control_parameters.logger.info(
        "Sample and attach VEHICLE TYPE to the households. This is needed for the mode "
        "probabilities are segmented by vehicle type.")
    veh_type = pd.read_csv(
        os.path.join(control_parameters.inputDirListing,
                     ev.EarlyValidFiles.SAMPLE_VEH_PROPS))
    trips_hhold = VehicleSampling().run(hh, trips_hhold, veh_type, prng)

    ###################################################################################################################
    control_parameters.logger.info(
        "Sample and attach MODES to the PEAK trip records")

    hbe = trips_hhold[trips_hhold['purpose'] == "HBE"].copy()
    trips_hhold = trips_hhold[trips_hhold['purpose'] != "HBE"].copy()
    print("A total of %s trips will be assigned a mode" % trips_hhold.shape[0])

    #####################################################
    # PEAK ONLY
    trips_vehtype_pk = trips_hhold[trips_hhold['peak_flag'] == 1].copy()

    hbw_pk = ModeSampling(prng).run("PEAK", "HBW", trips_vehtype_pk)
    control_parameters.logger.info("HBW peak trips discretized.")

    # Need to set the market segment of the school and univ trips to zero
    trips_vehtype_pk_edu = trips_vehtype_pk[
        (trips_vehtype_pk['purpose'] == 'HBS') |
        (trips_vehtype_pk['purpose'] == 'HBU')].copy()
    trips_vehtype_pk_edu['market_seg'] = 0
    hbs_pk = ModeSampling(prng).run("PEAK", "HBS", trips_vehtype_pk_edu)
    hbu_pk = ModeSampling(prng).run("PEAK", "HBU", trips_vehtype_pk_edu)
    control_parameters.logger.info("HBS and HBU peak trips discretized.")

    hbo_pk = ModeSampling(prng).run("PEAK", "HBO", trips_vehtype_pk)
    control_parameters.logger.info("HBO peak trips discretized.")

    hbm_pk = ModeSampling(prng).run("PEAK", "HBM", trips_vehtype_pk)
    control_parameters.logger.info("HBM peak trips discretized.")

    trips_vehtype_pk_nhb = trips_vehtype_pk[trips_vehtype_pk['purpose'] ==
                                            'NHB'].copy()
    trips_vehtype_pk_nhb['market_seg'] = 0
    nhb_pk = ModeSampling(prng).run("PEAK", "NHB", trips_vehtype_pk_nhb)
    control_parameters.logger.info("NHB peak trips discretized.")

    wbo_pk = ModeSampling(prng).run("PEAK", "WBO", trips_vehtype_pk)
    control_parameters.logger.info("WBO peak trips discretized.")

    all_peak_mc_discretized = pd.concat(
        [hbw_pk, hbs_pk, hbu_pk, hbo_pk, hbm_pk, wbo_pk, nhb_pk], axis=0)

    control_parameters.logger.info(
        "Mode Choice discretized for the PEAK period")

    #####################################################
    # OFFPEAK ONLY
    trips_vehtype_offpk = trips_hhold[trips_hhold['peak_flag'] == 0].copy()

    hbw_offpk = ModeSampling(prng).run("OFF_PEAK", "HBW", trips_vehtype_offpk)
    control_parameters.logger.info("HBW off peak trips discretized.")

    # Need to set the market segment of the school and univ trips to zero
    trips_vehtype_offpk_edu = trips_vehtype_offpk[
        (trips_vehtype_offpk['purpose'] == 'HBS') |
        (trips_vehtype_offpk['purpose'] == 'HBU')].copy()
    trips_vehtype_offpk_edu['market_seg'] = 0
    hbs_offpk = ModeSampling(prng).run("OFF_PEAK", "HBS",
                                       trips_vehtype_offpk_edu)
    hbu_offpk = ModeSampling(prng).run("OFF_PEAK", "HBU",
                                       trips_vehtype_offpk_edu)
    control_parameters.logger.info("HBS and HBU off peak trips discretized.")

    hbo_offpk = ModeSampling(prng).run("OFF_PEAK", "HBO", trips_vehtype_offpk)
    control_parameters.logger.info("HBO off peak trips discretized.")

    hbm_offpk = ModeSampling(prng).run("OFF_PEAK", "HBM", trips_vehtype_offpk)
    control_parameters.logger.info("HBM off peak trips discretized.")

    trips_vehtype_offpk_nhb = trips_vehtype_offpk[
        trips_vehtype_offpk['purpose'] == 'NHB'].copy()
    trips_vehtype_offpk_nhb['market_seg'] = 0
    nhb_offpk = ModeSampling(prng).run("OFf_PEAK", "NHB",
                                       trips_vehtype_offpk_nhb)
    control_parameters.logger.info("NHB off peak trips discretized.")

    wbo_offpk = ModeSampling(prng).run("OFF_PEAK", "WBO", trips_vehtype_offpk)
    control_parameters.logger.info("WBO off peak trips discretized.")

    all_offpeak_mc_discretized = pd.concat(
        [hbw_offpk, hbs_offpk, hbu_offpk, hbo_offpk, hbm_offpk, wbo_offpk, nhb_offpk],
        axis=0)

    control_parameters.logger.info(
        "Mode Choice discretized for the OFF PEAK period")

    all_discretized = pd.concat(
        [all_peak_mc_discretized, all_offpeak_mc_discretized], axis=0)

    all_discretized.to_csv(r"c:\personal\imm\outputs\foo_all_Aug29.csv")

    control_parameters.logger.info("Processing Ended")
    print("")
Example #5
    def run(self, mand_purposes, education):
        """


        :return:
        """
        # bring in the GGHMV4's household and trip list files and attach a market segment to each household
        mprobe_valid.logger.info(
            "Batch in the household and trips list files from a gghm run")
        hh = pd.read_csv(
            os.path.join(
                mprobe_valid.dirListing_abm,
                mprobe_valid.EarlyValidFiles_MlogitProbe.HOUSE_HOLDS_OUT))
        trips = pd.read_csv(
            os.path.join(mprobe_valid.dirListing_abm,
                         mprobe_valid.EarlyValidFiles_MlogitProbe.TRIPS_OUT))
        hh = common.market_segment(
            hh)  # tag each household by the market segment it belongs to

        # set dtypes for the household and trips dataframe to reduce memory requirements
        for key, value in self.dataFrameDtype[
                mprobe_valid.EarlyValidFiles_MlogitProbe.DTYPE_TRIPS].items():
            trips[key] = trips[key].astype(value)

        for key, value in self.dataFrameDtype[
                mprobe_valid.EarlyValidFiles_MlogitProbe.
                DTYPE_HOUSEHOLDS].items():
            hh[key] = hh[key].astype(value)

        # Merge the hholds info to the trips. By doing so, we can bring in a bunch of household attributes
        # including income, dwelling type, size, number of vehicles, and auto_sufficiency. Add in an integer
        # definition for one of six market segments.
        trips_hhold = pd.merge(trips,
                               hh,
                               how='left',
                               left_on='hhid',
                               right_on='hhid')
        trips_hhold = self.identify_peak(trips_hhold)

        # batch in ggh zone numbers and add in two columns for i and j zones
        ggh = pd.read_csv(
            os.path.join(mprobe_valid.dirListing_abm,
                         mprobe_valid.EarlyValidFiles_MlogitProbe.GGH_EQUIV))
        ggh['key'] = 0
        # make a copy of the df and create a square matrix
        ggh1 = ggh.copy()
        ggh2 = pd.merge(ggh1, ggh, how='left', on='key')

        # generate the matrices desired by MLOGIT
        mprobe_valid.logger.info("Start evaluating the mandatory purposes")
        for purpose in mand_purposes:

            mprobe_valid.logger.info("Evaluating the %s purpose" % purpose)

            # both branches select the purpose's trips; because the school and university purposes
            # don't have any market segmentation, their market segment is set to a default of 0.
            mand_only = trips_hhold.loc[trips_hhold['purpose'] == purpose].copy()
            if purpose in education:
                mand_only['market_seg'] = 0

            # now loop over the peak periods
            for peak in range(0, 2):
                mprobe_valid.logger.info("Start evaluating the peak_flag %s" %
                                         peak)

                timeperiod_df = mand_only.loc[mand_only['peak_flag'] == peak]
                timeperiod_df = timeperiod_df.groupby(
                    ['taz_i', 'taz_j', 'purpose',
                     'market_seg']).size().reset_index(name='freq')

                # now loop over the segments
                for segment in timeperiod_df['market_seg'].unique():
                    mprobe_valid.logger.info(
                        "Start evaluating the segment %s" % segment)
                    # create filename and then groupby
                    # only keep relevant cols and set a flag
                    # Merge the ggh zones and the trip list and convert to wide format

                    fname = purpose + "_" + str(segment)
                    df_hbw = timeperiod_df.loc[timeperiod_df['market_seg'] ==
                                               segment]
                    df_hbw = df_hbw[['taz_i', 'taz_j']]
                    df_hbw['probflag'] = 1

                    # this merge is not strictly necessary, but to be on the safe side I bring in the equiv file we have in the TRESO code
                    df_hbw1 = pd.merge(ggh2,
                                       df_hbw,
                                       how="left",
                                       left_on=['ggh_zone_x', 'ggh_zone_y'],
                                       right_on=['taz_i', 'taz_j'])
                    df_hbw2 = df_hbw1.pivot_table(index='ggh_zone_x',
                                                  columns='ggh_zone_y',
                                                  values='probflag',
                                                  fill_value=0)

                    mprobe_valid.logger.info(
                        "Saving file to the requisite Fortran format")
                    to_fortran(df_hbw2,
                               os.path.join(
                                   mprobe_valid.dirListing_abm,
                                   fname + ' peak_flag ' + str(peak) + '.bin'),
                               n_columns=4000)
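The heart of the loop above is turning a list of O-D pairs into a dense zone-by-zone flag matrix: a cross join of the GGH zone list with itself guarantees every cell exists, and pivot_table with fill_value=0 fills the cells with no requested pair. A toy version with three hypothetical zones:

import pandas as pd

# three hypothetical GGH zones cross-joined with themselves on a dummy key
ggh = pd.DataFrame({'ggh_zone': [1, 2, 3]})
ggh['key'] = 0
ggh2 = pd.merge(ggh, ggh, how='left', on='key')

# O-D pairs that need probabilities for one purpose/segment/peak slice
od = pd.DataFrame({'taz_i': [1, 2, 3], 'taz_j': [3, 1, 2], 'probflag': 1})

wide = pd.merge(ggh2, od, how='left',
                left_on=['ggh_zone_x', 'ggh_zone_y'],
                right_on=['taz_i', 'taz_j'])
matrix = wide.pivot_table(index='ggh_zone_x', columns='ggh_zone_y',
                          values='probflag', fill_value=0)
print(matrix)   # 3x3 matrix with 1 where an O-D pair was requested, 0 elsewhere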