Exemple #1
0
def build_year_1(nZones=9,
                 transit_scope=slice(2, 8),
                 n_HH=834,
                 directory=None,
                 seed=0):
    """Simulate the Exampville "year 1" survey and write it to disk.

    The builder generates zones, skims, households, persons and tours from
    ``numpy.random``, computes nested-logit mode/destination probabilities
    for every tour, samples one (destination, mode) choice per tour, and
    writes the results to an OMX skim file plus three HDF5 tables.

    The result is cached in the module-level ``_cache_1``: once a build has
    completed, later calls return the cached tuple immediately and ignore
    all arguments.

    Parameters
    ----------
    nZones : int
        Number of travel analysis zones.
    transit_scope : slice or tuple
        Range of zero-based zone indexes that receive non-zero transit time
        and fare.  A tuple is converted with ``slice(*transit_scope)``.
    n_HH : int
        Number of households to simulate.
    directory : path-like, optional
        Where the output files are written.  When omitted, the module-level
        ``_directory`` is reused if set; otherwise a new
        ``TemporaryDirectory`` is created.  Either way the chosen directory
        is remembered in ``_directory``.
    seed : int
        Seed passed to ``numpy.random.seed`` before any random draw.

    Returns
    -------
    tuple
        ``(directory, omx, f_hh, f_pp, f_tour)``: the output directory, the
        skims file reopened read-only, and the household, person and tour
        tables (flushed, still open).

    Raises
    ------
    TypeError
        If ``transit_scope.stop`` is greater than ``nZones``.
    """
    global _cache_1, _directory
    if _cache_1 is not None:
        return _cache_1

    flog("EXAMPVILLE Builder (Year 1)")
    flog("  simulating a survey of {} households", n_HH)
    flog("  traveling among {} travel analysis zones", nZones)

    if directory is None:
        if _directory is not None:
            directory = _directory
        else:
            from ..util.temporaryfile import TemporaryDirectory
            _directory = directory = TemporaryDirectory()
    else:
        _directory = directory

    if isinstance(transit_scope, tuple):
        transit_scope = slice(*transit_scope)

    if transit_scope.stop > nZones:
        # TypeError (rather than ValueError) is kept so existing callers
        # that catch it keep working.
        raise TypeError('transit_scope too large for nZones')

    # The randomizer seed is reset to zero by default so that we get
    # consistent generated Exampville survey results.  (High levels of
    # randomness are not very important here because we just want to
    # demonstrate the models.)
    # NOTE: every numpy.random call below must stay in this exact order,
    # otherwise the generated data changes for a given seed.
    numpy.random.seed(seed)

    ## Zones
    flog("Zones")
    # Population weight is highest at both ends of the zone range ...
    pop_weight = numpy.fmax(numpy.arange(nZones),
                            numpy.flipud(numpy.arange(nZones))).astype(float)
    pop_weight /= pop_weight.sum()
    # ... while employment weight follows a binomial pmf over the zones.
    wrk_weight = scipy.stats.binom.pmf(numpy.arange(nZones), nZones - 1, .5)
    wrk_weight /= wrk_weight.sum()

    zone_lat = (-1)**numpy.arange(nZones)
    zone_lon = 11 + numpy.arange(nZones)

    ## Skims
    flog("Skims")
    distance = numpy.zeros([nZones, nZones])
    gaps = numpy.random.random(nZones) + 1
    for z in range(nZones):
        # Symmetric distance: sum of the gaps between zone z and zone y.
        for y in range(z, nZones):
            distance[y, z] = distance[z, y] = gaps[z:y].sum()
        # Intrazonal distance is an independent random draw.
        distance[z, z] = numpy.random.random()

    drivetime = (numpy.random.exponential(1, [nZones, nZones]) + 1) * distance
    drivetime[drivetime < 2.0] = 2.0
    # hov_time is not written to any output, but its two random draws are
    # kept so the random stream (and all data generated after this point)
    # is unchanged.
    hov_time = drivetime.copy()
    hov_time[:nZones // 2, nZones // 2:] -= numpy.random.random() * 1.5 + 0.5
    hov_time[nZones // 2:, :nZones // 2] -= numpy.random.random() * 1.5 + 0.5

    # Transit time and fare are non-zero only inside transit_scope.
    transit_range = transit_scope.stop - transit_scope.start
    transittime = numpy.zeros([nZones, nZones])
    transittime[transit_scope, transit_scope] = (
        numpy.random.random([transit_range, transit_range]) + 1
    ) * distance[transit_scope, transit_scope]

    transitfare = numpy.zeros([nZones, nZones])
    transitfare[transit_scope, transit_scope] = 1.5

    ## Households
    flog("HHs")
    HHidx = numpy.arange(n_HH, dtype=numpy.int64)
    HHid = numpy.asarray([50000 + i for i in HHidx])
    HHincome = numpy.round(numpy.random.normal(75000, 25000, [n_HH]),
                           -3).astype(numpy.int64)
    HHsize = numpy.floor(
        numpy.random.exponential(0.8, [n_HH]) + 1 +
        numpy.random.random([n_HH])).astype(numpy.int64)
    # Home zones are one-based zone ids.
    HHhomezone = numpy.random.choice(numpy.arange(1, nZones + 1),
                                     size=[n_HH],
                                     replace=True,
                                     p=pop_weight)

    ## People
    flog("People")
    n_PER = numpy.sum(HHsize)
    PERidx = numpy.arange(n_PER, dtype=numpy.int64)
    PERid = numpy.asarray([60000 + i for i in PERidx])
    PERhhid = numpy.zeros(n_PER, dtype=numpy.int64)
    PERhhidx = numpy.zeros(n_PER, dtype=numpy.int64)
    # Assign each household's id and index to HHsize consecutive persons.
    n2 = 0
    for n1 in range(n_HH):
        PERhhid[n2:(n2 + HHsize[n1])] = HHid[n1]
        PERhhidx[n2:(n2 + HHsize[n1])] = HHidx[n1]
        n2 += HHsize[n1]
    PERage = (numpy.random.random(n_PER) * 80 + 5).astype(numpy.int64)
    PERworks = ((numpy.random.random(n_PER) > 0.2) & (PERage > 16) &
                (PERage < 70)).astype(numpy.int64)

    zone_employment = numpy.round(PERworks.sum() * wrk_weight, 0) + 1

    total_employment = zone_employment.sum()
    mean_employment = total_employment / nZones
    # Retail employment is a random share, capped at the zone's employment.
    zone_retail = numpy.fmin(
        numpy.round(numpy.random.random(nZones) * mean_employment, 0),
        zone_employment)
    zone_nonretail = zone_employment - zone_retail

    # Non-workers get zero work tours (multiplication by PERworks).
    PERnworktours = numpy.random.choice(
        [0, 1, 2, 3], size=[n_PER], replace=True,
        p=[0.1, 0.8, 0.07, 0.03]) * PERworks
    PERnothertours = numpy.random.choice([0, 1, 2, 3],
                                         size=[n_PER],
                                         replace=True,
                                         p=[0.2, 0.5, 0.2, 0.1])
    PERntours = PERnworktours + PERnothertours

    ## Tours
    flog("Tours")

    n_TOUR = PERntours.sum()

    TOURid = numpy.arange(n_TOUR, dtype=numpy.int64)
    TOURper = numpy.zeros(n_TOUR, dtype=numpy.int64)
    TOURperidx = numpy.zeros(n_TOUR, dtype=numpy.int64)
    TOURhh = numpy.zeros(n_TOUR, dtype=numpy.int64)
    TOURhhidx = numpy.zeros(n_TOUR, dtype=numpy.int64)
    TOURdtaz = numpy.zeros(n_TOUR, dtype=numpy.int64)
    TOURmode = numpy.zeros(n_TOUR, dtype=numpy.int64)
    TOURpurpose = numpy.zeros(n_TOUR, dtype=numpy.int64)

    # For each person: work tours first (purpose 1), then other tours
    # (purpose 2).
    n2 = 0
    for n1 in range(n_PER):
        TOURper[n2:(n2 + PERntours[n1])] = PERid[n1]
        TOURperidx[n2:(n2 + PERntours[n1])] = PERidx[n1]
        TOURhh[n2:(n2 + PERntours[n1])] = PERhhid[n1]
        TOURhhidx[n2:(n2 + PERntours[n1])] = PERhhidx[n1]
        TOURpurpose[n2:(n2 + PERnworktours[n1])] = 1
        TOURpurpose[(n2 + PERnworktours[n1]):(n2 + PERntours[n1])] = 2
        n2 += PERntours[n1]

    #### Utility by mode to various destinations
    flog("Choice Probability")
    nameModes = ['DA', 'SR', 'Walk', 'Bike', 'Transit']
    mDA = 0
    mSR = 1
    mWA = 2
    mBI = 3
    mTR = 4
    nModes = len(nameModes)

    paramCOST = -0.312
    paramTIME = -0.123
    paramNMTIME = -0.246
    paramDIST = -0.00357
    paramLNDIST = -0.00642

    # Nest scale parameters: car (DA, SR), non-motorized (Walk, Bike),
    # motorized (Transit, car nest), and the top level.
    paramMUcar = 0.5
    paramMUnon = 0.75
    paramMUmot = 0.8
    paramMUtop = 1.0

    Util = numpy.zeros([n_TOUR, nZones, nModes])
    for n in range(n_TOUR):
        # Zero-based index of the tour's origin (home) zone.
        otazi = HHhomezone[TOURhhidx[n]] - 1

        # Mode
        Util[n, :, mDA] += drivetime[otazi, :] * paramTIME + distance[
            otazi, :] * 0.20 * paramCOST
        if HHincome[TOURhhidx[n]] >= 75000:
            Util[n, :, mDA] += 1.0
            Util[n, :, mTR] -= 0.5
        Util[n, :, mSR] += drivetime[otazi, :] * paramTIME - 1.0 + distance[
            otazi, :] * 0.20 * 0.5 * paramCOST
        Util[n, :, mWA] += 1.0 + distance[otazi, :] / 2.5 * 60 * paramNMTIME
        Util[n, :, mBI] += -1.25 + distance[otazi, :] / 12 * 60 * paramNMTIME
        Util[n, :, mTR] += -1.5 + transittime[
            otazi, :] * paramTIME + transitfare[otazi, :] * paramCOST

        # Destination
        Util[n, :, :] += distance[otazi, :, None] * paramDIST + log1p(
            distance[otazi, :, None]) * paramLNDIST
        if HHincome[TOURhhidx[n]] <= 50000:
            Util[n, :, :] += 0.75 * log(zone_retail * 2.71828 +
                                        zone_nonretail)[:, None]
        else:
            Util[n, :, :] += 0.75 * log(zone_retail +
                                        zone_nonretail * 2.71828)[:, None]

        # Unavails
        if PERage[TOURperidx[n]] < 16:
            Util[n, :, mDA] = -numpy.inf
        Util[n, transitfare[otazi, :] <= 0, mTR] = -numpy.inf
        Util[n, distance[otazi, :] >= 3, mWA] = -numpy.inf
        Util[n, distance[otazi, :] >= 15, mBI] = -numpy.inf

    # Conditional probabilities within each nest.
    CPr_car = numpy.zeros([n_TOUR, nZones, 2])  # [DA,SR]
    CPr_non = numpy.zeros([n_TOUR, nZones, 2])  # [WA,BI]
    CPr_mot = numpy.zeros([n_TOUR, nZones, 2])  # [TR,Car]
    CPr_top = numpy.zeros([n_TOUR, nZones, 2])  # [Non,Mot]

    # Nest logsums.
    NLS_car = numpy.zeros([n_TOUR, nZones])
    NLS_non = numpy.zeros([n_TOUR, nZones])
    NLS_mot = numpy.zeros([n_TOUR, nZones])
    MLS_top = numpy.zeros([n_TOUR, nZones])  # Mode choice logsum
    DLS_top = numpy.zeros([n_TOUR])  # Dest choice logsum

    Pr_modes = numpy.zeros([n_TOUR, nZones, nModes])
    Pr_dest = numpy.zeros([n_TOUR, nZones])

    # Unavailable alternatives carry -inf utility, so log(0) and inf - inf
    # occur on purpose here; the resulting NaNs are zeroed afterwards.
    with numpy.errstate(divide='ignore', invalid='ignore'):
        for n in range(n_TOUR):
            NLS_car[n, :] = paramMUcar * log(
                exp(Util[n, :, mDA] / paramMUcar) +
                exp(Util[n, :, mSR] / paramMUcar))
            NLS_non[n, :] = paramMUnon * log(
                exp(Util[n, :, mWA] / paramMUnon) +
                exp(Util[n, :, mBI] / paramMUnon))
            NLS_mot[n, :] = paramMUmot * log(
                exp(NLS_car[n, :] / paramMUmot) +
                exp(Util[n, :, mTR] / paramMUmot))
            MLS_top[n, :] = log(exp(NLS_non[n, :]) + exp(NLS_mot[n, :]))
            DLS_top[n] = log(numpy.sum(exp(MLS_top[n, :])))

            Pr_dest[n, :] = exp(MLS_top[n, :] - DLS_top[n])

            CPr_top[n, :, 0] = exp(
                (NLS_non[n, :] - MLS_top[n, :]) / paramMUtop)
            CPr_top[n, :, 1] = exp(
                (NLS_mot[n, :] - MLS_top[n, :]) / paramMUtop)
            CPr_mot[n, :, 0] = exp(
                (Util[n, :, mTR] - NLS_mot[n, :]) / paramMUmot)
            CPr_mot[n, :, 1] = exp(
                (NLS_car[n, :] - NLS_mot[n, :]) / paramMUmot)
            CPr_non[n, :, 0] = exp(
                (Util[n, :, mWA] - NLS_non[n, :]) / paramMUnon)
            CPr_non[n, :, 1] = exp(
                (Util[n, :, mBI] - NLS_non[n, :]) / paramMUnon)
            CPr_car[n, :, 0] = exp(
                (Util[n, :, mDA] - NLS_car[n, :]) / paramMUcar)
            CPr_car[n, :, 1] = exp(
                (Util[n, :, mSR] - NLS_car[n, :]) / paramMUcar)

            # Joint (destination, mode) probability is the product of the
            # conditional probabilities down the nesting tree.
            Pr_modes[n, :, mTR] = (CPr_mot[n, :, 0] * CPr_top[n, :, 1] *
                                   Pr_dest[n, :])
            Pr_modes[n, :, mWA] = (CPr_non[n, :, 0] * CPr_top[n, :, 0] *
                                   Pr_dest[n, :])
            Pr_modes[n, :, mBI] = (CPr_non[n, :, 1] * CPr_top[n, :, 0] *
                                   Pr_dest[n, :])
            Pr_modes[n, :, mDA] = (CPr_car[n, :, 0] * CPr_mot[n, :, 1] *
                                   CPr_top[n, :, 1] * Pr_dest[n, :])
            Pr_modes[n, :, mSR] = (CPr_car[n, :, 1] * CPr_mot[n, :, 1] *
                                   CPr_top[n, :, 1] * Pr_dest[n, :])

    Pr_modes[numpy.isnan(Pr_modes)] = 0

    ## Choices
    flog("Choices")
    for n in range(n_TOUR):
        try:
            ch = numpy.random.choice(nModes * nZones,
                                     replace=True,
                                     p=Pr_modes[n, :, :].ravel())
        except Exception:
            # Log the offending probability total for diagnosis, then let
            # the original error propagate.
            flog("total prob = {}", Pr_modes[n, :, :].sum())
            raise
        # The flattened index is zone-major: ch = zone * nModes + mode.
        dtazi = ch // nModes
        modei = ch - (dtazi * nModes)
        # Stored zone ids and mode codes are one-based.
        TOURdtaz[n] = dtazi + 1
        TOURmode[n] = modei + 1

    ### Write Out Data
    flog("Output")

    if not os.path.exists(directory):
        os.makedirs(directory)

    omx = larch.OMX(os.path.join(directory, 'exampville.omx'), mode='a')
    omx.shape = (nZones, nZones)
    omx.add_matrix('DIST', distance)
    omx.add_matrix('AUTO_TIME', drivetime)
    omx.add_matrix('RAIL_TIME', transittime)
    omx.add_matrix('RAIL_FARE', transitfare)
    omx.add_lookup('TAZID', numpy.arange(1, nZones + 1, dtype=numpy.int64))
    omx.add_lookup('EMPLOYMENT', zone_employment)
    omx.add_lookup('EMP_RETAIL', zone_retail)
    omx.add_lookup('EMP_NONRETAIL', zone_nonretail)
    omx.add_lookup('LAT', zone_lat)
    omx.add_lookup('LON', zone_lon)

    # Close the writable handle and hand back a read-only one.
    omx.flush()
    omx.close()
    omx = larch.OMX(os.path.join(directory, 'exampville.omx'), mode='r')

    f_hh = larch.DT(os.path.join(directory, 'exampville_hh.h5'), mode='a')
    f_hh.new_caseids(HHid)
    f_hh.new_idco_from_array('INCOME', HHincome)
    f_hh.new_idco_from_array('HHSIZE', HHsize)
    f_hh.new_idco_from_array('HOMETAZ', HHhomezone)
    f_hh.flush()

    f_pp = larch.DT(os.path.join(directory, 'exampville_person.h5'), mode='a')
    f_pp.new_caseids(PERid)
    f_pp.new_idco_from_array('HHID', PERhhid)
    f_pp.new_idco_from_array('AGE', PERage)
    f_pp.new_idco_from_array('WORKS',
                             PERworks,
                             dictionary={
                                 1: 'Yes',
                                 0: 'No'
                             },
                             title='Person has a regular job')
    f_pp.new_idco_from_array(
        'N_WORKTOURS',
        PERnworktours,
        title='Number of work tours reported by this person on the survey day')
    f_pp.new_idco_from_array(
        'N_OTHERTOURS',
        PERnothertours,
        title=
        'Number of non-work tours reported by this person on the survey day')
    # Title corrected: this column holds work + non-work tours, not only
    # non-work tours.
    f_pp.new_idco_from_array(
        'N_TOTALTOURS',
        PERntours,
        title='Total number of tours (work and non-work) reported by this '
        'person on the survey day')
    f_pp.flush()

    f_tour = larch.DT(os.path.join(directory, 'exampville_tours.h5'), mode='a')
    f_tour.new_caseids(TOURid)
    f_tour.new_idco_from_array('HHID', TOURhh)
    f_tour.new_idco_from_array('PERSONID', TOURper)
    f_tour.new_idco_from_array('DTAZ', TOURdtaz)
    f_tour.new_idco_from_array('TOURMODE',
                               TOURmode,
                               dictionary={
                                   1: 'DA',
                                   2: 'SR',
                                   3: 'Walk',
                                   4: 'Bike',
                                   5: 'Transit',
                               })
    f_tour.new_idco_from_array('TOURPURP',
                               TOURpurpose,
                               dictionary={
                                   1: 'Work Tour',
                                   2: 'Non-Work Tour',
                               })
    f_tour.flush()

    flog("EXAMPVILLE Completed Builder (Year 1)")
    flog("   SKIMS  : {}", omx.filename)
    flog("   HHs    : {}", f_hh.source_filename)
    flog("   Persons: {}", f_pp.source_filename)
    flog("   Tours  : {}", f_tour.source_filename)

    _cache_1 = (directory, omx, f_hh, f_pp, f_tour)

    return directory, omx, f_hh, f_pp, f_tour
Exemple #2
0
def test_ch_av_summary_output():
    """Exercise ``choice_avail_summary`` on Exampville work tours.

    The summary is checked in three states: with no choice or availability
    defined, with a sensible availability rule for driving, and with a rule
    that makes some chosen alternatives unavailable.
    """
    skims = larch.OMX(larch.exampville.files.skims, mode='r')
    households = pandas.read_csv(larch.exampville.files.hh)
    persons = pandas.read_csv(larch.exampville.files.person)
    tours = pandas.read_csv(larch.exampville.files.tour)

    person_columns = [
        'PERSONID', 'HHID', 'HHIDX', 'AGE', 'WORKS', 'N_WORK_TOURS',
        'N_OTHER_TOURS', 'N_TOURS', 'N_TRIPS', 'N_TRIPS_HBW', 'N_TRIPS_HBO',
        'N_TRIPS_NHB'
    ]

    # One row per tour, with household and person attributes attached.
    merged = tours.merge(households, on='HHID')
    merged = merged.merge(persons[person_columns], on=('HHID', 'PERSONID'))
    # Zero-based zone indexes for the skim lookup.
    merged["HOMETAZi"] = merged["HOMETAZ"] - 1
    merged["DTAZi"] = merged["DTAZ"] - 1

    merged = merged[merged.TOURPURP == 1]

    skim_values = skims.get_rc_dataframe(merged.HOMETAZi, merged.DTAZi)
    tour_frame = merged.join(skim_values)

    DA, SR, Walk, Bike, Transit = 1, 2, 3, 4, 5
    mode_names = ['DA', 'SR', 'Walk', 'Bike', 'Transit']

    dataframes = larch.DataFrames(
        co=tour_frame,
        alt_codes=[DA, SR, Walk, Bike, Transit],
        alt_names=list(mode_names),
    )

    model = larch.Model(dataservice=dataframes)
    model.title = "Exampville Work Tour Mode Choice v1"

    high_income = "INCOME>75000"

    model.utility_co[DA] = (
        +P.InVehTime * X.AUTO_TIME
        + P.Cost * X.AUTO_COST  # dollars per mile
    )
    model.utility_co[SR] = (
        +P.ASC_SR
        + P.InVehTime * X.AUTO_TIME
        + P.Cost * (X.AUTO_COST * 0.5)  # dollars per mile, half share
        + P("HighInc:SR") * X(high_income)
    )
    model.utility_co[Walk] = (
        +P.ASC_Walk
        + P.NonMotorTime * X.WALK_TIME
        + P("HighInc:Walk") * X(high_income)
    )
    model.utility_co[Bike] = (
        +P.ASC_Bike
        + P.NonMotorTime * X.BIKE_TIME
        + P("HighInc:Bike") * X(high_income)
    )
    model.utility_co[Transit] = (
        +P.ASC_Transit
        + P.InVehTime * X.TRANSIT_IVTT
        + P.OutVehTime * X.TRANSIT_OVTT
        + P.Cost * X.TRANSIT_FARE
        + P("HighInc:Transit") * X(high_income)
    )

    # Expectations shared by all three scenarios.
    expected_index = pandas.Index(
        [1, 2, 3, 4, 5, '< Total All Alternatives >'], dtype='object')
    expected_names = mode_names + ['']
    expected_chosen = [6052., 810., 196., 72., 434., 7564.]
    base_columns = ['name', 'chosen', 'available']
    # Availability rules for every mode except DA, which varies by scenario.
    non_da_rules = {
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }

    # No choice or avail data set
    model.load_data()
    summary = model.dataframes.choice_avail_summary()
    assert numpy.array_equal(summary.columns, base_columns)
    assert summary.index.identical(expected_index)
    blank_rows = [[mode, None, None] for mode in mode_names]
    blank_rows.append(['', 0, ''])
    assert numpy.array_equal(summary.values, blank_rows)

    # Reasonable choice and avail data set
    model.choice_co_code = 'TOURMODE'
    model.availability_co_vars = {DA: 'AGE >= 16', **non_da_rules}
    model.load_data()
    summary = model.dataframes.choice_avail_summary()
    assert numpy.array_equal(summary.columns, base_columns)
    assert summary.index.identical(expected_index)
    assert numpy.array_equal(summary['name'].values, expected_names)
    assert numpy.array_equal(summary['chosen'].values, expected_chosen)
    expected_available = numpy.array(
        [7564.0, 7564.0, 4179.0, 7564.0, 4199.0, ''], dtype=object)
    assert numpy.array_equal(summary['available'].values, expected_available)

    # Unreasonable choice and avail data set
    model.choice_co_code = 'TOURMODE'
    model.availability_co_vars = {DA: 'AGE >= 26', **non_da_rules}
    model.load_data()
    summary = model.dataframes.choice_avail_summary()
    assert numpy.array_equal(summary.columns,
                             base_columns + ['chosen but not available'])
    assert summary.index.identical(expected_index)
    assert numpy.array_equal(summary['name'].values, expected_names)
    assert numpy.array_equal(summary['chosen'].values, expected_chosen)
    expected_available = numpy.array(
        [6376.0, 7564.0, 4179.0, 7564.0, 4199.0, ''], dtype=object)
    assert numpy.array_equal(summary['available'].values, expected_available)
    assert numpy.array_equal(summary['chosen but not available'].values,
                             [942.0, 0.0, 0.0, 0.0, 0.0, 942.0])
Exemple #3
0
import larch, numpy, pandas, os
import larch.exampville
from matplotlib import pyplot as plt

# Load the data: the skims (OMX) plus the household, person and tour CSV tables
skims = larch.OMX(larch.exampville.files.skims, mode='r')
hh = pandas.read_csv(larch.exampville.files.hh)
pp = pandas.read_csv(larch.exampville.files.person)
tour = pandas.read_csv(larch.exampville.files.tour)

tour.TOURPURP.statistics()
df = tour[tour.TOURPURP == 1]    # keep only rows with TOURPURP == 1 (NOTE(review): the original comment said "walking data", but this filters on tour purpose, not mode; confirm what code 1 means)
# print(df.info()) # summary information about df
# print(df['TOURPURP'])   # read the 'TOURPURP' column of df

# Attach household and person attributes to each remaining tour
df = df.merge(hh, on='HHID').merge(pp, on=('HHID', 'PERSONID'))
# Zone ids shifted down by one for the skim lookup below (presumably zero-based indexes)
df["HOMETAZi"] = df["HOMETAZ"] - 1
df["DTAZi"] = df["DTAZ"] - 1


# Join the skim values looked up for each (home zone, destination zone) pair
df = df.join(
    skims.get_rc_dataframe(
        df["HOMETAZi"], df["DTAZi"],
    )
)


# For clarity, we can define numbers as names for modes
DA = 1
SR = 2
Walk = 3