def getorigin(ilat, ilong, popemp, mbta, zipscale, subwayscale):

    """

    Compute features for where people live, work, and ride the subway.

    Each feature is a coupling-weighted sum over all zip codes (or subway
    stops), divided by the number of zip codes (or subway stops).

    Parameters
    ----------
    ilat, ilong
        Latitude and longitude of the location being scored; forwarded
        unchanged to ``densitymetric.distvec``.
    popemp
        Column-indexable table (presumably a pandas DataFrame -- confirm
        with callers) with 'latitude', 'longitude', 'HD01' and 'EMP'
        columns.  It is read but never modified by this function.
    mbta
        Column-indexable table with 'latitude', 'longitude' and
        'ridesperday' columns.
    zipscale, subwayscale
        Scale arguments forwarded to ``densitymetric.zipcouple`` for the
        zip-code and subway couplings respectively.

    Returns
    -------
    tuple
        ``(originpop, originwork, originsubway)``.

    """

    # locations of where people live
    latbyzip = popemp['latitude'].values
    longbyzip = popemp['longitude'].values

    # per-zip weights for the "live" feature
    popbyzip = popemp['HD01'].values

    # per-zip weights for the "work" feature; take a private float copy so
    # that filling the gaps below never writes into the caller's table (the
    # array behind a column may be a view, or read-only)
    workbyzip = np.array(popemp['EMP'].values, dtype=float)

    # replace non-finite entries (nan/inf) with the mean of the finite ones
    missing = ~np.isfinite(workbyzip)
    if missing.any():
        workbyzip[missing] = workbyzip[~missing].mean()

    # locations of where people ride the subway
    latbysubway = mbta['latitude']
    longbysubway = mbta['longitude']
    subwayrides = mbta['ridesperday'].values

    # get the distance from given location to all zip codes
    distancevec = densitymetric.distvec(latbyzip, longbyzip, ilat, ilong)

    # compute the coupling factor to all zip codes
    couplingzip = densitymetric.zipcouple(distancevec, zipscale)

    # use the weighted sum as the origin score
    # normalize by number of zip codes
    nzip = len(workbyzip)
    originpop = np.sum(popbyzip * couplingzip) / nzip
    originwork = np.sum(workbyzip * couplingzip) / nzip

    # get the distance from given location to all subway stops
    distancevecsubway = densitymetric.distvec(latbysubway, longbysubway,
                                              ilat, ilong)

    # coupling between this location and all subway stops
    couplingsubway = densitymetric.zipcouple(distancevecsubway, subwayscale)

    # use the weighted sum as the origin score
    # normalize by number of subway stops
    nsubway = len(subwayrides)
    originsubway = np.sum(subwayrides * couplingsubway) / nsubway

    return originpop, originwork, originsubway
Exemple #2
0
def getorigin(ilat, ilong, popemp, mbta, zipscale, subwayscale):
    """

    Compute features for where people live, work, and ride the subway.

    Every returned score is ``sum(weights * coupling) / count``, where the
    coupling comes from ``densitymetric.zipcouple`` applied to the
    distances returned by ``densitymetric.distvec``.

    Parameters
    ----------
    ilat, ilong
        Coordinates of the location being scored; passed straight through
        to ``densitymetric.distvec``.
    popemp
        Column-indexable table (presumably a pandas DataFrame -- confirm
        with callers) providing 'latitude', 'longitude', 'HD01' and 'EMP'.
        This function does not write to it.
    mbta
        Column-indexable table providing 'latitude', 'longitude' and
        'ridesperday'.
    zipscale, subwayscale
        Scale arguments handed to ``densitymetric.zipcouple`` for the
        zip-code and the subway couplings respectively.

    Returns
    -------
    tuple
        ``(originpop, originwork, originsubway)``.

    """

    # locations of where people live
    latbyzip = popemp['latitude'].values
    longbyzip = popemp['longitude'].values

    # weights used for the "live" score
    popbyzip = popemp['HD01'].values

    # weights used for the "work" score.  np.array() copies, so the gap
    # filling below cannot leak into (or fail on a read-only view of) the
    # caller's 'EMP' column.
    workbyzip = np.array(popemp['EMP'].values, dtype=float)

    # replace nans (and infs) with the average of the finite values
    notfinite = ~np.isfinite(workbyzip)
    if notfinite.any():
        workbyzip[notfinite] = workbyzip[~notfinite].mean()

    # locations of where people ride the subway
    latbysubway = mbta['latitude']
    longbysubway = mbta['longitude']
    subwayrides = mbta['ridesperday'].values

    # get the distance from given location to all zip codes
    distancevec = densitymetric.distvec(latbyzip, longbyzip, ilat, ilong)

    # compute the coupling factor to all zip codes
    couplingzip = densitymetric.zipcouple(distancevec, zipscale)

    # use the weighted sum as the origin score
    # normalize by number of zip codes
    nzip = len(workbyzip)
    originpop = np.sum(popbyzip * couplingzip) / nzip
    originwork = np.sum(workbyzip * couplingzip) / nzip

    # get the distance from given location to all subway stops
    distancevecsubway = densitymetric.distvec(latbysubway, longbysubway, ilat,
                                              ilong)

    # coupling between this location and all subway stops
    couplingsubway = densitymetric.zipcouple(distancevecsubway, subwayscale)

    # use the weighted sum as the origin score
    # normalize by number of subway stops
    nsubway = len(subwayrides)
    originsubway = np.sum(subwayrides * couplingsubway) / nsubway

    return originpop, originwork, originsubway
def getdestination(ilat, ilong, station, stationscale, zipscale,
                   stationfeatures, dataloc):

    """

    Compute features associated with all possible destinations.

    The three origin features of the existing stations are each weighted
    by the station-to-station coupling, summed, and divided by the number
    of stations.  ``stationscale`` is accepted but not used here.

    Returns ``(destpop, destwork, destsubway, maxcouple)``, where
    ``maxcouple`` is the largest ``densitymetric.zipcouple`` value between
    the input location and any existing station.

    """

    # per-station origin features (live, work, subway), in output order
    feature_columns = ('originpop', 'originwork', 'originsubway')
    origin_values = [stationfeatures[col].values for col in feature_columns]

    # distance from the input location to every existing station
    dist_to_stations = densitymetric.distvec(
        station['lat'].values, station['lng'].values, ilat, ilong)

    # station to station coupling factor
    pair_coupling = densitymetric.stationcouple(dist_to_stations, dataloc)

    # largest zip-style coupling to any existing station; per the original
    # author's note this feeds the cannibalism factor downstream:
    # nrides -= nrides * maxcouple
    maxcouple = densitymetric.zipcouple(dist_to_stations, zipscale).max()

    # coupling-weighted sums, normalized by the number of stations
    n_stations = len(origin_values[0])
    destpop, destwork, destsubway = [
        np.sum(values * pair_coupling) / n_stations
        for values in origin_values
    ]

    return destpop, destwork, destsubway, maxcouple
Exemple #4
0
def getdestination(ilat, ilong, station, stationscale, zipscale,
                   stationfeatures, dataloc):
    """

    Compute features associated with all possible destinations.

    Weights each existing station's origin features by its coupling to the
    input location and averages over stations.  The ``stationscale``
    argument is part of the signature but is not read by this function.

    Returns the tuple ``(destpop, destwork, destsubway, maxcouple)``.

    """

    # origin features of the existing stations: live, work, subway
    pop_by_station = stationfeatures['originpop'].values
    work_by_station = stationfeatures['originwork'].values
    subway_by_station = stationfeatures['originsubway'].values

    # distances from the input location to all existing stations
    separation = densitymetric.distvec(station['lat'].values,
                                       station['lng'].values, ilat, ilong)

    # station to station coupling factor
    weights = densitymetric.stationcouple(separation, dataloc)

    # maximum zip-style coupling over the existing stations; the original
    # note says it is used later for the cannibalism factor:
    # nrides -= nrides * maxcouple
    maxcouple = densitymetric.zipcouple(separation, zipscale).max()

    station_count = len(pop_by_station)

    def weighted_mean(values):
        # coupling-weighted sum, normalized by the number of stations
        return np.sum(values * weights) / station_count

    return (weighted_mean(pop_by_station), weighted_mean(work_by_station),
            weighted_mean(subway_by_station), maxcouple)
def getfeature(ilat, ilong, popemp, mbta, station, zipscale, stationscale,
               subwayscale, stationpop, stationwork, stationsubway, latvec,
               longvec):

    """

    Get the feature vector associated with the input latitude and longitude.

    Origin scores are coupling-weighted sums over zip codes and subway
    stops; destination scores are coupling-weighted sums of the supplied
    per-station values.  None of the sums is normalized.

    Parameters
    ----------
    ilat, ilong
        Coordinates of the location being scored; forwarded unchanged to
        ``densitymetric.distvec``.
    popemp
        Column-indexable table (presumably a pandas DataFrame -- confirm
        with callers) with 'latitude', 'longitude', 'HD01' and 'EMP'
        columns.  It is read but never modified by this function.
    mbta
        Column-indexable table with 'latitude', 'longitude' and
        'ridesperday' columns.
    station
        Column-indexable table with 'lat' and 'lng' columns.
    zipscale, stationscale, subwayscale
        Scale arguments forwarded to ``densitymetric.coupling`` for the
        zip-code, station and subway couplings respectively.
    stationpop, stationwork, stationsubway
        Per-station values multiplied elementwise by the station coupling.
    latvec, longvec
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple
        ``(features, maxcouple)`` where ``features`` is the list
        ``[originpop, originwork, destpop, destwork, originsubway,
        destsubway]`` and ``maxcouple`` is the largest station coupling.

    """

    stationlat = station['lat'].values
    stationlong = station['lng'].values
    latbyzip = popemp['latitude'].values
    longbyzip = popemp['longitude'].values
    popbyzip = popemp['HD01'].values

    # private float copy: the gap filling below must not write into the
    # caller's 'EMP' column (its array may be a view, or read-only)
    workbyzip = np.array(popemp['EMP'].values, dtype=float)

    # fix nans: non-finite entries get the mean of the finite ones
    missing = ~np.isfinite(workbyzip)
    if missing.any():
        workbyzip[missing] = workbyzip[~missing].mean()

    latbysubway = mbta['latitude']
    longbysubway = mbta['longitude']
    subwayrides = mbta['ridesperday'].values

    # coupling between this location and all zip codes
    distancevec = densitymetric.distvec(latbyzip, longbyzip, ilat, ilong)
    couplingzip = densitymetric.coupling(distancevec, zipscale)
    originpop = np.sum(popbyzip * couplingzip)
    originwork = np.sum(workbyzip * couplingzip)

    distancevecsubway = densitymetric.distvec(latbysubway, longbysubway,
                                              ilat, ilong)

    # coupling between this location and all subway stops
    couplingsubway = densitymetric.coupling(distancevecsubway, subwayscale)

    # weighted sum of subway rides
    originsubway = np.sum(subwayrides * couplingsubway)

    # test: Is there a station in stationcoupling that is ~1?  Are stations
    # that are known to be far from each other correctly assigned a low
    # coupling efficiency?  Tests indicate yes.

    # compute destination scores for population, employee, and subway
    distancevec = densitymetric.distvec(stationlat, stationlong, ilat, ilong)

    # station to station coupling
    stationcoupling = densitymetric.coupling(distancevec, stationscale)

    destpop = np.sum(stationpop * stationcoupling)
    destwork = np.sum(stationwork * stationcoupling)
    destsubway = np.sum(stationsubway * stationcoupling)

    features = [originpop, originwork, destpop, destwork,
                originsubway, destsubway]

    maxcouple = stationcoupling.max()

    return features, maxcouple
Exemple #6
0
def getfeature(ilat, ilong, popemp, mbta, station, zipscale, stationscale,
               subwayscale, stationpop, stationwork, stationsubway, latvec,
               longvec):
    """

    Get the feature vector associated with the input latitude and longitude.

    Six un-normalized, coupling-weighted sums are produced: three "origin"
    scores (over zip codes and subway stops) and three "destination" scores
    (over the supplied per-station values).

    Parameters
    ----------
    ilat, ilong
        Coordinates of the location being scored; passed straight through
        to ``densitymetric.distvec``.
    popemp
        Column-indexable table (presumably a pandas DataFrame -- confirm
        with callers) providing 'latitude', 'longitude', 'HD01' and 'EMP'.
        This function does not write to it.
    mbta
        Column-indexable table providing 'latitude', 'longitude' and
        'ridesperday'.
    station
        Column-indexable table providing 'lat' and 'lng'.
    zipscale, stationscale, subwayscale
        Scale arguments handed to ``densitymetric.coupling`` for the
        zip-code, station and subway couplings respectively.
    stationpop, stationwork, stationsubway
        Per-station values multiplied elementwise by the station coupling.
    latvec, longvec
        Present in the signature but not read by this function.

    Returns
    -------
    tuple
        ``(features, maxcouple)``; ``features`` is the list
        ``[originpop, originwork, destpop, destwork, originsubway,
        destsubway]`` and ``maxcouple`` is ``stationcoupling.max()``.

    """

    stationlat = station['lat'].values
    stationlong = station['lng'].values
    latbyzip = popemp['latitude'].values
    longbyzip = popemp['longitude'].values
    popbyzip = popemp['HD01'].values

    # np.array() copies, so filling gaps below cannot leak into (or fail on
    # a read-only view of) the caller's 'EMP' column
    workbyzip = np.array(popemp['EMP'].values, dtype=float)

    # fix nans: replace non-finite entries with the mean of the finite ones
    notfinite = ~np.isfinite(workbyzip)
    if notfinite.any():
        workbyzip[notfinite] = workbyzip[~notfinite].mean()

    latbysubway = mbta['latitude']
    longbysubway = mbta['longitude']
    subwayrides = mbta['ridesperday'].values

    # coupling between this location and all zip codes
    distancevec = densitymetric.distvec(latbyzip, longbyzip, ilat, ilong)
    couplingzip = densitymetric.coupling(distancevec, zipscale)
    originpop = np.sum(popbyzip * couplingzip)
    originwork = np.sum(workbyzip * couplingzip)

    distancevecsubway = densitymetric.distvec(latbysubway, longbysubway, ilat,
                                              ilong)

    # coupling between this location and all subway stops
    couplingsubway = densitymetric.coupling(distancevecsubway, subwayscale)

    # weighted sum of subway rides
    originsubway = np.sum(subwayrides * couplingsubway)

    # test: Is there a station in stationcoupling that is ~1?  Are stations
    # that are known to be far from each other correctly assigned a low
    # coupling efficiency?  Tests indicate yes.

    # compute destination scores for population, employee, and subway
    distancevec = densitymetric.distvec(stationlat, stationlong, ilat, ilong)

    # station to station coupling
    stationcoupling = densitymetric.coupling(distancevec, stationscale)

    destpop = np.sum(stationpop * stationcoupling)
    destwork = np.sum(stationwork * stationcoupling)
    destsubway = np.sum(stationsubway * stationcoupling)

    features = [
        originpop, originwork, destpop, destwork, originsubway, destsubway
    ]

    maxcouple = stationcoupling.max()

    return features, maxcouple