def getorigin(ilat, ilong, popemp, mbta, zipscale, subwayscale):
    """
    Compute origin features for where people live, work, and ride the subway.

    Each feature is a coupling-weighted sum over all zip codes (or subway
    stops), divided by the number of zip codes (or subway stops).

    Parameters
    ----------
    ilat, ilong
        Latitude and longitude of the location being scored.
    popemp
        Table indexed by column name (presumably a pandas DataFrame with one
        row per zip code) providing 'latitude', 'longitude', 'HD01' (used as
        the population) and 'EMP' (used as the employment).  It is not
        modified by this function.
    mbta
        Table providing 'latitude', 'longitude' and 'ridesperday'
        (presumably one row per subway stop).
    zipscale, subwayscale
        Scale arguments forwarded to ``densitymetric.zipcouple`` for the zip
        code coupling and the subway coupling respectively.

    Returns
    -------
    tuple
        ``(originpop, originwork, originsubway)``.
    """

    # locations of where people live
    latbyzip = popemp['latitude'].values
    longbyzip = popemp['longitude'].values
    popbyzip = popemp['HD01'].values

    # Employment per zip code.  Work on a private float copy: ``.values`` can
    # be a view onto the caller's table (or a read-only array when pandas
    # Copy-on-Write is enabled), so the gap filling below must not write
    # through it.
    workbyzip = np.array(popemp['EMP'].values, dtype=float)

    # replace non-finite entries (NaN, inf) with the mean of the finite ones
    finite = np.isfinite(workbyzip)
    workbyzip[~finite] = workbyzip[finite].mean()

    # locations of where people ride the subway
    latbysubway = mbta['latitude'].values
    longbysubway = mbta['longitude'].values
    subwayrides = mbta['ridesperday'].values

    # distance from the given location to all zip codes, and the resulting
    # coupling factor to each zip code
    distancevec = densitymetric.distvec(latbyzip, longbyzip, ilat, ilong)
    couplingzip = densitymetric.zipcouple(distancevec, zipscale)

    # use the weighted sum as the origin score,
    # normalized by the number of zip codes
    nzip = len(workbyzip)
    originpop = np.sum(popbyzip * couplingzip) / nzip
    originwork = np.sum(workbyzip * couplingzip) / nzip

    # distance from the given location to all subway stops, and the
    # coupling efficiency between this location and each stop
    distancevecsubway = densitymetric.distvec(latbysubway, longbysubway,
                                              ilat, ilong)
    couplingsubway = densitymetric.zipcouple(distancevecsubway, subwayscale)

    # use the weighted sum as the origin score,
    # normalized by the number of subway stops
    nsubway = len(subwayrides)
    originsubway = np.sum(subwayrides * couplingsubway) / nsubway

    return originpop, originwork, originsubway
def getdestination(ilat, ilong, station, stationscale, zipscale,
                   stationfeatures, dataloc):
    """
    Score a location as a destination relative to all existing stations.

    The origin scores of the existing stations are combined into three
    destination scores, each a station-coupling-weighted sum divided by the
    number of stations.

    Parameters
    ----------
    ilat, ilong
        Latitude and longitude of the input station.
    station
        Table providing the 'lat' and 'lng' columns of the existing stations.
    stationscale
        Not used by this function; kept as part of the signature.
    zipscale
        Scale argument forwarded to ``densitymetric.zipcouple``.
    stationfeatures
        Table providing the 'originpop', 'originwork' and 'originsubway'
        columns of the existing stations.
    dataloc
        Forwarded unchanged to ``densitymetric.stationcouple``.

    Returns
    -------
    tuple
        ``(destpop, destwork, destsubway, maxcouple)``.
    """

    # origin scores (live, work, subway) of every existing station, kept in
    # the same order as the destination scores that are returned
    origin_columns = ('originpop', 'originwork', 'originsubway')
    origin_scores = [stationfeatures[column].values
                     for column in origin_columns]

    # distance from the input station to each existing station
    separation = densitymetric.distvec(station['lat'].values,
                                       station['lng'].values,
                                       ilat, ilong)

    # station to station coupling factor
    weights = densitymetric.stationcouple(separation, dataloc)

    # Largest zip-style coupling over all stations.  This is used
    # subsequently to compute the cannibalism factor:
    # nrides -= nrides * maxcouple
    maxcouple = densitymetric.zipcouple(separation, zipscale).max()

    # weighted sums, normalized by the number of stations
    nstations = len(origin_scores[0])
    destpop, destwork, destsubway = [
        np.sum(score * weights) / nstations for score in origin_scores
    ]

    return destpop, destwork, destsubway, maxcouple
def getfeature(ilat, ilong, popemp, mbta, station, zipscale, stationscale,
               subwayscale, stationpop, stationwork, stationsubway,
               latvec, longvec):
    """
    Get the feature vector associated with the input latitude and longitude.

    Origin features are coupling-weighted sums over zip codes and subway
    stops; destination features are coupling-weighted sums over the existing
    stations.  None of the sums is normalized.

    Parameters
    ----------
    ilat, ilong
        Latitude and longitude of the location being scored.
    popemp
        Table indexed by column name (presumably a pandas DataFrame with one
        row per zip code) providing 'latitude', 'longitude', 'HD01' (used as
        the population) and 'EMP' (used as the employment).  It is not
        modified by this function.
    mbta
        Table providing 'latitude', 'longitude' and 'ridesperday'
        (presumably one row per subway stop).
    station
        Table providing the 'lat' and 'lng' columns of the existing stations.
    zipscale, stationscale, subwayscale
        Scale arguments forwarded to ``densitymetric.coupling`` for the zip
        code, station and subway couplings respectively.
    stationpop, stationwork, stationsubway
        Per-station values that are weighted by the station coupling to form
        the destination features (presumably the stations' origin scores).
    latvec, longvec
        Not used by this function; kept as part of the signature.

    Returns
    -------
    features : list
        ``[originpop, originwork, destpop, destwork, originsubway,
        destsubway]``.
    maxcouple
        Largest station-to-station coupling for this location.
    """

    stationlat = station['lat'].values
    stationlong = station['lng'].values

    latbyzip = popemp['latitude'].values
    longbyzip = popemp['longitude'].values
    popbyzip = popemp['HD01'].values

    # Employment per zip code.  Work on a private float copy: ``.values`` can
    # be a view onto the caller's table (or a read-only array when pandas
    # Copy-on-Write is enabled), so the gap filling below must not write
    # through it.
    workbyzip = np.array(popemp['EMP'].values, dtype=float)

    # replace non-finite entries (NaN, inf) with the mean of the finite ones
    finite = np.isfinite(workbyzip)
    workbyzip[~finite] = workbyzip[finite].mean()

    latbysubway = mbta['latitude'].values
    longbysubway = mbta['longitude'].values
    subwayrides = mbta['ridesperday'].values

    # coupling between this location and all zip codes
    distancevec = densitymetric.distvec(latbyzip, longbyzip, ilat, ilong)
    couplingzip = densitymetric.coupling(distancevec, zipscale)

    originpop = np.sum(popbyzip * couplingzip)
    originwork = np.sum(workbyzip * couplingzip)

    # coupling efficiency between this location and all subway stops
    distancevecsubway = densitymetric.distvec(latbysubway, longbysubway,
                                              ilat, ilong)
    couplingsubway = densitymetric.coupling(distancevecsubway, subwayscale)

    # weighted sum of subway rides
    originsubway = np.sum(subwayrides * couplingsubway)

    # station to station coupling
    # NOTE: per the original author's checks, a station coinciding with the
    # input location couples at ~1 and stations known to be far apart are
    # assigned a low coupling efficiency.
    distancevec = densitymetric.distvec(stationlat, stationlong, ilat, ilong)
    stationcoupling = densitymetric.coupling(distancevec, stationscale)

    # destination scores for population, employees, and subway
    destpop = np.sum(stationpop * stationcoupling)
    destwork = np.sum(stationwork * stationcoupling)
    destsubway = np.sum(stationsubway * stationcoupling)

    features = [
        originpop,
        originwork,
        destpop,
        destwork,
        originsubway,
        destsubway,
    ]

    maxcouple = stationcoupling.max()

    return features, maxcouple