def pathMatch(dbServer, networkName, userName, password, filename, limitMap=None):
    """
    pathMatch connects to a VISTA database, reads the network topology, reads the GPS
    tracks from the given file, and runs the path engine on each track.

    @param dbServer: Passed to vista_network.connect() as the server to connect to.
    @param networkName: Passed to vista_network.connect() as the network to read.
    @param userName: Passed to vista_network.connect().
    @param password: Passed to vista_network.connect().
    @param filename: File handed to fillFromFile() to obtain the GPS tracks.
    @param limitMap: Optional container of datafile IDs. When it is not None, only tracks
            whose IDs are in it are processed, and a warning is printed for every ID in it
            that is absent from the file.
    @return Mapping of each processed datafile ID to the result of
            PathEngine.constructPath() for that track.
    @rtype dict<str, list<path_engine.PathEnd>>
    """
    # Default parameters, with explanations and cross-references to Perrine et al., 2015:
    pointSearchRadius = 1000    # "k": Radius (ft) to search from GTFS point to perpendicular VISTA links
    pointSearchPrimary = 350    # "k_p": Radius (ft) to search from GTFS point to new VISTA links
    pointSearchSecondary = 200  # "k_s": Radius (ft) to search from VISTA perpendicular point to previous point
    limitLinearDist = 3800      # Path distance (ft) to allow new proposed paths from one point to another
    limitDirectDist = 3500      # Radius (ft) to allow new proposed paths from one point to another
    limitDirectDistRev = 500    # Radius (ft) to allow backtracking on an existing link (e.g. parking lot)
    distanceFactor = 1.0        # "f_d": Cost multiplier for Linear path distance
    driftFactor = 1.5           # "f_r": Cost multiplier for distance from GTFS point to its VISTA link
    nonPerpPenalty = 1.5        # "f_p": Penalty multiplier for GTFS points that aren't perpendicular to VISTA links
    limitClosestPoints = 8      # "q_p": Number of close-proximity points that are considered for each GTFS point
    limitSimultaneousPaths = 6  # "q_e": Number of proposed paths to maintain during pathfinding stage

    maxHops = 12                # Maximum number of VISTA links to pursue in a path-finding operation

    # Get the database connected:
    print("INFO: Connect to database...", file=sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file=sys.stderr)
    vistaGraph = vista_network.fillGraph(database)

    # Read in the GPS track information:
    print("INFO: Read GDB GPS track...", file=sys.stderr)
    gpsTracks = fillFromFile(filename, vistaGraph.gps)

    # Initialize the path-finder:
    pathFinder = path_engine.PathEngine(pointSearchRadius, pointSearchPrimary, pointSearchSecondary,
                                        limitLinearDist, limitDirectDist, limitDirectDistRev,
                                        distanceFactor, driftFactor, nonPerpPenalty,
                                        limitClosestPoints, limitSimultaneousPaths)
    pathFinder.maxHops = maxHops

    # Begin iteration through each shape:
    # @type datafileIDs: list<str>
    datafileIDs = compat.listkeys(gpsTracks)
    datafileIDs.sort()
    # @type nodesResults: dict<str, list<path_engine.PathEnd>>
    nodesResults = {}

    if limitMap is not None:
        for datafileID in limitMap:
            if datafileID not in datafileIDs:
                # The ID is formatted with %s (as the INFO message below does) so that
                # a non-integer ID produces the warning rather than a TypeError.
                print("WARNING: Limit datafile ID %s is not found in the shape file." % datafileID,
                      file=sys.stderr)

    for datafileID in datafileIDs:
        if limitMap is not None and datafileID not in limitMap:
            continue

        print("INFO: -- Datafile %s --" % datafileID, file=sys.stderr)

        # Find the path for the given shape:
        gtfsNodes = pathFinder.constructPath(gpsTracks[datafileID], vistaGraph)

        # File this away as a result for later output:
        nodesResults[datafileID] = gtfsNodes
    return nodesResults
def pathsRefine(gtfsNodes, hintEntries, vistaGraph):
    """
    pathsRefine runs the path engine's refinement pass over every shape in gtfsNodes,
    supplying the hints registered for each shape (or an empty list when there are none).

    @param gtfsNodes: Mapping of shape ID to the previously found path for that shape.
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @param hintEntries: Mapping of shape ID to the hints for that shape; shapes may be absent.
    @param vistaGraph: The network passed through to PathEngine.refinePath().
    @return Mapping of shape ID to the result of PathEngine.refinePath() for that shape.
    @rtype dict<int, list<path_engine.PathEnd>>
    """
    # Default parameters, with explanations and cross-references to Perrine et al., 2015:
    hintRefactorRadius = 1000    # Radius (ft) to invalidate surrounding found points.
    termRefactorRadius = 3000    # Radius (ft) to invalidate found points at either end of a restart.
    pointSearchRadius = 1600     # "k": Radius (ft) to search from GTFS point to perpendicular VISTA links
    pointSearchPrimary = 1600    # "k_p": Radius (ft) to search from GTFS point to new VISTA links
    pointSearchSecondary = 200   # "k_s": Radius (ft) to search from VISTA perpendicular point to previous point
    limitLinearDist = 6200       # Path distance (ft) to allow new proposed paths from one point to another
    limitDirectDist = 6200       # Radius (ft) to allow new proposed paths from one point to another
    limitDirectDistRev = 500     # Radius (ft) to allow backtracking on an existing link (e.g. parking lot)
    distanceFactor = 1.0         # "f_d": Cost multiplier for Linear path distance
    driftFactor = 1.5            # "f_r": Cost multiplier for distance from GTFS point to its VISTA link
    nonPerpPenalty = 1.5         # "f_p": Penalty multiplier for GTFS points that aren't perpendicular to VISTA links
    limitClosestPoints = 25      # "q_p": Number of close-proximity points that are considered for each GTFS point
    limitSimultaneousPaths = 25  # "q_e": Number of proposed paths to maintain during pathfinding stage

    maxHops = 8                  # Maximum number of VISTA links to pursue in a path-finding operation
    limitHintClosest = 4         # Number of hint closest points and closest previous track points

    # Set up the engine, then layer the refinement-specific settings on top of it:
    engine = path_engine.PathEngine(
        pointSearchRadius, pointSearchPrimary, pointSearchSecondary,
        limitLinearDist, limitDirectDist, limitDirectDistRev,
        distanceFactor, driftFactor, nonPerpPenalty,
        limitClosestPoints, limitSimultaneousPaths)
    engine.setRefineParams(hintRefactorRadius, termRefactorRadius)
    engine.maxHops = maxHops
    engine.limitHintClosest = limitHintClosest

    # Visit the shapes in ascending ID order:
    # @type refinedByShape: dict<int, list<path_engine.PathEnd>>
    refinedByShape = {}
    for shapeID in sorted(compat.listkeys(gtfsNodes)):
        print("INFO: -- Shape ID %s --" % str(shapeID), file=sys.stderr)

        # A shape without registered hints is refined against an empty hint list:
        if shapeID in hintEntries:
            shapeHints = hintEntries[shapeID]
        else:
            shapeHints = list()

        # Refine the path for this shape and keep it for later output:
        refinedByShape[shapeID] = engine.refinePath(gtfsNodes[shapeID], vistaGraph, shapeHints)
    return refinedByShape
def dumpBusRouteLinks(gtfsTrips, gtfsStopTimes, gtfsNodes, vistaNetwork, stopSearchRadius, excludeUpstream, userName,
                      networkName, startTime, endTime, widenBegin, widenEnd, excludeBegin, excludeEnd,
                      outFile=sys.stdout):
    """
    dumpBusRouteLinks dumps out a public.bus_route_link.csv file contents. Stop times and
    trips that fall outside of the valid evaluation interval, as dictated by the widen and
    exclusion parameters, are left out of the output.

    For each trip, in ascending trip ID order, this:
      1. finds the longest run of path nodes that contains no restart,
      2. skips the trip silently if it has stop times and none arrives within
         [startTime, endTime],
      3. builds a small network from the links of that run, matches the trip's stops to it
         with a path engine, and writes one CSV row per link (with the matched stop, if any).

    This function also reads the name problemReport, which is not a parameter and is
    expected to be defined at module level. When it is true, per-stop nodes are collected
    and handed to problem_report.problemReport() after all trips are processed.

    @type gtfsTrips: dict<int, gtfs.TripsEntry>
    @type gtfsStopTimes: dict<TripsEntry, list<StopTimesEntry>>
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaNetwork: graph.GraphLib
    @type stopSearchRadius: float
    @type excludeUpstream: boolean
    @type userName: str
    @type networkName: str
    @type startTime: datetime
    @type endTime: datetime
    @type widenBegin: bool
    @type widenEnd: bool
    @type excludeBegin: bool
    @type excludeEnd: bool
    @type outFile: file
    @return A mapping of stopID to points-on-links plus the start and end times adjusted for
            warm-up and cool-down (if widenBegin or widenEnd is True)
    @rtype (dict<int, graph.PointOnLink>, datetime, datetime)
    """
    _outHeader("public.bus_route_link", userName, networkName, outFile)
    print('"route","sequence","link","stop","dwelltime",', file=outFile)

    # Set up the output:
    # @type ret: dict<int, graph.PointOnLink>
    ret = {}
    warmupStartTime = startTime
    cooldownEndTime = endTime

    # Initialize the path engine for use later:
    pathEngine = path_engine.PathEngine(stopSearchRadius, stopSearchRadius, stopSearchRadius,
                                        sys.float_info.max, sys.float_info.max, stopSearchRadius,
                                        1, 1, 1, sys.maxsize, sys.maxsize)
    pathEngine.limitClosestPoints = 8
    pathEngine.limitSimultaneousPaths = 6
    pathEngine.maxHops = 12
    pathEngine.logFile = None  # Suppress the log outputs for the path engine; enough stuff will come from other sources.

    # @type problemReportNodes: dict<shapeID, dict<stopSeq, path_engine.PathEnd>>
    problemReportNodes = {}

    tripIDs = compat.listkeys(gtfsTrips)
    tripIDs.sort()
    for tripID in tripIDs:
        tripEntry = gtfsTrips[tripID]
        shapeID = tripEntry.shapeEntries[0].shapeID
        if shapeID not in gtfsNodes:
            # This happens if the incoming files contain a subset of all available topology.
            print("WARNING: Skipping route for trip %d because no points are available." % tripID,
                  file=sys.stderr)
            continue

        # @type treeNodes: list<path_engine.PathEnd>
        treeNodes = gtfsNodes[shapeID]

        # Step 1: Find the longest distance of contiguous valid links within the shape for each trip:
        startIndex = -1
        curIndex = 0
        linkCount = 0
        totalLinks = 0

        longestStart = -1
        longestEnd = len(treeNodes)
        # NOTE(review): sys.float_info.min is the smallest positive float, not the most
        # negative one, so an interval whose distance is exactly 0 never wins. Confirm that
        # this is intended before changing it.
        longestDist = sys.float_info.min
        longestLinkCount = 0

        # The index runs one past the end so that the final interval gets closed off too.
        while curIndex <= len(treeNodes):
            if (curIndex == len(treeNodes)) or (curIndex == 0) or treeNodes[curIndex].restart:
                totalLinks += 1
                linkCount += 1

                if (curIndex > startIndex) and (startIndex >= 0):
                    # We have a contiguous interval. See if it wins:
                    intervalDist = treeNodes[curIndex - 1].totalDist - treeNodes[startIndex].totalDist
                    if intervalDist > longestDist:
                        longestStart = startIndex
                        longestEnd = curIndex
                        longestDist = intervalDist
                        longestLinkCount = linkCount
                    linkCount = 0

                # This happens if it is time to start a new interval:
                startIndex = curIndex
            else:
                totalLinks += len(treeNodes[curIndex].routeInfo)
                linkCount += len(treeNodes[curIndex].routeInfo)
            curIndex += 1

        if longestStart >= 0:
            # We have a valid path. See if it had been trimmed down and report it.
            if (longestStart > 0) or (longestEnd < len(treeNodes)):
                print("WARNING: For shape ID %s from seq. %d through %d, %.2g%% of %d links will be used." \
                      % (str(treeNodes[longestStart].shapeEntry.shapeID), treeNodes[longestStart].shapeEntry.shapeSeq,
                         treeNodes[longestEnd - 1].shapeEntry.shapeSeq,
                         100 * float(longestLinkCount) / float(totalLinks), totalLinks), file=sys.stderr)

            # Step 2: Ignore routes that are entirely outside our valid time interval.
            # @type stopTimes: list<gtfs.StopTimesEntry>
            stopTimes = gtfsStopTimes[tripEntry]
            if len(stopTimes) == 0:
                # This will happen if we don't have stops defined. In this case, we want to go ahead and
                # process the bus_route_link outputs because we don't know if the trip falls in or out of
                # the valid time range.
                touchesInterval = True
            else:
                touchesInterval = any(startTime <= stopEntry.arrivalTime <= endTime
                                      for stopEntry in stopTimes)
            if not touchesInterval:
                # This will be done silently because (depending upon the valid interval) there could be
                # hundreds of these in a GTFS set.
                continue

            # Step 3: Match up stops to that contiguous list:
            # At this point, we're doing something with this.
            print("INFO: -- Matching stops for trip %d --" % tripID, file=sys.stderr)

            # Isolate the relevant VISTA tree nodes: (Assume from above that this is a non-zero length array)
            ourGTFSNodes = treeNodes[longestStart:longestEnd]
            firstLink = ourGTFSNodes[0].pointOnLink.link
            lastDestNode = ourGTFSNodes[-1].pointOnLink.link.destNode

            # We are going to recreate a small VISTA network from ourGTFSNodes and then match up the stops to that.
            # First, prepare the small VISTA network:
            vistaSubset = graph.GraphLib(vistaNetwork.gps.latCtr, vistaNetwork.gps.lngCtr)

            # Build a list of links:
            # @type outLinkIDList: list<int>
            outLinkIDList = []

            # Plop in the start node:
            # @type vistaNodePrior: graph.GraphNode
            vistaNodePrior = graph.GraphNode(firstLink.origNode.id, firstLink.origNode.gpsLat,
                                             firstLink.origNode.gpsLng)
            vistaSubset.addNode(vistaNodePrior)
            outLinkIDList.append(firstLink.id)

            # Link together nodes as we traverse through them:
            for ourGTFSNode in ourGTFSNodes:
                # There should only be one destination link per VISTA node because this comes from our tree.
                # If there is no link or we're repeating the first one, then there were no new links assigned.
                if (len(ourGTFSNode.routeInfo) < 1) or ((len(outLinkIDList) == 1) \
                        and (ourGTFSNode.routeInfo[0].id == firstLink.id)):
                    continue
                for link in ourGTFSNode.routeInfo:
                    if link.id not in vistaNetwork.linkMap:
                        print("WARNING: In finding bus route links, link ID %d is not found in the VISTA network." % link.id,
                              file=sys.stderr)
                        continue
                    # @type origVistaLink: graph.GraphLink
                    origVistaLink = vistaNetwork.linkMap[link.id]

                    if origVistaLink.origNode.id not in vistaSubset.nodeMap:
                        # Create a new node:
                        vistaNode = graph.GraphNode(origVistaLink.origNode.id, origVistaLink.origNode.gpsLat,
                                                    origVistaLink.origNode.gpsLng)
                        vistaSubset.addNode(vistaNode)
                    else:
                        # The path evidently crosses over itself. Reuse an existing node.
                        vistaNode = vistaSubset.nodeMap[origVistaLink.origNode.id]

                    # The link that was pending (the last ID in the list) can be closed off now that
                    # the node it leads to is known.
                    if outLinkIDList[-1] not in vistaSubset.linkMap:
                        vistaSubset.addLink(graph.GraphLink(outLinkIDList[-1], vistaNodePrior, vistaNode))
                    vistaNodePrior = vistaNode
                    outLinkIDList.append(link.id)

            # And then finish off the graph with the last link:
            if lastDestNode.id not in vistaSubset.nodeMap:
                vistaNode = graph.GraphNode(lastDestNode.id, lastDestNode.gpsLat, lastDestNode.gpsLng)
                vistaSubset.addNode(vistaNode)
            else:
                # The path ends on a node that it already visited. Reuse that node; without
                # this, vistaNode would be whatever an earlier step left behind (or unbound).
                vistaNode = vistaSubset.nodeMap[lastDestNode.id]
            if outLinkIDList[-1] not in vistaSubset.linkMap:
                vistaSubset.addLink(graph.GraphLink(outLinkIDList[-1], vistaNodePrior, vistaNode))

            # Then, prepare the stops as GTFS shapes entries:
            print("INFO: Mapping stops to VISTA network...", file=sys.stderr)
            gtfsShapes = []
            # @type gtfsStopsLookup: dict<int, gtfs.StopTimesEntry>
            gtfsStopsLookup = {}

            # Append an initial dummy shape to force routing through the path start:
            gtfsShapes.append(gtfs.ShapesEntry(-1, -1, firstLink.origNode.gpsLat, firstLink.origNode.gpsLng))

            # Append all of the stops:
            for gtfsStopTime in stopTimes:
                gtfsShapes.append(gtfs.ShapesEntry(-1, gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat,
                                                   gtfsStopTime.stop.gpsLng))
                gtfsStopsLookup[gtfsStopTime.stopSeq] = gtfsStopTime

            # Append a trailing dummy shape to force routing through the path end:
            gtfsShapes.append(gtfs.ShapesEntry(-1, -1, lastDestNode.gpsLat, lastDestNode.gpsLng))

            # Find a path through our prepared node map subset:
            # @type resultTree: list<path_engine.PathEnd>
            resultTree = pathEngine.constructPath(gtfsShapes, vistaSubset)

            # Strip off the dummy ends. Slicing is used so that a result with fewer than two
            # entries becomes empty rather than raising an IndexError.
            resultTree = resultTree[1:-1]
            if len(resultTree) > 0:
                resultTree[0].prevTreeNode = None

            # So now we should have one tree entry per matched stop.

            # Deal with Problem Report:
            # TODO: The Problem Report will include all nodes on each path regardless of valid time interval;
            # However; we will not have gotten here if the trip was entirely outside of it.
            if problemReport:
                # @type revisedNodeList: dict<int, path_engine.PathEnd>  (keyed by stop sequence)
                revisedNodeList = {}
                prevNode = None
                for stopNode in resultTree:
                    # Reconstruct a tree node in terms of the original network.
                    newShape = gtfs.ShapesEntry(shapeID, stopNode.shapeEntry.shapeSeq, stopNode.shapeEntry.lat,
                                                stopNode.shapeEntry.lng, False)
                    origLink = vistaNetwork.linkMap[stopNode.pointOnLink.link.id]
                    newPointOnLink = graph.PointOnLink(origLink, stopNode.pointOnLink.dist,
                                                       stopNode.pointOnLink.nonPerpPenalty,
                                                       stopNode.pointOnLink.refDist)
                    newNode = path_engine.PathEnd(newShape, newPointOnLink)
                    newNode.restart = False
                    newNode.totalCost = stopNode.totalCost
                    newNode.totalDist = stopNode.totalDist
                    newNode.routeInfo = [vistaNetwork.linkMap[link.id] for link in stopNode.routeInfo]
                    newNode.prevTreeNode = prevNode
                    prevNode = newNode
                    revisedNodeList[stopNode.shapeEntry.shapeSeq] = newNode
                problemReportNodes[shapeID] = revisedNodeList

            # Walk through our output link list and see where the resultTree entries occur:
            resultIndex = 0
            # @type stopMatches: list<StopMatch>
            stopMatches = []
            rejectFlag = False
            for linkID in outLinkIDList:
                curResultIndex = resultIndex
                # This routine will advance resultIndex only if a stop is found for linkID, and will exit out when
                # no more stops are found for linkID.
                stopMatch = StopMatch(linkID)
                stopMatches.append(stopMatch)
                while curResultIndex < len(resultTree):
                    # The stop under examination is the one at curResultIndex. The time check and
                    # the best-match bookkeeping must use this same entry, because curResultIndex
                    # may have scanned ahead of resultIndex.
                    candidate = resultTree[curResultIndex]
                    if candidate.pointOnLink.link.id == linkID:
                        # Only pay attention to this stop if it is within the valid time range:
                        gtfsStopTime = gtfsStopsLookup[candidate.shapeEntry.shapeSeq]
                        if (excludeBegin and gtfsStopTime.arrivalTime < startTime) \
                                or (excludeEnd and gtfsStopTime.arrivalTime > endTime):
                            # Throw away this entire route because it is excluded and part of it falls outside:
                            print("INFO: Excluded because of activity outside of the valid time range.",
                                  file=sys.stderr)
                            del stopMatches[:]
                            rejectFlag = True
                            break
                        elif (widenBegin or gtfsStopTime.arrivalTime >= startTime) \
                                and (widenEnd or gtfsStopTime.arrivalTime <= endTime):
                            if (stopMatch.bestTreeEntry is None) \
                                    or (candidate.pointOnLink.refDist < stopMatch.bestTreeEntry.pointOnLink.refDist):
                                # Log the best match:
                                stopMatch.bestTreeEntry = candidate
                            stopMatch.matchCtr += 1
                        # A stop was found for linkID, so it is consumed.
                        resultIndex = curResultIndex + 1
                    curResultIndex += 1
                    # Keep scanning while nothing has been matched yet, or while the next
                    # unconsumed stop still lies on this link.
                    if (stopMatch and stopMatch.matchCtr == 0) \
                            or ((curResultIndex < len(resultTree))
                                and (resultTree[resultIndex].pointOnLink.link.id == linkID)):
                        continue
                    # We have gotten to the end of matched link(s).
                    break
                if rejectFlag:
                    break

            # Then, output the results out if we are supposed to.
            # @type foundStopSet: set<int>
            foundStopSet = set()
            if not rejectFlag:
                outSeqCtr = longestStart
                minTime = warmupStartTime
                maxTime = cooldownEndTime
                foundValidStop = False
                for stopMatch in stopMatches:
                    if stopMatch.matchCtr > 0:
                        bestEntry = stopMatch.bestTreeEntry
                        bestSeq = bestEntry.shapeEntry.shapeSeq
                        bestStopTime = gtfsStopsLookup[bestSeq]
                        stopID = bestStopTime.stop.stopID

                        if stopMatch.matchCtr > 1:
                            # Report duplicates:
                            print("WARNING: %d stops have been matched for TripID %d, LinkID %d. Keeping Stop %d, Stop Seq %d" \
                                  % (stopMatch.matchCtr, tripID, stopMatch.linkID, stopID, bestSeq), file=sys.stderr)
                            # TODO: This is a problem because VISTA only allows one stop per link. So, the stop that
                            # is closest to the link is the one that is the winner and the rest are ignored. We
                            # don't yet do anything intelligent with dwell times, etc.

                        # Report the best match:
                        foundStopSet.add(bestSeq)  # Check off this stop sequence.
                        foundValidStop = True
                        print('"%d","%d","%d","%d","%d",' % (tripID, outSeqCtr, stopMatch.linkID, stopID,
                                                             DWELLTIME_DEFAULT), file=outFile)
                        if stopID in ret and ret[stopID].link.id != bestEntry.pointOnLink.link.id:
                            print("WARNING: stopID %d is attempted to be assigned to linkID %d, but it had already been assigned to linkID %d." \
                                  % (stopID, bestEntry.pointOnLink.link.id, ret[stopID].link.id), file=sys.stderr)
                            # TODO: This is a tricky problem. This means that among multiple bus routes, the same
                            # stop had been found to best fit two different links. I don't exactly know the best
                            # way to resolve this, other than (for NMC analyses) to create a "fake" stop that's
                            # tied with the new link.
                        else:
                            ret[stopID] = bestEntry.pointOnLink

                        # Check on the minimum/maximum time range:
                        minTime = min(bestStopTime.arrivalTime, minTime)
                        maxTime = max(bestStopTime.arrivalTime, maxTime)
                    else:
                        # The linkID has nothing to do with any points in consideration. Report it without a stop:
                        if foundValidStop or not excludeUpstream:
                            print('"%d","%d","%d",,,' % (tripID, outSeqCtr, stopMatch.linkID), file=outFile)
                    outSeqCtr += 1
                    # TODO: For start time estimation (as reported in the public.bus_frequency.csv output), it may
                    # be ideal to keep track of linear distance traveled before the first valid stop.

                # Widen out the valid interval if needed:
                warmupStartTime = min(minTime, warmupStartTime)
                cooldownEndTime = max(maxTime, cooldownEndTime)

            # Are there any stops left over? If so, report them to say that they aren't in the output file.
            # Consecutive missing stop sequences are reported as one range instead of one message per stop.
            startGap = -1
            endGap = -1
            for gtfsStopTime in stopTimes:
                accountedFor = False
                if gtfsStopTime.stopSeq not in foundStopSet:
                    # This stop is unaccounted for:
                    if startGap < 0:
                        startGap = gtfsStopTime.stopSeq
                    endGap = gtfsStopTime.stopSeq

                    if problemReport:
                        revisedNodeList = problemReportNodes[shapeID]
                        if gtfsStopTime.stopSeq not in revisedNodeList:
                            # Make a dummy "error" node for reporting.
                            newShape = gtfs.ShapesEntry(shapeID, gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat,
                                                        gtfsStopTime.stop.gpsLng, False)
                            newPointOnLink = graph.PointOnLink(None, 0)
                            newPointOnLink.pointX = gtfsStopTime.stop.pointX
                            newPointOnLink.pointY = gtfsStopTime.stop.pointY
                            newNode = path_engine.PathEnd(newShape, newPointOnLink)
                            newNode.restart = True
                            revisedNodeList[gtfsStopTime.stopSeq] = newNode
                else:
                    accountedFor = True

                # Flush the open gap when a found stop ends it, or when the last stop is reached:
                if (accountedFor or gtfsStopTime.stopSeq == stopTimes[-1].stopSeq) and startGap >= 0:
                    subStr = "Seqs. %d-%d" % (startGap, endGap) if startGap != endGap else "Seq. %d" % startGap
                    print("WARNING: Trip ID %d, Stop %s will not be in the bus_route_link file." % (tripID, subStr),
                          file=sys.stderr)
                    startGap = -1
        else:
            print("WARNING: No links for tripID %d." % tripID, file=sys.stderr)

    # Deal with Problem Report:
    if problemReport:
        print("INFO: Output problem report CSV...", file=sys.stderr)
        # Flatten each shape's stop-sequence mapping into a list ordered by stop sequence:
        problemReportNodesOut = {}
        for reportShapeID in problemReportNodes:
            seqs = compat.listkeys(problemReportNodes[reportShapeID])
            seqs.sort()
            problemReportNodesOut[reportShapeID] = [problemReportNodes[reportShapeID][seq] for seq in seqs]
        problem_report.problemReport(problemReportNodesOut, vistaNetwork)

    return (ret, warmupStartTime, cooldownEndTime)