def restorePathMatch(dbServer, networkName, userName, password, shapePath, pathMatchFilename, useDirectDist=True):
    # Get the database connected:
    print("INFO: Connect to database...", file=sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file=sys.stderr)
    vistaGraph = vista_network.fillGraph(database, useDirectDist)

    # Read in the shapefile information:
    print("INFO: Read GTFS shapefile...", file=sys.stderr)
    gtfsShapes = gtfs.fillShapes(shapePath, vistaGraph.gps)

    # Read the path-match file:
    print("INFO: Read the path-match file '%s'..." % pathMatchFilename, file=sys.stderr)
    with open(pathMatchFilename, 'r') as inFile:
        gtfsNodes = path_engine.readStandardDump(vistaGraph, gtfsShapes, inFile)
        "@type gtfsNodes: dict<int, list<path_engine.PathEnd>>"

    # Filter out the unused shapes:
    unusedShapeIDs = set()
    for shapeID in compat.listkeys(gtfsShapes):
        if shapeID not in gtfsNodes:
            del gtfsShapes[shapeID]
            unusedShapeIDs.add(shapeID)

    return (vistaGraph, gtfsShapes, gtfsNodes, unusedShapeIDs)
def filterSimilarity(gtfsShapes):
    """
    Compares all shape entries against each other and flags those that are similar enough to be
    considered duplicates. The longer list of each duplicate pair is kept.
    """
    shapeIDs = compat.listkeys(gtfsShapes)
    excludedIDs = set()
    "@type excludedIDs: set<int>"
    for origIndex in range(len(shapeIDs)):
        "@type origIndex: int"
        if shapeIDs[origIndex] not in excludedIDs:
            for targetIndex in range(len(shapeIDs)):
                "@type targetIndex: int"
                if (origIndex != targetIndex) and (shapeIDs[targetIndex] not in excludedIDs) \
                        and (len(gtfsShapes[shapeIDs[origIndex]]) >= len(gtfsShapes[shapeIDs[targetIndex]])):
                    s = difflib.SequenceMatcher()
                    s.set_seqs([(shapeEntry.lat, shapeEntry.lng) for shapeEntry in gtfsShapes[shapeIDs[origIndex]]],
                               [(shapeEntry.lat, shapeEntry.lng) for shapeEntry in gtfsShapes[shapeIDs[targetIndex]]])
                    if s.ratio() > SEQUENCE_CUTOFF:
                        excludedIDs.add(shapeIDs[targetIndex])
                        print("INFO: Shape ID %d is kept, where Shape ID %d is a duplicate (%.3g)"
                              % (shapeIDs[origIndex], shapeIDs[targetIndex], s.ratio()), file=sys.stderr)
    ret = dict(gtfsShapes)
    for shapeID in excludedIDs:
        del ret[shapeID]
    return ret
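# A minimal, self-contained sketch of the similarity test used in filterSimilarity(). The two
# coordinate lists and the 0.8 cutoff below are illustrative assumptions; the real threshold is
# the module-level SEQUENCE_CUTOFF constant, and real inputs are gtfs.ShapesEntry lists.
def _demoSequenceCutoff():
    import difflib
    shapeA = [(30.2670, -97.7430), (30.2680, -97.7440), (30.2690, -97.7450), (30.2700, -97.7460)]
    shapeB = [(30.2670, -97.7430), (30.2680, -97.7440), (30.2690, -97.7450)]  # shorter near-duplicate
    s = difflib.SequenceMatcher()
    s.set_seqs(shapeA, shapeB)
    if s.ratio() > 0.8:
        # Ratio here is about 0.857, so the shorter shape would be excluded as a duplicate:
        print("shapeB would be excluded as a duplicate of shapeA (ratio %.3g)" % s.ratio())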
def arcgiscsvReport(gtfsNodes, vistaGraph, outFile=sys.stdout):
    """
    Takes a node set and outputs VISTA table files that report the link matches for the
    ArcGIS CSV GPS track set.
    @type gtfsNodes: dict<str, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    """
    print("objID,datafileID,linkID,time,restart,lat,lng,vistaLat,vistaLng", file=outFile)
    datafileIDs = compat.listkeys(gtfsNodes)
    datafileIDs.sort()
    for datafileID in datafileIDs:
        gtfsNodeList = gtfsNodes[datafileID]
        "@type gtfsNodeList: list<path_engine.PathEnd>"
        for node in gtfsNodeList:
            "@type node: path_engine.PathEnd"
            if len(node.routeInfo) > 0:
                (vistaLat, vistaLng) = vistaGraph.gps.feet2gps(node.pointOnLink.pointX, node.pointOnLink.pointY)
                for link in node.routeInfo:
                    outStr = "%d,%s,%d,%s,%d,%g,%g,%g,%g" % (node.shapeEntry.shapeSeq, node.shapeEntry.shapeID,
                        link.id, node.shapeEntry.time.strftime('%m/%d/%Y %H:%M:%S'), 1 if node.restart else 0,
                        node.shapeEntry.lat, node.shapeEntry.lng, vistaLat, vistaLng)
                    print(outStr, file=outFile)
def pathMatch(dbServer, networkName, userName, password, filename, limitMap=None):
    # Default parameters, with explanations and cross-references to Perrine et al., 2015:
    pointSearchRadius = 1000    # "k": Radius (ft) to search from GTFS point to perpendicular VISTA links
    pointSearchPrimary = 350    # "k_p": Radius (ft) to search from GTFS point to new VISTA links
    pointSearchSecondary = 200  # "k_s": Radius (ft) to search from VISTA perpendicular point to previous point
    limitLinearDist = 3800      # Path distance (ft) to allow new proposed paths from one point to another
    limitDirectDist = 3500      # Radius (ft) to allow new proposed paths from one point to another
    limitDirectDistRev = 500    # Radius (ft) to allow backtracking on an existing link (e.g. parking lot)
    distanceFactor = 1.0        # "f_d": Cost multiplier for linear path distance
    driftFactor = 1.5           # "f_r": Cost multiplier for distance from GTFS point to its VISTA link
    nonPerpPenalty = 1.5        # "f_p": Penalty multiplier for GTFS points that aren't perpendicular to VISTA links
    limitClosestPoints = 8      # "q_p": Number of close-proximity points that are considered for each GTFS point
    limitSimultaneousPaths = 6  # "q_e": Number of proposed paths to maintain during the pathfinding stage
    maxHops = 12                # Maximum number of VISTA links to pursue in a path-finding operation

    # Get the database connected:
    print("INFO: Connect to database...", file=sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file=sys.stderr)
    vistaGraph = vista_network.fillGraph(database)

    # Read in the GPS track information:
    print("INFO: Read GDB GPS track...", file=sys.stderr)
    gpsTracks = fillFromFile(filename, vistaGraph.gps)

    # Initialize the path-finder:
    pathFinder = path_engine.PathEngine(pointSearchRadius, pointSearchPrimary, pointSearchSecondary, limitLinearDist,
                                        limitDirectDist, limitDirectDistRev, distanceFactor, driftFactor,
                                        nonPerpPenalty, limitClosestPoints, limitSimultaneousPaths)
    pathFinder.maxHops = maxHops

    # Begin iteration through each shape:
    datafileIDs = compat.listkeys(gpsTracks)
    "@type datafileIDs: list<str>"
    datafileIDs.sort()
    nodesResults = {}
    "@type nodesResults: dict<str, list<path_engine.PathEnd>>"

    if limitMap is not None:
        for datafileID in limitMap:
            if datafileID not in datafileIDs:
                print("WARNING: Limit datafile ID %s is not found in the shape file." % datafileID, file=sys.stderr)

    for datafileID in datafileIDs:
        "@type datafileID: str"
        if limitMap is not None and datafileID not in limitMap:
            continue

        print("INFO: -- Datafile %s --" % datafileID, file=sys.stderr)

        # Find the path for the given shape:
        gtfsNodes = pathFinder.constructPath(gpsTracks[datafileID], vistaGraph)

        # File this away as a result for later output:
        nodesResults[datafileID] = gtfsNodes
    return nodesResults
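# Hypothetical driver sketch (not part of the original tools): it assumes this module's pathMatch()
# and the path_engine dump helpers are importable, that the database credentials below exist, and
# that the datafile IDs named in limitMap appear in the GPS track file. It mirrors main(), but
# restricts matching to two tracks via the limitMap parameter.
def _demoLimitedPathMatch():
    limitMap = {"track_0001", "track_0002"}  # assumed datafile IDs; any container of IDs works
    results = pathMatch("localhost", "my_network", "user", "password", "tracks.gdb", limitMap=limitMap)
    path_engine.dumpStandardHeader()
    for datafileID in sorted(results):
        path_engine.dumpStandardInfo(results[datafileID])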
def dumpGPS(gtfsNodes, vistaGraph, outFile=sys.stdout):
    """
    Takes a GTFS node set and outputs a CSV format of GPS points and other information.
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    """
    print("shapeID,shapeSeq,linkID,linkDist,gtfsLat,gtfsLng,vistaLat,vistaLng,vistaNodeLat,vistaNodeLng", file=outFile)
    shapeIDs = compat.listkeys(gtfsNodes)
    shapeIDs.sort()
    for shapeID in shapeIDs:
        gtfsNodeList = gtfsNodes[shapeID]
        "@type gtfsNodeList: list<path_engine.PathEnd>"
        for gtfsNode in gtfsNodeList:
            "@type gtfsNode: path_engine.PathEnd"
            (vistaLat, vistaLng) = vistaGraph.gps.feet2gps(gtfsNode.pointOnLink.pointX, gtfsNode.pointOnLink.pointY)
            outStr = "%d,%d,%d,%g,%g,%g,%g,%g,%g,%g" % (gtfsNode.shapeEntry.shapeID, gtfsNode.shapeEntry.shapeSeq,
                gtfsNode.pointOnLink.link.id, gtfsNode.pointOnLink.dist, gtfsNode.shapeEntry.lat,
                gtfsNode.shapeEntry.lng, vistaLat, vistaLng, gtfsNode.pointOnLink.link.origNode.gpsLat,
                gtfsNode.pointOnLink.link.origNode.gpsLng)
            print(outStr, file=outFile)
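# Quick way to inspect a dumpGPS() output using only the standard library. The filename
# "dump_gps.csv" is an assumption for illustration; dumpGPS() itself writes to whatever open
# file object it is given.
def _demoReadDumpGPS(filename="dump_gps.csv"):
    import csv
    with open(filename, 'r') as inFile:
        for row in csv.DictReader(inFile):
            # Each row pairs the original GTFS point with its matched VISTA point:
            print(row["shapeID"], row["shapeSeq"], row["gtfsLat"], row["gtfsLng"], "->",
                  row["vistaLat"], row["vistaLng"], "on link", row["linkID"])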
def problemReport(gtfsNodes, vistaGraph, showLinks=False, outFile=sys.stdout):
    """
    Takes a GTFS node set and outputs a CSV format of GPS points where there are indications of problems.
    @type gtfsNodes: dict<?, list<path_engine.PathEnd>>
    @param showLinks: Place markers at starts of links in addition to those at trackpoints. These output problemCode 4.
    @type showLinks: bool
    @type vistaGraph: graph.GraphLib
    """
    print("shapeID,shapeSeq,linkID,linkDist,problemCode,gtfsLatLon,vistaLatLon", file=outFile)
    shapeIDs = compat.listkeys(gtfsNodes)
    shapeIDs.sort()
    for shapeID in shapeIDs:
        gtfsNodeList = gtfsNodes[shapeID]
        "@type gtfsNodeList: list<path_engine.PathEnd>"
        prevSeq = -1
        for gtfsNode in gtfsNodeList:
            "@type gtfsNode: path_engine.PathEnd"
            (vistaLat, vistaLng) = vistaGraph.gps.feet2gps(gtfsNode.pointOnLink.pointX, gtfsNode.pointOnLink.pointY)

            # Determine whether we have a problem to report:
            problemCode = 0
            if gtfsNode.restart:
                problemCode = 1
            elif not gtfsNode.pointOnLink.nonPerpPenalty and gtfsNode.pointOnLink.refDist > PERP_DIST:
                problemCode = 2
            elif gtfsNode.pointOnLink.nonPerpPenalty and gtfsNode.pointOnLink.refDist > NONPERP_DIST:
                problemCode = 3

            if showLinks and gtfsNode.routeInfo:
                divisor = 10 ** int(math.log10(len(gtfsNode.routeInfo) + 1) + 1)
                increment = 1.0 / divisor
                seqCtr = prevSeq + increment
                for routeInfo in gtfsNode.routeInfo:
                    "@type routeInfo: graph.GraphLink"
                    outStr = "%s,%g,%d,%g,%d,%s,%s" % (str(gtfsNode.shapeEntry.shapeID), seqCtr, routeInfo.id, 0, 4,
                        str(routeInfo.origNode.gpsLat) + " " + str(routeInfo.origNode.gpsLng),
                        str(routeInfo.origNode.gpsLat) + " " + str(routeInfo.origNode.gpsLng))
                    print(outStr, file=outFile)
                    seqCtr += increment
            outStr = "%s,%d,%d,%g,%d,%s,%s" % (str(gtfsNode.shapeEntry.shapeID), gtfsNode.shapeEntry.shapeSeq,
                gtfsNode.pointOnLink.link.id if gtfsNode.pointOnLink.link is not None else -1,
                gtfsNode.pointOnLink.dist, problemCode,
                str(gtfsNode.shapeEntry.lat) + " " + str(gtfsNode.shapeEntry.lng),
                str(vistaLat) + " " + str(vistaLng))
            print(outStr, file=outFile)
            prevSeq = gtfsNode.shapeEntry.shapeSeq
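# Legend for the problemCode column written by problemReport() above. This dict is purely
# documentary (it is not used by the function); the PERP_DIST and NONPERP_DIST thresholds are
# module-level constants defined elsewhere.
PROBLEM_CODE_LEGEND = {
    0: "no problem detected",
    1: "restart: the path-matcher gave up and restarted the path at this point",
    2: "perpendicular match lies farther than PERP_DIST from the link",
    3: "non-perpendicular match lies farther than NONPERP_DIST from the link",
    4: "marker emitted at the start of a traversed link (showLinks=True only)",
}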
def main(argv):
    # Initialize from command-line parameters:
    if len(argv) < 7:
        syntax()
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    shapePath = argv[5]
    pathMatchFilename = argv[6]
    hintFilename = None
    routeRestrictFilename = None

    if len(argv) > 6:
        i = 7
        while i < len(argv):
            if argv[i] == "-h" and i < len(argv) - 1:
                hintFilename = argv[i + 1]
                i += 1
            elif argv[i] == "-r" and i < len(argv) - 1:
                routeRestrictFilename = argv[i + 1]
                i += 1
            i += 1

    # Restore the stuff that was built with path_match:
    (vistaGraph, gtfsShapes, gtfsNodes, unusedShapeIDs) = transit_gtfs.restorePathMatch(dbServer, networkName,
        userName, password, shapePath, pathMatchFilename)
    # TODO: We don't do anything with unusedShapeIDs right now.

    # Restore the hint file if it is specified:
    if hintFilename is not None:
        print("INFO: Read hint file...", file=sys.stderr)
    else:
        print("INFO: No hint file was specified.", file=sys.stderr)
    hintEntries = fillHints(hintFilename, shapePath, gtfsShapes, vistaGraph.gps, unusedShapeIDs)
    "@type hintEntries: dict<int, path_engine.ShapesEntry>"

    # Filter down the routes that we're interested in:
    if routeRestrictFilename is not None:
        gtfsNodes = filterRoutes(gtfsNodes, shapePath, gtfsShapes, routeRestrictFilename)

    print("INFO: Refining paths.", file=sys.stderr)
    gtfsNodesResults = pathsRefine(gtfsNodes, hintEntries, vistaGraph)
    "@type gtfsNodesResults: dict<int, list<path_engine.PathEnd>>"

    print("INFO: -- Final --", file=sys.stderr)
    print("INFO: Print output...", file=sys.stderr)
    path_engine.dumpStandardHeader()

    shapeIDs = compat.listkeys(gtfsNodesResults)
    "@type shapeIDs: list<int>"
    shapeIDs.sort()
    for shapeID in shapeIDs:
        "@type shapeID: int"
        path_engine.dumpStandardInfo(gtfsNodesResults[shapeID])

    print("INFO: Done.", file=sys.stderr)
def pathsRefine(gtfsNodes, hintEntries, vistaGraph):
    # Default parameters, with explanations and cross-references to Perrine et al., 2015:
    hintRefactorRadius = 1000    # Radius (ft) to invalidate surrounding found points.
    termRefactorRadius = 3000    # Radius (ft) to invalidate found points at either end of a restart.
    pointSearchRadius = 1600     # "k": Radius (ft) to search from GTFS point to perpendicular VISTA links
    pointSearchPrimary = 1600    # "k_p": Radius (ft) to search from GTFS point to new VISTA links
    pointSearchSecondary = 200   # "k_s": Radius (ft) to search from VISTA perpendicular point to previous point
    limitLinearDist = 6200       # Path distance (ft) to allow new proposed paths from one point to another
    limitDirectDist = 6200       # Radius (ft) to allow new proposed paths from one point to another
    limitDirectDistRev = 500     # Radius (ft) to allow backtracking on an existing link (e.g. parking lot)
    distanceFactor = 1.0         # "f_d": Cost multiplier for linear path distance
    driftFactor = 1.5            # "f_r": Cost multiplier for distance from GTFS point to its VISTA link
    nonPerpPenalty = 1.5         # "f_p": Penalty multiplier for GTFS points that aren't perpendicular to VISTA links
    limitClosestPoints = 25      # "q_p": Number of close-proximity points that are considered for each GTFS point
    limitSimultaneousPaths = 25  # "q_e": Number of proposed paths to maintain during the pathfinding stage
    maxHops = 8                  # Maximum number of VISTA links to pursue in a path-finding operation
    limitHintClosest = 4         # Number of hint closest points and closest previous track points

    # Initialize the path-finder:
    pathFinder = path_engine.PathEngine(pointSearchRadius, pointSearchPrimary, pointSearchSecondary, limitLinearDist,
                                        limitDirectDist, limitDirectDistRev, distanceFactor, driftFactor,
                                        nonPerpPenalty, limitClosestPoints, limitSimultaneousPaths)
    pathFinder.setRefineParams(hintRefactorRadius, termRefactorRadius)
    pathFinder.maxHops = maxHops
    pathFinder.limitHintClosest = limitHintClosest

    # Begin iteration through each shape:
    shapeIDs = compat.listkeys(gtfsNodes)
    "@type shapeIDs: list<int>"
    shapeIDs.sort()
    gtfsNodesResults = {}
    "@type gtfsNodesResults: dict<int, list<path_engine.PathEnd>>"
    for shapeID in shapeIDs:
        "@type shapeID: int"
        print("INFO: -- Shape ID %s --" % str(shapeID), file=sys.stderr)

        # Find the path for the given shape:
        gtfsNodesRevised = pathFinder.refinePath(gtfsNodes[shapeID], vistaGraph,
            hintEntries[shapeID] if shapeID in hintEntries else list())

        # File this away as a result for later output:
        gtfsNodesResults[shapeID] = gtfsNodesRevised
    return gtfsNodesResults
def main(argv):
    # Initialize from command-line parameters:
    if (len(argv) < 2) or (argv[1].lower() == "-h") or (argv[1].lower() == "--help"):
        syntax()
    shapePath = argv[1]
    routeRestrictFilename = None

    if len(argv) > 1:
        i = 2
        while i < len(argv):
            if argv[i] == "-x" and i < len(argv) - 1:
                routeRestrictFilename = argv[i + 1]
                i += 1
            i += 1

    # Create a fake GPS coordinate reference (named so as not to shadow the graph module):
    fakeGraph = graph.GraphLib(0, 0)

    # Read in the shapefile information:
    print("INFO: Read GTFS shapefile...", file=sys.stderr)
    gtfsShapes = gtfs.fillShapes(shapePath, fakeGraph.gps)

    # Filter shapes according to exclusion file:
    if routeRestrictFilename is not None:
        gtfsShapes = path_refine.filterRoutes(gtfsShapes, shapePath, gtfsShapes, routeRestrictFilename, True)

    # Similarity search:
    gtfsShapes = filterSimilarity(gtfsShapes)

    # Extract useful information:
    print("INFO: Print output...", file=sys.stderr)
    shapeIDs = compat.listkeys(gtfsShapes)
    "@type shapeIDs: list<int>"
    shapeIDs.sort()
    print("shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled")
    for shapeID in shapeIDs:
        "@type shapeID: int"
        for shapeEntry in gtfsShapes[shapeID]:
            "@type shapeEntry: gtfs.ShapesEntry"
            print("%d,%f,%f,%d," % (shapeEntry.shapeID, shapeEntry.lat, shapeEntry.lng, shapeEntry.shapeSeq))
def dumpBusRoutes(gtfsTrips, userName, networkName, outFile=sys.stdout):
    """
    dumpBusRoutes dumps out a public.bus_route.csv file contents.
    @type gtfsTrips: dict<int, gtfs.TripsEntry>
    @type userName: str
    @type networkName: str
    @type outFile: file
    """
    _outHeader("public.bus_route", userName, networkName, outFile)
    print('"id","name",', file=outFile)

    # Remember, we are treating each route as a trip.
    tripIDs = compat.listkeys(gtfsTrips)
    tripIDs.sort()
    for tripID in tripIDs:
        append = ""
        if len(gtfsTrips[tripID].route.name) > 0:
            append = ": " + gtfsTrips[tripID].route.name
        if len(gtfsTrips[tripID].tripHeadsign) > 0:
            append += " " + gtfsTrips[tripID].tripHeadsign
        print('"%d","%s"' % (tripID, gtfsTrips[tripID].route.shortName + append), file=outFile)
def main(argv):
    # Initialize from command-line parameters:
    if len(argv) < 6:
        syntax()
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    filename = argv[5]

    gtfsNodesResults = pathMatch(dbServer, networkName, userName, password, filename)

    # Extract useful information:
    print("INFO: -- Final --", file=sys.stderr)
    print("INFO: Print output...", file=sys.stderr)
    path_engine.dumpStandardHeader()

    datafileIDs = compat.listkeys(gtfsNodesResults)
    "@type datafileIDs: list<str>"
    datafileIDs.sort()
    for datafileID in datafileIDs:
        "@type datafileID: str"
        path_engine.dumpStandardInfo(gtfsNodesResults[datafileID])
def gdbReport(gtfsNodes, vistaGraph, outFile=sys.stdout):
    """
    Takes a node set and outputs VISTA table files that report the link matches for the GDB GPS track set.
    @type gtfsNodes: dict<str, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    """
    print("objID,datafileID,linkID,time,speed,dist,restart,lat,lng,vistaLat,vistaLng", file=outFile)
    datafileIDs = compat.listkeys(gtfsNodes)
    datafileIDs.sort()
    for datafileID in datafileIDs:
        gtfsNodeList = gtfsNodes[datafileID]
        "@type gtfsNodeList: list<path_engine.PathEnd>"
        for node in gtfsNodeList:
            "@type node: path_engine.PathEnd"
            if len(node.routeInfo) > 0:
                (vistaLat, vistaLng) = vistaGraph.gps.feet2gps(node.pointOnLink.pointX, node.pointOnLink.pointY)
                for link in node.routeInfo:
                    outStr = "%d,%s,%d,%s,%g,%g,%d,%g,%g,%g,%g" % (node.shapeEntry.shapeSeq, node.shapeEntry.shapeID,
                        link.id, node.shapeEntry.time.strftime('%H:%M:%S'), node.shapeEntry.speed,
                        node.pointOnLink.dist, 1 if node.restart else 0, node.shapeEntry.lat, node.shapeEntry.lng,
                        vistaLat, vistaLng)
                    print(outStr, file=outFile)
def main(argv):
    # Initialize from command-line parameters:
    if len(argv) < 7:
        syntax(0)
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    csvFilename = argv[5]
    csvPathMatch = argv[6]
    sourceID = 0
    endTime = 86400
    refTime = None
    problemReport = False

    if len(argv) > 6:
        i = 7
        while i < len(argv):
            if argv[i] == "-s" and i < len(argv) - 1:
                sourceID = int(argv[i + 1])
                i += 1
            elif argv[i] == "-t" and i < len(argv) - 1:
                refTime = datetime.strptime(argv[i + 1], '%H:%M:%S')
                i += 1
            elif argv[i] == "-e" and i < len(argv) - 1:
                endTime = int(argv[i + 1])
                i += 1
            elif argv[i] == "-p":
                problemReport = True
            i += 1

    if refTime is None and not problemReport:
        print("ERROR: No reference time is specified.")
        syntax(1)

    # Get the database connected:
    print("INFO: Connect to database...", file=sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file=sys.stderr)
    vistaGraph = vista_network.fillGraph(database)

    # Read in the GPS track information:
    print("INFO: Read ArcGIS CSV GPS track '%s'..." % csvFilename, file=sys.stderr)
    gpsTracks = arcgiscsv_extracted.fillFromFile(csvFilename, vistaGraph.gps)

    # Restore the path match:
    print("INFO: Read the ArcGIS CSV path-match file '%s'..." % csvPathMatch, file=sys.stderr)
    with open(csvPathMatch, 'r') as inFile:
        nodes = path_engine.readStandardDump(vistaGraph, gpsTracks, inFile, lambda x: str(x))

    # Assumption: Each shapeID corresponds with one trip that will be reported in the output.
    # And, each route corresponds with one trip.

    # Filter out nodes that have one or zero links:
    for shapeID in compat.listkeys(nodes):
        ctr = 0
        for node in nodes[shapeID]:
            ctr += len(node.routeInfo)
        if ctr <= 1:
            print("INFO: Filtering out shapeID %s." % str(shapeID), file=sys.stderr)
            del nodes[shapeID]
            del gpsTracks[shapeID]

    # Deal with Problem Report:
    if problemReport:
        print("INFO: Output problem report CSV...", file=sys.stderr)
        problem_report.problemReport(nodes, vistaGraph)
        print("INFO: Done.", file=sys.stderr)
        return

    # TODO: The logic below is a hack to create unique routes given GDB IDs. There are several
    # long-term problems with this, including the idea that it is impossible to reuse common
    # routes (each instance is its own route) and there are assumptions about vehicle ID
    # numbering in the generated vehicles.

    # Fabricate routes:
    routes = {}
    ctr = 1  # We'll be making arbitrary route IDs:
    for shapeID in gpsTracks:
        routes[ctr] = gtfs.RoutesEntry(ctr, shapeID, "")
        ctr += 1

    # Let vehicle IDs be in a different number range:
    vehCtr = int(ctr / 10000)
    vehCtr += 10000

    # Fabricate trips and stop times:
    trips = {}
    stopTimes = {}
    for routeID in routes:
        trips[vehCtr] = gtfs.TripsEntry(vehCtr, routes[routeID], "", gpsTracks[routes[routeID].shortName])
        stopTimes[trips[vehCtr]] = list()  # Fake the system by having no stops defined.
        vehCtr += 1
    tripIDs = compat.listkeys(trips)
    tripIDs.sort()

    # Output the routes file:
    print("INFO: Dumping public.bus_route.csv...", file=sys.stderr)
    with open("public.bus_route.csv", 'w') as outFile:
        transit_gtfs.dumpBusRoutes(trips, userName, networkName, outFile)

    # Output the routes_link file:
    print("INFO: Dumping public.bus_route_link.csv...", file=sys.stderr)
    with open("public.bus_route_link.csv", 'w') as outFile:
        transit_gtfs.dumpBusRouteLinks(trips, stopTimes, nodes, vistaGraph, 1, False, userName, networkName,
                                       refTime, endTime, False, False, False, False, outFile)

    print("INFO: Dumping public.bus_frequency.csv...", file=sys.stderr)
    with open("public.bus_frequency.csv", 'w') as outFile:
        transit_gtfs._outHeader("public.bus_frequency", userName, networkName, outFile)
        print('"route","period","frequency","offsettime","preemption"', file=outFile)
        for tripID in tripIDs:
            departureTime = trips[tripID].shapeEntries[0].time
            timeDiff = departureTime - refTime
            print("%d,1,86400,%d,0" % (tripID, timeDiff.days * 24 * 3600 + timeDiff.seconds), file=outFile)

    print("INFO: Dumping public.bus_period.csv...", file=sys.stderr)
    with open("public.bus_period.csv", 'w') as outFile:
        transit_gtfs._outHeader("public.bus_period", userName, networkName, outFile)
        print('"id","starttime","endtime"', file=outFile)
        print("1,0,%d" % endTime, file=outFile)

    # Now we need to write out to the travel_time output:
    print("INFO: Dumping public.travel_time.csv...", file=sys.stderr)
    with open("public.travel_time.csv", 'w') as outFile:
        transit_gtfs._outHeader("public.travel_time", userName, networkName, outFile)
        print('"departure_time","vehicle_id","route_id","exittime","linkid","arrivaltime","sourceid"', file=outFile)
        for tripID in tripIDs:
            nodeList = nodes[trips[tripID].route.shortName]
            "@type nodeList: list<path_engine.PathEnd>"
            departureTime = trips[tripID].shapeEntries[0].time
            lastTime = trips[tripID].shapeEntries[-1].time

            # Add the first link to the file:
            timeDiff = departureTime - refTime
            timeDiffLast = lastTime - refTime
            outStr = "%d,%d,%d,%d,%d,%d,%d" % (timeDiff.days * 24 * 3600 + timeDiff.seconds,
                trips[tripID].route.routeID, tripID, timeDiffLast.days * 24 * 3600 + timeDiffLast.seconds,
                nodeList[0].pointOnLink.link.id, timeDiff.days * 24 * 3600 + timeDiff.seconds, sourceID)
            print(outStr, file=outFile)

            for node in nodeList:
                "@type node: path_engine.PathEnd"
                if len(node.routeInfo) > 0:
                    # TODO: Deal with midnight if the time is before refTime.
                    arrivalTime = node.shapeEntry.time
                    for link in node.routeInfo:
                        arrivalTimeSec = 3600 * arrivalTime.hour + 60 * arrivalTime.minute + arrivalTime.second
                        # TODO: We need to make vehicleID, routeID and tripID be consistent.
                        timeDiffArr = arrivalTime - refTime
                        outStr = "%d,%d,%d,%d,%d,%d,%d" % (timeDiff.days * 24 * 3600 + timeDiff.seconds,
                            trips[tripID].route.routeID, tripID, timeDiffLast.days * 24 * 3600 + timeDiffLast.seconds,
                            link.id, timeDiffArr.days * 24 * 3600 + timeDiffArr.seconds, sourceID)
                        print(outStr, file=outFile)

    print("INFO: Done.", file=sys.stderr)
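# Note on the time arithmetic above: for the non-negative intervals used here,
# "timeDiff.days * 24 * 3600 + timeDiff.seconds" equals int(timeDiff.total_seconds()).
# A quick self-contained check (the interval is arbitrary):
def _demoTimedeltaSeconds():
    from datetime import timedelta
    td = timedelta(days=1, hours=2, minutes=3, seconds=4)
    assert td.days * 24 * 3600 + td.seconds == int(td.total_seconds()) == 93784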
def main(argv):
    global problemReport
    excludeUpstream = False

    # Initialize from command-line parameters:
    if len(argv) < 7:
        syntax(1)
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    shapePath = argv[5]
    pathMatchFilename = argv[6]
    endTimeInt = 86400
    refTime = None
    widenBegin = False
    widenEnd = False
    excludeBegin = False
    excludeEnd = False

    restrictService = set()
    "@type restrictService: set<string>"

    if len(argv) > 6:
        i = 7
        while i < len(argv):
            if argv[i] == "-t" and i < len(argv) - 1:
                refTime = datetime.strptime(argv[i + 1], '%H:%M:%S')
                i += 1
            elif argv[i] == "-e" and i < len(argv) - 1:
                endTimeInt = int(argv[i + 1])
                i += 1
            elif argv[i] == "-c" and i < len(argv) - 1:
                restrictService.add(argv[i + 1])
                i += 1
            elif argv[i] == "-u":
                excludeUpstream = True
            elif argv[i] == "-w":
                widenBegin = True
                widenEnd = True
            elif argv[i] == "-wb":
                widenBegin = True
            elif argv[i] == "-we":
                widenEnd = True
            elif argv[i] == "-x":
                excludeBegin = True
                excludeEnd = True
            elif argv[i] == "-xb":
                excludeBegin = True
            elif argv[i] == "-xe":
                excludeEnd = True
            elif argv[i] == "-p":
                problemReport = True
            i += 1

    if refTime is None:
        print("ERROR: No reference time is specified. You must use the -t parameter.", file=sys.stderr)
        syntax(1)
    endTime = refTime + timedelta(seconds=endTimeInt)

    if widenBegin and excludeBegin:
        print("ERROR: Widening (-w or -wb) and exclusion (-x or -xb) cannot be used together.")
        syntax(1)
    if widenEnd and excludeEnd:
        print("ERROR: Widening (-w or -we) and exclusion (-x or -xe) cannot be used together.")
        syntax(1)

    # Default parameters:
    stopSearchRadius = 800

    # Restore the stuff that was built with path_match:
    (vistaGraph, gtfsShapes, gtfsNodes, unusedShapeIDs) = restorePathMatch(dbServer, networkName, userName,
        password, shapePath, pathMatchFilename)

    # Read in the stuff from GTFS that further defines buses:
    _, gtfsStops, gtfsTrips, gtfsStopTimes = readBusRecords(shapePath, vistaGraph, gtfsShapes, unusedShapeIDs,
        restrictService)

    # Output the routes_link file:
    print("INFO: Dumping public.bus_route_link.csv...", file=sys.stderr)
    with open("public.bus_route_link.csv", 'w') as outFile:
        (stopLinkMap, newStartTime, newEndTime) = dumpBusRouteLinks(gtfsTrips, gtfsStopTimes, gtfsNodes, vistaGraph,
            stopSearchRadius, excludeUpstream, userName, networkName, refTime, endTime, widenBegin, widenEnd,
            excludeBegin, excludeEnd, outFile)
        "@type stopLinkMap: dict<int, graph.PointOnLink>"

    # Filter only to bus stops and stop times that are used in the routes_link output:
    gtfsStopsFilterList = [gtfsStopID for gtfsStopID in gtfsStops if gtfsStopID not in stopLinkMap]
    for gtfsStopID in gtfsStopsFilterList:
        del gtfsStops[gtfsStopID]
    del gtfsStopsFilterList

    # Then, output the stop file:
    print("INFO: Dumping public.bus_stop.csv...", file=sys.stderr)
    with open("public.bus_stop.csv", 'w') as outFile:
        dumpBusStops(gtfsStops, stopLinkMap, userName, networkName, outFile)

    print("INFO: Dumping public.bus_frequency.csv...", file=sys.stderr)
    validTrips = {}
    "@type validTrips: dict<int, gtfs.TripsEntry>"
    with open("public.bus_frequency.csv", 'w') as outFile:
        _outHeader("public.bus_frequency", userName, networkName, outFile)
        print('"route","period","frequency","offsettime","preemption"', file=outFile)

        # Okay, here we iterate through stops until we get to the first defined one. That will
        # then affect the offsettime. (This is needed because of the idea that we want to start
        # a bus in the simulation wrt the topology that supports it, skipping those stops that
        # may fall outside the topology.)
        totalCycle = int((newEndTime - newStartTime).total_seconds())
        tripIDs = compat.listkeys(gtfsTrips)
        tripIDs.sort()
        for tripID in tripIDs:
            stopsEntries = gtfsStopTimes[gtfsTrips[tripID]]
            for gtfsStopTime in stopsEntries:
                if gtfsStopTime.stop.stopID in gtfsStops:
                    # Here is a first valid entry! Use this offset value.
                    # TODO: This could be inaccurate because the offset is that of the first
                    # valid stop time encountered in the underlying topology, not approximated
                    # to the first valid link encountered. While this isn't a big deal for an
                    # area with a high stop density, it could be a problem for limited-stop
                    # service where there happens to be a low density around where the bus
                    # first appears in the underlying topology.
                    stopTime = gtfsStopTime.arrivalTime

                    # Adjust for cases where we need to add a day.
                    if stopTime < newStartTime:
                        # Assume that we're working just within a day.
                        stopTime += timedelta(days=int((newStartTime - stopTime).total_seconds()) // 86400 + 1)

                    print("%d,1,%d,%d,0" % (tripID, totalCycle, int((stopTime - newStartTime).total_seconds())),
                          file=outFile)
                    validTrips[tripID] = gtfsTrips[tripID]  # Record as valid.
                    break

                # A byproduct of this scheme is that no bus_frequency entry will appear for
                # routes that don't have stops in the underlying topology.

    # Output the routes file:
    print("INFO: Dumping public.bus_route.csv...", file=sys.stderr)
    with open("public.bus_route.csv", 'w') as outFile:
        dumpBusRoutes(validTrips, userName, networkName, outFile)

    # Finally, define one period that spans the whole working time, which all of the individually
    # defined routes (again, one route per trip) will operate in.
    print("INFO: Dumping public.bus_period.csv...", file=sys.stderr)
    with open("public.bus_period.csv", 'w') as outFile:
        _outHeader("public.bus_period", userName, networkName, outFile)
        print('"id","starttime","endtime"', file=outFile)
        # The start time printed here is relative to the reference time.
        print("1,0,%d" % endTimeInt, file=outFile)

    if widenBegin or widenEnd:
        # Report the implicit adjustment in times because of warmup or cooldown:
        startTimeDiff = refTime - newStartTime
        endTimeDiff = newEndTime - endTime
        print("INFO: Widening requires start %d sec. earlier and duration %d sec. longer."
              % (startTimeDiff.total_seconds(), endTimeDiff.total_seconds() + startTimeDiff.total_seconds()),
              file=sys.stderr)
        totalTimeDiff = newEndTime - newStartTime
        print("INFO: New time reference is %s, duration %d sec."
              % (newStartTime.strftime("%H:%M:%S"), totalTimeDiff.total_seconds()), file=sys.stderr)

    print("INFO: Done.", file=sys.stderr)
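# Stand-alone illustration of the day-wrap adjustment used for the bus_frequency offset above,
# using only the standard library. The two times are hypothetical; in a real run they come from
# GTFS stop times and the (possibly widened) simulation start.
def _demoDayWrap():
    from datetime import datetime, timedelta
    newStartTime = datetime(1900, 1, 1, 23, 30, 0)  # simulation starts at 23:30
    stopTime = datetime(1900, 1, 1, 0, 15, 0)       # stop parsed as 00:15, really the next day
    if stopTime < newStartTime:
        stopTime += timedelta(days=int((newStartTime - stopTime).total_seconds()) // 86400 + 1)
    print(stopTime)  # 1900-01-02 00:15:00, i.e. 45 minutes after the simulation start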
def dumpBusRouteLinks(gtfsTrips, gtfsStopTimes, gtfsNodes, vistaNetwork, stopSearchRadius, excludeUpstream, userName,
                      networkName, startTime, endTime, widenBegin, widenEnd, excludeBegin, excludeEnd,
                      outFile=sys.stdout):
    """
    dumpBusRouteLinks dumps out a public.bus_route_link.csv file contents. This also will remove all stop times and
    trips that fall outside of the valid evaluation interval as dictated by the exclusion parameters.
    @type gtfsTrips: dict<int, gtfs.TripsEntry>
    @type gtfsStopTimes: dict<TripsEntry, list<StopTimesEntry>>
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaNetwork: graph.GraphLib
    @type stopSearchRadius: float
    @type excludeUpstream: bool
    @type userName: str
    @type networkName: str
    @type startTime: datetime
    @type endTime: datetime
    @type widenBegin: bool
    @type widenEnd: bool
    @type excludeBegin: bool
    @type excludeEnd: bool
    @type outFile: file
    @return A mapping of stopID to points-on-links, plus the start and end times adjusted for
            warm-up and cool-down (if widenBegin or widenEnd is True)
    @rtype (dict<int, graph.PointOnLink>, datetime, datetime)
    """
    _outHeader("public.bus_route_link", userName, networkName, outFile)
    print('"route","sequence","link","stop","dwelltime",', file=outFile)

    # Set up the output:
    ret = {}
    "@type ret: dict<int, graph.PointOnLink>"
    warmupStartTime = startTime
    cooldownEndTime = endTime

    # Initialize the path engine for use later:
    pathEngine = path_engine.PathEngine(stopSearchRadius, stopSearchRadius, stopSearchRadius, sys.float_info.max,
                                        sys.float_info.max, stopSearchRadius, 1, 1, 1, sys.maxsize, sys.maxsize)
    pathEngine.limitClosestPoints = 8
    pathEngine.limitSimultaneousPaths = 6
    pathEngine.maxHops = 12
    pathEngine.logFile = None  # Suppress the log outputs for the path engine; enough stuff will come from other sources.

    problemReportNodes = {}
    "@type problemReportNodes: dict<?, path_engine.PathEnd>"

    tripIDs = compat.listkeys(gtfsTrips)
    tripIDs.sort()
    for tripID in tripIDs:
        if gtfsTrips[tripID].shapeEntries[0].shapeID not in gtfsNodes:
            # This happens if the incoming files contain a subset of all available topology.
            print("WARNING: Skipping route for trip %d because no points are available." % tripID, file=sys.stderr)
            continue

        treeNodes = gtfsNodes[gtfsTrips[tripID].shapeEntries[0].shapeID]
        "@type treeNodes: list<path_engine.PathEnd>"

        # Step 1: Find the longest distance of contiguous valid links within the shape for each trip:
        startIndex = -1
        curIndex = 0
        linkCount = 0
        totalLinks = 0
        longestStart = -1
        longestEnd = len(treeNodes)
        longestDist = sys.float_info.min
        longestLinkCount = 0
        while curIndex <= len(treeNodes):
            if (curIndex == len(treeNodes)) or (curIndex == 0) or treeNodes[curIndex].restart:
                totalLinks += 1
                linkCount += 1
                if (curIndex > startIndex) and (startIndex >= 0):
                    # We have a contiguous interval. See if it wins:
                    if treeNodes[curIndex - 1].totalDist - treeNodes[startIndex].totalDist > longestDist:
                        longestStart = startIndex
                        longestEnd = curIndex
                        longestDist = treeNodes[curIndex - 1].totalDist - treeNodes[startIndex].totalDist
                        longestLinkCount = linkCount
                    linkCount = 0

                # This happens if it is time to start a new interval:
                startIndex = curIndex
            else:
                totalLinks += len(treeNodes[curIndex].routeInfo)
                linkCount += len(treeNodes[curIndex].routeInfo)
            curIndex += 1

        if longestStart >= 0:
            # We have a valid path. See if it had been trimmed down and report it.
            if (longestStart > 0) or (longestEnd < len(treeNodes)):
                print("WARNING: For shape ID %s from seq. %d through %d, %.2g%% of %d links will be used."
                      % (str(treeNodes[longestStart].shapeEntry.shapeID), treeNodes[longestStart].shapeEntry.shapeSeq,
                         treeNodes[longestEnd - 1].shapeEntry.shapeSeq,
                         100 * float(longestLinkCount) / float(totalLinks), totalLinks), file=sys.stderr)

        # Step 2: Ignore routes that are entirely outside our valid time interval.
        flag = False
        if len(gtfsStopTimes[gtfsTrips[tripID]]) == 0:
            # This will happen if we don't have stops defined. In this case, we want to go ahead and process the
            # bus_route_link outputs because we don't know if the trip falls in or out of the valid time range.
            flag = True
        else:
            for stopEntry in gtfsStopTimes[gtfsTrips[tripID]]:
                if stopEntry.arrivalTime >= startTime and stopEntry.arrivalTime <= endTime:
                    flag = True
                    break
        if not flag:
            # This will be done silently because (depending upon the valid interval) there could be
            # hundreds of these in a GTFS set.
            continue

        # Step 3: Match up stops to that contiguous list:
        # At this point, we're doing something with this.
        print("INFO: -- Matching stops for trip %d --" % tripID, file=sys.stderr)

        stopTimes = gtfsStopTimes[gtfsTrips[tripID]]
        "@type stopTimes: list<gtfs.StopTimesEntry>"

        # Isolate the relevant VISTA tree nodes: (Assume from above that this is a non-zero length array)
        ourGTFSNodes = treeNodes[longestStart:longestEnd]

        # We are going to recreate a small VISTA network from ourGTFSNodes and then match up the stops to that.
        # First, prepare the small VISTA network:
        vistaSubset = graph.GraphLib(vistaNetwork.gps.latCtr, vistaNetwork.gps.lngCtr)
        vistaNodePrior = None
        "@type vistaNodePrior: graph.GraphNode"

        # Build a list of links:
        outLinkIDList = []
        "@type outLinkIDList: list<int>"

        # Plop in the start node:
        vistaNodePrior = graph.GraphNode(ourGTFSNodes[0].pointOnLink.link.origNode.id,
            ourGTFSNodes[0].pointOnLink.link.origNode.gpsLat, ourGTFSNodes[0].pointOnLink.link.origNode.gpsLng)
        vistaSubset.addNode(vistaNodePrior)
        outLinkIDList.append(ourGTFSNodes[0].pointOnLink.link.id)

        # Link together nodes as we traverse through them:
        for ourGTFSNode in ourGTFSNodes:
            "@type ourGTFSNode: path_engine.PathEnd"
            # There should only be one destination link per VISTA node because this comes from our tree.
            # If there is no link or we're repeating the first one, then there were no new links assigned.
            if (len(ourGTFSNode.routeInfo) < 1) or ((len(outLinkIDList) == 1)
                    and (ourGTFSNode.routeInfo[0].id == ourGTFSNodes[0].pointOnLink.link.id)):
                continue
            for link in ourGTFSNode.routeInfo:
                "@type link: graph.GraphLink"

                if link.id not in vistaNetwork.linkMap:
                    print("WARNING: In finding bus route links, link ID %d is not found in the VISTA network."
                          % link.id, file=sys.stderr)
                    continue
                origVistaLink = vistaNetwork.linkMap[link.id]
                "@type origVistaLink: graph.GraphLink"

                if origVistaLink.origNode.id not in vistaSubset.nodeMap:
                    # Create a new node:
                    vistaNode = graph.GraphNode(origVistaLink.origNode.id, origVistaLink.origNode.gpsLat,
                        origVistaLink.origNode.gpsLng)
                    vistaSubset.addNode(vistaNode)
                else:
                    # The path evidently crosses over itself. Reuse an existing node.
                    vistaNode = vistaSubset.nodeMap[origVistaLink.origNode.id]

                # We shall label our links as indices into the stage we're at in ourGTFSNodes links.
                # This will allow for access later.
                if outLinkIDList[-1] not in vistaSubset.linkMap:
                    vistaSubset.addLink(graph.GraphLink(outLinkIDList[-1], vistaNodePrior, vistaNode))
                vistaNodePrior = vistaNode
                outLinkIDList.append(link.id)

        # And then finish off the graph with the last link:
        if ourGTFSNode.pointOnLink.link.destNode.id not in vistaSubset.nodeMap:
            vistaNode = graph.GraphNode(ourGTFSNode.pointOnLink.link.destNode.id,
                ourGTFSNode.pointOnLink.link.destNode.gpsLat, ourGTFSNode.pointOnLink.link.destNode.gpsLng)
            vistaSubset.addNode(vistaNode)
        if outLinkIDList[-1] not in vistaSubset.linkMap:
            vistaSubset.addLink(graph.GraphLink(outLinkIDList[-1], vistaNodePrior, vistaNode))

        # Then, prepare the stops as GTFS shapes entries:
        print("INFO: Mapping stops to VISTA network...", file=sys.stderr)
        gtfsShapes = []
        gtfsStopsLookup = {}
        "@type gtfsStopsLookup: dict<int, gtfs.StopTimesEntry>"

        # Append an initial dummy shape to force routing through the path start:
        gtfsShapes.append(gtfs.ShapesEntry(-1, -1, ourGTFSNodes[0].pointOnLink.link.origNode.gpsLat,
            ourGTFSNodes[0].pointOnLink.link.origNode.gpsLng))

        # Append all of the stops:
        for gtfsStopTime in stopTimes:
            "@type gtfsStopTime: gtfs.StopTimesEntry"
            gtfsShapes.append(gtfs.ShapesEntry(-1, gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat,
                gtfsStopTime.stop.gpsLng))
            gtfsStopsLookup[gtfsStopTime.stopSeq] = gtfsStopTime

        # Append a trailing dummy shape to force routing through the path end:
        gtfsShapes.append(gtfs.ShapesEntry(-1, -1, ourGTFSNodes[-1].pointOnLink.link.destNode.gpsLat,
            ourGTFSNodes[-1].pointOnLink.link.destNode.gpsLng))

        # Find a path through our prepared node map subset:
        resultTree = pathEngine.constructPath(gtfsShapes, vistaSubset)
        "@type resultTree: list<path_engine.PathEnd>"

        # Strip off the dummy ends:
        del resultTree[-1]
        del resultTree[0]
        if len(resultTree) > 0:
            resultTree[0].prevTreeNode = None

        # So now we should have one tree entry per matched stop.

        # Deal with Problem Report:
        # TODO: The Problem Report will include all nodes on each path regardless of valid time interval;
        # however, we will not have gotten here if the trip was entirely outside of it.
        if problemReport:
            revisedNodeList = {}
            prevNode = None
            "@type revisedNodeList: dict<int, path_engine.PathEnd>"
            for stopNode in resultTree:
                # Reconstruct a tree node in terms of the original network.
                newShape = gtfs.ShapesEntry(gtfsTrips[tripID].shapeEntries[0].shapeID, stopNode.shapeEntry.shapeSeq,
                    stopNode.shapeEntry.lat, stopNode.shapeEntry.lng, False)
                origLink = vistaNetwork.linkMap[stopNode.pointOnLink.link.id]
                newPointOnLink = graph.PointOnLink(origLink, stopNode.pointOnLink.dist,
                    stopNode.pointOnLink.nonPerpPenalty, stopNode.pointOnLink.refDist)
                newNode = path_engine.PathEnd(newShape, newPointOnLink)
                newNode.restart = False
                newNode.totalCost = stopNode.totalCost
                newNode.totalDist = stopNode.totalDist
                newNode.routeInfo = []
                for link in stopNode.routeInfo:
                    newNode.routeInfo.append(vistaNetwork.linkMap[link.id])
                newNode.prevTreeNode = prevNode
                prevNode = newNode
                revisedNodeList[stopNode.shapeEntry.shapeSeq] = newNode
            problemReportNodes[gtfsTrips[tripID].shapeEntries[0].shapeID] = revisedNodeList

        # Walk through our output link list and see where the resultTree entries occur:
        resultIndex = 0
        stopMatches = []
        "@type stopMatches: list<StopMatch>"
        rejectFlag = False
        for linkID in outLinkIDList:
            curResultIndex = resultIndex

            # This routine will advance resultIndex only if a stop is found for linkID, and will exit out when
            # no more stops are found for linkID.
            stopMatch = StopMatch(linkID)
            "@type stopMatch: StopMatch"
            stopMatches.append(stopMatch)
            while curResultIndex < len(resultTree):
                if resultTree[curResultIndex].pointOnLink.link.id == linkID:
                    # Only pay attention to this stop if it is within the valid time range:
                    gtfsStopTime = gtfsStopsLookup[resultTree[resultIndex].shapeEntry.shapeSeq]
                    if excludeBegin and gtfsStopTime.arrivalTime < startTime \
                            or excludeEnd and gtfsStopTime.arrivalTime > endTime:
                        # Throw away this entire route because it is excluded and part of it falls outside:
                        print("INFO: Excluded because of activity outside of the valid time range.", file=sys.stderr)
                        del stopMatches[:]
                        rejectFlag = True
                        break
                    elif (widenBegin or gtfsStopTime.arrivalTime >= startTime) \
                            and (widenEnd or gtfsStopTime.arrivalTime <= endTime):
                        if (stopMatch.bestTreeEntry is None) \
                                or (resultTree[resultIndex].pointOnLink.refDist
                                    < stopMatch.bestTreeEntry.pointOnLink.refDist):
                            # Log the best match:
                            stopMatch.bestTreeEntry = resultTree[resultIndex]
                        stopMatch.matchCtr += 1
                    resultIndex = curResultIndex + 1
                curResultIndex += 1
                if (stopMatch and stopMatch.matchCtr == 0) \
                        or ((curResultIndex < len(resultTree))
                            and (resultTree[resultIndex].pointOnLink.link.id == linkID)):
                    continue
                # We have gotten to the end of matched link(s).
                break
            if rejectFlag:
                break

        # Then, output the results if we are supposed to.
        foundStopSet = set()
        "@type foundStopSet: set<int>"
        if not rejectFlag:
            outSeqCtr = longestStart
            minTime = warmupStartTime
            maxTime = cooldownEndTime
            foundValidStop = False
            for stopMatch in stopMatches:
                if stopMatch.matchCtr > 1:
                    # Report duplicates:
                    print("WARNING: %d stops have been matched for TripID %d, LinkID %d. Keeping Stop %d, Stop Seq %d"
                          % (stopMatch.matchCtr, tripID, stopMatch.linkID,
                             gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                             stopMatch.bestTreeEntry.shapeEntry.shapeSeq), file=sys.stderr)
                    # TODO: This is a problem because VISTA only allows one stop per link. So, the stop that is
                    # closest to the link is the one that is the winner and the rest are ignored. We don't yet do
                    # anything intelligent with dwell times, etc.
                if stopMatch.matchCtr > 0:
                    # Report the best match:
                    foundStopSet.add(stopMatch.bestTreeEntry.shapeEntry.shapeSeq)  # Check off this stop sequence.
                    foundValidStop = True
                    print('"%d","%d","%d","%d","%d",' % (tripID, outSeqCtr, stopMatch.linkID,
                        gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                        DWELLTIME_DEFAULT), file=outFile)
                    if gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID in ret \
                            and ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID].link.id \
                                != stopMatch.bestTreeEntry.pointOnLink.link.id:
                        print("WARNING: stopID %d is attempted to be assigned to linkID %d, but it had already been assigned to linkID %d."
                              % (gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                                 stopMatch.bestTreeEntry.pointOnLink.link.id,
                                 ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID].link.id),
                              file=sys.stderr)
                        # TODO: This is a tricky problem. This means that among multiple bus routes, the same stop had
                        # been found to best fit two different links. I don't exactly know the best way to resolve
                        # this, other than (for NMC analyses) to create a "fake" stop that's tied with the new link.
                    else:
                        ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID] = \
                            stopMatch.bestTreeEntry.pointOnLink

                    # Check on the minimum/maximum time range:
                    gtfsStopTime = gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq]
                    minTime = min(gtfsStopTime.arrivalTime, minTime)
                    maxTime = max(gtfsStopTime.arrivalTime, maxTime)
                else:
                    # The linkID has nothing to do with any points in consideration. Report it without a stop:
                    if foundValidStop or not excludeUpstream:
                        print('"%d","%d","%d",,,' % (tripID, outSeqCtr, stopMatch.linkID), file=outFile)
                outSeqCtr += 1
                # TODO: For start time estimation (as reported in the public.bus_frequency.csv output), it may be
                # ideal to keep track of linear distance traveled before the first valid stop.

            # Widen out the valid interval if needed:
            warmupStartTime = min(minTime, warmupStartTime)
            cooldownEndTime = max(maxTime, cooldownEndTime)

            # Are there any stops left over? If so, report them to say that they aren't in the output file.
            startGap = -1
            endGap = -1
            for gtfsStopTime in stopTimes:
                "@type gtfsStopTime: gtfs.StopTimesEntry"
                flag = False
                if gtfsStopTime.stopSeq not in foundStopSet:
                    # This stop is unaccounted for:
                    if startGap < 0:
                        startGap = gtfsStopTime.stopSeq
                    endGap = gtfsStopTime.stopSeq

                    # Old message is very annoying, especially if the underlying topology is a subset of shapefile
                    # geographic area and there's a ton of them. That's why there is the new range message as shown below.
                    # print("WARNING: Trip tripID %d, stopID %d stop seq. %d will not be in the bus_route_link file." % (tripID,
                    #     gtfsStopTime.stop.stopID, gtfsStopTime.stopSeq), file=sys.stderr)

                    if problemReport:
                        revisedNodeList = problemReportNodes[gtfsTrips[tripID].shapeEntries[0].shapeID]
                        if gtfsStopTime.stopSeq not in revisedNodeList:
                            # Make a dummy "error" node for reporting.
                            newShape = gtfs.ShapesEntry(gtfsTrips[tripID].shapeEntries[0].shapeID,
                                gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat, gtfsStopTime.stop.gpsLng, False)
                            newPointOnLink = graph.PointOnLink(None, 0)
                            newPointOnLink.pointX = gtfsStopTime.stop.pointX
                            newPointOnLink.pointY = gtfsStopTime.stop.pointY
                            newNode = path_engine.PathEnd(newShape, newPointOnLink)
                            newNode.restart = True
                            revisedNodeList[gtfsStopTime.stopSeq] = newNode
                else:
                    flag = True
                if (flag or gtfsStopTime.stopSeq == stopTimes[-1].stopSeq) and startGap >= 0:
                    subStr = "Seqs. %d-%d" % (startGap, endGap) if startGap != endGap else "Seq. %d" % startGap
                    print("WARNING: Trip ID %d, Stop %s will not be in the bus_route_link file." % (tripID, subStr),
                          file=sys.stderr)
                    startGap = -1
        else:
            print("WARNING: No links for tripID %d." % tripID, file=sys.stderr)

    # Deal with Problem Report:
    if problemReport:
        print("INFO: Output problem report CSV...", file=sys.stderr)
        problemReportNodesOut = {}
        for shapeID in problemReportNodes:
            seqs = compat.listkeys(problemReportNodes[shapeID])
            seqs.sort()
            ourTgtList = []
            for seq in seqs:
                ourTgtList.append(problemReportNodes[shapeID][seq])
            problemReportNodesOut[shapeID] = ourTgtList
        problem_report.problemReport(problemReportNodesOut, vistaNetwork)

    return (ret, warmupStartTime, cooldownEndTime)
def main(argv):
    global problemReport
    excludeUpstream = False

    # Initialize from command-line parameters:
    if len(argv) < 7:
        syntax(1)
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    shapePath = argv[5]
    pathMatchFilename = argv[6]
    endTimeInt = 86400
    refTime = None
    widenBegin = False
    widenEnd = False
    excludeBegin = False
    excludeEnd = False

    restrictService = set()
    "@type restrictService: set<string>"

    if len(argv) > 6:
        i = 7
        while i < len(argv):
            if argv[i] == "-t" and i < len(argv) - 1:
                refTime = datetime.strptime(argv[i + 1], '%H:%M:%S')
                i += 1
            elif argv[i] == "-e" and i < len(argv) - 1:
                endTimeInt = int(argv[i + 1])
                i += 1
            elif argv[i] == "-c" and i < len(argv) - 1:
                restrictService.add(argv[i + 1])
                i += 1
            elif argv[i] == "-u":
                excludeUpstream = True
            elif argv[i] == "-w":
                widenBegin = True
                widenEnd = True
            elif argv[i] == "-wb":
                widenBegin = True
            elif argv[i] == "-we":
                widenEnd = True
            elif argv[i] == "-x":
                excludeBegin = True
                excludeEnd = True
            elif argv[i] == "-xb":
                excludeBegin = True
            elif argv[i] == "-xe":
                excludeEnd = True
            elif argv[i] == "-p":
                problemReport = True
            i += 1

    if refTime is None:
        print("ERROR: No reference time is specified. You must use the -t parameter.", file=sys.stderr)
        syntax(1)
    endTime = refTime + timedelta(seconds=endTimeInt)

    if widenBegin and excludeBegin:
        print("ERROR: Widening (-w or -wb) and exclusion (-x or -xb) cannot be used together.", file=sys.stderr)
        syntax(1)
    if widenEnd and excludeEnd:
        print("ERROR: Widening (-w or -we) and exclusion (-x or -xe) cannot be used together.", file=sys.stderr)
        syntax(1)

    # Default parameters:
    stopSearchRadius = 800

    # Restore the stuff that was built with path_match:
    (vistaGraph, gtfsShapes, gtfsNodes, unusedShapeIDs) = restorePathMatch(dbServer, networkName, userName, password,
        shapePath, pathMatchFilename)

    # Read in the stuff from GTFS that further defines buses:
    _, gtfsStops, gtfsTrips, gtfsStopTimes = readBusRecords(shapePath, vistaGraph, gtfsShapes, unusedShapeIDs,
        restrictService)

    # Output the routes_link file:
    print("INFO: Dumping public.bus_route_link.csv...", file=sys.stderr)
    with open("public.bus_route_link.csv", 'w') as outFile:
        (stopLinkMap, newStartTime, newEndTime) = dumpBusRouteLinks(gtfsTrips, gtfsStopTimes, gtfsNodes, vistaGraph,
            stopSearchRadius, excludeUpstream, userName, networkName, refTime, endTime, widenBegin, widenEnd,
            excludeBegin, excludeEnd, outFile)
        "@type stopLinkMap: dict<int, graph.PointOnLink>"

    # Filter only to bus stops and stop times that are used in the routes_link output:
    gtfsStopsFilterList = [gtfsStopID for gtfsStopID in gtfsStops if gtfsStopID not in stopLinkMap]
    for gtfsStopID in gtfsStopsFilterList:
        del gtfsStops[gtfsStopID]
    del gtfsStopsFilterList

    # Then, output the stop file:
    print("INFO: Dumping public.bus_stop.csv...", file=sys.stderr)
    with open("public.bus_stop.csv", 'w') as outFile:
        dumpBusStops(gtfsStops, stopLinkMap, userName, networkName, outFile)

    print("INFO: Dumping public.bus_frequency.csv...", file=sys.stderr)
    validTrips = {}
    "@type validTrips: dict<int, gtfs.TripsEntry>"
    with open("public.bus_frequency.csv", 'w') as outFile:
        _outHeader("public.bus_frequency", userName, networkName, outFile)
        print("\"route\",\"period\",\"frequency\",\"offsettime\",\"preemption\"", file=outFile)

        # Okay, here we iterate through stops until we get to the first defined one. That will
        # then affect the offsettime. (This is needed because of the idea that we want to start
        # a bus in the simulation with respect to the topology that supports it, skipping those
        # stops that may fall outside the topology.)
        totalCycle = int((newEndTime - newStartTime).total_seconds())
        tripIDs = compat.listkeys(gtfsTrips)
        tripIDs.sort()
        for tripID in tripIDs:
            stopsEntries = gtfsStopTimes[gtfsTrips[tripID]]
            for gtfsStopTime in stopsEntries:
                if gtfsStopTime.stop.stopID in gtfsStops:
                    # Here is a first valid entry! Use this offset value.
                    # TODO: This could be inaccurate because the offset is that of the first
                    # valid stop time encountered in the underlying topology, not approximated
                    # to the first valid link encountered. While this isn't a big deal for an
                    # area with a high stop density, it could be a problem for limited-stop
                    # service where there happens to be a low density around where the bus
                    # first appears in the underlying topology.
                    stopTime = gtfsStopTime.arrivalTime

                    # Adjust for cases where we need to add a day.
                    if stopTime < newStartTime:
                        # Assume that we're working just within a day. (Integer division keeps the
                        # adjustment on whole-day boundaries.)
                        stopTime += timedelta(days=int((newStartTime - stopTime).total_seconds()) // 86400 + 1)
                    print("%d,1,%d,%d,0" % (tripID, totalCycle, int((stopTime - newStartTime).total_seconds())),
                        file=outFile)
                    validTrips[tripID] = gtfsTrips[tripID]  # Record as valid.
                    break

                # A byproduct of this scheme is that no bus_frequency entry will appear for
                # routes that don't have stops in the underlying topology.

    # Output the routes file:
    print("INFO: Dumping public.bus_route.csv...", file=sys.stderr)
    with open("public.bus_route.csv", 'w') as outFile:
        dumpBusRoutes(validTrips, userName, networkName, outFile)

    # Finally, define one period that spans the whole working time, which all of the individually
    # defined routes (again, one route per trip) will operate in.
    print("INFO: Dumping public.bus_period.csv...", file=sys.stderr)
    with open("public.bus_period.csv", 'w') as outFile:
        _outHeader("public.bus_period", userName, networkName, outFile)
        print("\"id\",\"starttime\",\"endtime\"", file=outFile)
        # The start time printed here is relative to the reference time.
        print("1,0,%d" % endTimeInt, file=outFile)

    if widenBegin or widenEnd:
        # Report the implicit adjustment in times because of warmup or cooldown:
        startTimeDiff = refTime - newStartTime
        endTimeDiff = newEndTime - endTime
        print("INFO: Widening requires start %d sec. earlier and duration %d sec. longer."
            % (startTimeDiff.total_seconds(), endTimeDiff.total_seconds() + startTimeDiff.total_seconds()),
            file=sys.stderr)
        totalTimeDiff = newEndTime - newStartTime
        print("INFO: New time reference is %s, duration %d sec." % (newStartTime.strftime("%H:%M:%S"),
            totalTimeDiff.total_seconds()), file=sys.stderr)

    print("INFO: Done.", file=sys.stderr)
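# Illustrative invocation sketch (not part of the original source), based solely on the argument parsing in
# main() above. The script name, server, credentials, and file names are hypothetical placeholders:
#
#     python transit_gtfs.py dbserver.example.org my_network my_user my_pass ./gtfs_feed shape_path_match.txt \
#         -t 00:00:00 -e 86400 -c WEEKDAY -w -p
#
# Here -t sets the reference time, -e the evaluation duration in seconds, -c restricts processing to a
# service tag (presumably a GTFS service ID), -w widens the valid interval for warm-up/cool-down (-x would
# exclude instead), -u omits route links that come before the first matched stop, and -p also emits the
# problem report CSV.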
def main(argv):
    # Initialize from command-line parameters:
    if len(argv) < 7:
        syntax(0)
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    gdbFilename = argv[5]
    gdbPathMatch = argv[6]
    sourceID = 0
    endTime = 86400
    refTime = None
    problemReport = False
    gdbReportFlag = False

    if len(argv) > 6:
        i = 7
        while i < len(argv):
            if argv[i] == "-s" and i < len(argv) - 1:
                sourceID = int(argv[i + 1])
                i += 1
            elif argv[i] == "-t" and i < len(argv) - 1:
                refTime = datetime.strptime(argv[i + 1], '%H:%M:%S')
                i += 1
            elif argv[i] == "-e" and i < len(argv) - 1:
                endTime = int(argv[i + 1])
                i += 1
            elif argv[i] == "-p":
                problemReport = True
            elif argv[i] == "-g":
                gdbReportFlag = True
            i += 1

    if refTime is None and not problemReport:
        print("ERROR: No reference time is specified.")
        syntax(1)
    if problemReport and gdbReportFlag:
        print("ERROR: Cannot output both a problem report and a GDB report.")
        syntax(1)

    # Get the database connected:
    print("INFO: Connect to database...", file=sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file=sys.stderr)
    vistaGraph = vista_network.fillGraph(database)

    # Read in the GPS track information:
    print("INFO: Read GDB GPS track '%s'..." % gdbFilename, file=sys.stderr)
    gpsTracks = gdb_extracted.fillFromFile(gdbFilename, vistaGraph.gps)

    # Restore the path match:
    print("INFO: Read the GDB path-match file '%s'..." % gdbPathMatch, file=sys.stderr)
    with open(gdbPathMatch, 'r') as inFile:
        nodes = path_engine.readStandardDump(vistaGraph, gpsTracks, inFile, lambda x: str(x))

    # Assumption: Each shapeID corresponds with one trip that will be reported in the output.
    # And, each route corresponds with one trip.

    # Filter out nodes that have one or zero links:
    for shapeID in compat.listkeys(nodes):
        ctr = 0
        for node in nodes[shapeID]:
            ctr += len(node.routeInfo)
        if ctr <= 1:
            print("INFO: Filtering out shapeID %s." % str(shapeID), file=sys.stderr)
            del nodes[shapeID]
            del gpsTracks[shapeID]

    # Deal with Problem Report:
    if problemReport:
        print("INFO: Output problem report CSV...", file=sys.stderr)
        problem_report.problemReport(nodes, vistaGraph)
        print("INFO: Done.", file=sys.stderr)
        return

    if gdbReportFlag:
        print("INFO: Output GDB report CSV...", file=sys.stderr)
        gdbReport(nodes, vistaGraph)
        print("INFO: Done.", file=sys.stderr)
        return

    # TODO: The logic below is a hack to create unique routes given GDB IDs. There are several
    # long-term problems with this, including the idea that it is impossible to reuse common
    # routes (each instance is its own route) and there are assumptions about vehicle ID
    # numbering in the generated vehicles.

    # Fabricate routes:
    routes = {}
    ctr = 1  # We'll be making arbitrary route IDs:
    for shapeID in gpsTracks:
        routes[ctr] = gtfs.RoutesEntry(ctr, shapeID, "")
        ctr += 1

    # Let vehicle IDs be in a different number range:
    vehCtr = int(ctr / 10000)
    vehCtr += 10000

    # Fabricate trips and stop times:
    trips = {}
    stopTimes = {}
    for routeID in routes:
        trips[vehCtr] = gtfs.TripsEntry(vehCtr, routes[routeID], "", gpsTracks[routes[routeID].shortName])
        stopTimes[trips[vehCtr]] = list()  # Fake the system by having no stops defined.
        vehCtr += 1
    tripIDs = compat.listkeys(trips)
    tripIDs.sort()

    # Output the routes file:
    print("INFO: Dumping public.bus_route.csv...", file=sys.stderr)
    with open("public.bus_route.csv", 'w') as outFile:
        transit_gtfs.dumpBusRoutes(trips, userName, networkName, outFile)

    # Output the routes_link file:
    print("INFO: Dumping public.bus_route_link.csv...", file=sys.stderr)
    with open("public.bus_route_link.csv", 'w') as outFile:
        transit_gtfs.dumpBusRouteLinks(trips, stopTimes, nodes, vistaGraph, 1, False, userName, networkName,
            refTime, endTime, False, False, False, False, outFile)

    print("INFO: Dumping public.bus_frequency.csv...", file=sys.stderr)
    with open("public.bus_frequency.csv", 'w') as outFile:
        transit_gtfs._outHeader("public.bus_frequency", userName, networkName, outFile)
        print("\"route\",\"period\",\"frequency\",\"offsettime\",\"preemption\"", file=outFile)
        for tripID in tripIDs:
            departureTime = trips[tripID].shapeEntries[0].time
            timeDiff = departureTime - refTime
            print("%d,1,86400,%d,0" % (tripID, timeDiff.days * 24 * 3600 + timeDiff.seconds), file=outFile)

    print("INFO: Dumping public.bus_period.csv...", file=sys.stderr)
    with open("public.bus_period.csv", 'w') as outFile:
        transit_gtfs._outHeader("public.bus_period", userName, networkName, outFile)
        print("\"id\",\"starttime\",\"endtime\"", file=outFile)
        print("1,0,%d" % endTime, file=outFile)

    # Now we need to write out to the travel_time output:
    print("INFO: Dumping public.travel_time.csv...", file=sys.stderr)
    with open("public.travel_time.csv", 'w') as outFile:
        transit_gtfs._outHeader("public.travel_time", userName, networkName, outFile)
        print("\"departure_time\",\"vehicle_id\",\"route_id\",\"exittime\",\"linkid\",\"arrivaltime\",\"sourceid\"",
            file=outFile)
        for tripID in tripIDs:
            nodeList = nodes[trips[tripID].route.shortName]
            "@type nodeList: list<path_engine.PathEnd>"
            departureTime = trips[tripID].shapeEntries[0].time
            lastTime = trips[tripID].shapeEntries[-1].time

            # Add the first link to the file:
            timeDiff = departureTime - refTime
            timeDiffLast = lastTime - refTime
            outStr = "%d,%d,%d,%d,%d,%d,%d" % (timeDiff.days * 24 * 3600 + timeDiff.seconds,
                trips[tripID].route.routeID, tripID, timeDiffLast.days * 24 * 3600 + timeDiffLast.seconds,
                nodeList[0].pointOnLink.link.id, timeDiff.days * 24 * 3600 + timeDiff.seconds, sourceID)
            print(outStr, file=outFile)
            for node in nodeList:
                "@type node: path_engine.PathEnd"
                if len(node.routeInfo) > 0:
                    # TODO: Deal with midnight if the time is before refTime.
                    arrivalTime = node.shapeEntry.time
                    for link in node.routeInfo:
                        arrivalTimeSec = 3600 * arrivalTime.hour + 60 * arrivalTime.minute + arrivalTime.second
                        # TODO: We need to make vehicleID, routeID and tripID be consistent.
                        timeDiffArr = arrivalTime - refTime
                        outStr = "%d,%d,%d,%d,%d,%d,%d" % (timeDiff.days * 24 * 3600 + timeDiff.seconds,
                            trips[tripID].route.routeID, tripID, timeDiffLast.days * 24 * 3600 + timeDiffLast.seconds,
                            link.id, timeDiffArr.days * 24 * 3600 + timeDiffArr.seconds, sourceID)
                        print(outStr, file=outFile)

    print("INFO: Done.", file=sys.stderr)
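# Illustrative invocation sketch (not part of the original source), based solely on the argument parsing in
# this main(). The script name, server, credentials, and file names are hypothetical placeholders:
#
#     python gdb_transit.py dbserver.example.org my_network my_user my_pass tracks.gdb gdb_path_match.txt \
#         -s 1 -t 00:00:00 -e 86400
#
# Here -s sets the sourceid written into public.travel_time.csv, -t the reference time, and -e the duration
# in seconds; -p instead emits only the problem report CSV, and -g emits only the GDB report CSV.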
def dumpBusRouteLinks(gtfsTrips, gtfsStopTimes, gtfsNodes, vistaNetwork, stopSearchRadius, excludeUpstream, userName,
        networkName, startTime, endTime, widenBegin, widenEnd, excludeBegin, excludeEnd, outFile=sys.stdout):
    """
    dumpBusRouteLinks dumps out the contents of a public.bus_route_link.csv file. This also will remove all
    stop times and trips that fall outside of the valid evaluation interval as dictated by the exclusion parameters.
    @type gtfsTrips: dict<int, gtfs.TripsEntry>
    @type gtfsStopTimes: dict<TripsEntry, list<StopTimesEntry>>
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaNetwork: graph.GraphLib
    @type stopSearchRadius: float
    @type excludeUpstream: boolean
    @type userName: str
    @type networkName: str
    @type startTime: datetime
    @type endTime: datetime
    @type widenBegin: bool
    @type widenEnd: bool
    @type excludeBegin: bool
    @type excludeEnd: bool
    @type outFile: file
    @return A mapping of stopID to points-on-links plus the start and end times adjusted for warm-up and
        cool-down (if widenBegin or widenEnd is True)
    @rtype (dict<int, graph.PointOnLink>, datetime, datetime)
    """
    _outHeader("public.bus_route_link", userName, networkName, outFile)
    print('"route","sequence","link","stop","dwelltime",', file=outFile)

    # Set up the output:
    ret = {}
    "@type ret: dict<int, graph.PointOnLink>"

    warmupStartTime = startTime
    cooldownEndTime = endTime

    # Initialize the path engine for use later:
    pathEngine = path_engine.PathEngine(stopSearchRadius, stopSearchRadius, stopSearchRadius, sys.float_info.max,
        sys.float_info.max, stopSearchRadius, 1, 1, 1, sys.maxsize, sys.maxsize)
    pathEngine.limitClosestPoints = 8
    pathEngine.limitSimultaneousPaths = 6
    pathEngine.maxHops = 12
    pathEngine.logFile = None  # Suppress the log outputs for the path engine; enough stuff will come from other sources.

    problemReportNodes = {}
    "@type problemReportNodes: dict<?, path_engine.PathEnd>"

    tripIDs = compat.listkeys(gtfsTrips)
    tripIDs.sort()
    for tripID in tripIDs:
        if gtfsTrips[tripID].shapeEntries[0].shapeID not in gtfsNodes:
            # This happens if the incoming files contain a subset of all available topology.
            print("WARNING: Skipping route for trip %d because no points are available." % tripID, file=sys.stderr)
            continue
        treeNodes = gtfsNodes[gtfsTrips[tripID].shapeEntries[0].shapeID]
        "@type treeNodes: list<path_engine.PathEnd>"

        # Step 1: Find the longest distance of contiguous valid links within the shape for each trip:
        startIndex = -1
        curIndex = 0
        linkCount = 0
        totalLinks = 0

        longestStart = -1
        longestEnd = len(treeNodes)
        longestDist = sys.float_info.min
        longestLinkCount = 0

        while curIndex <= len(treeNodes):
            if (curIndex == len(treeNodes)) or (curIndex == 0) or treeNodes[curIndex].restart:
                totalLinks += 1
                linkCount += 1
                if (curIndex > startIndex) and (startIndex >= 0):
                    # We have a contiguous interval. See if it wins:
                    if treeNodes[curIndex - 1].totalDist - treeNodes[startIndex].totalDist > longestDist:
                        longestStart = startIndex
                        longestEnd = curIndex
                        longestDist = treeNodes[curIndex - 1].totalDist - treeNodes[startIndex].totalDist
                        longestLinkCount = linkCount
                        linkCount = 0

                # This happens if it is time to start a new interval:
                startIndex = curIndex
            else:
                totalLinks += len(treeNodes[curIndex].routeInfo)
                linkCount += len(treeNodes[curIndex].routeInfo)
            curIndex += 1

        if longestStart >= 0:
            # We have a valid path. See if it had been trimmed down and report it.
            if (longestStart > 0) or (longestEnd < len(treeNodes)):
                print("WARNING: For shape ID %s from seq. %d through %d, %.2g%% of %d links will be used."
                    % (str(treeNodes[longestStart].shapeEntry.shapeID), treeNodes[longestStart].shapeEntry.shapeSeq,
                       treeNodes[longestEnd - 1].shapeEntry.shapeSeq,
                       100 * float(longestLinkCount) / float(totalLinks), totalLinks), file=sys.stderr)

            # Step 2: Ignore routes that are entirely outside our valid time interval.
            flag = False
            if len(gtfsStopTimes[gtfsTrips[tripID]]) == 0:
                # This will happen if we don't have stops defined. In this case, we want to go ahead and process
                # the bus_route_link outputs because we don't know if the trip falls in or out of the valid time range.
                flag = True
            else:
                for stopEntry in gtfsStopTimes[gtfsTrips[tripID]]:
                    if stopEntry.arrivalTime >= startTime and stopEntry.arrivalTime <= endTime:
                        flag = True
                        break
            if not flag:
                # This will be done silently because (depending upon the valid interval) there could be
                # hundreds of these in a GTFS set.
                continue

            # Step 3: Match up stops to that contiguous list:
            # At this point, we're doing something with this.
            print("INFO: -- Matching stops for trip %d --" % tripID, file=sys.stderr)

            stopTimes = gtfsStopTimes[gtfsTrips[tripID]]
            "@type stopTimes: list<gtfs.StopTimesEntry>"

            # Isolate the relevant VISTA tree nodes: (Assume from above that this is a non-zero length array)
            ourGTFSNodes = treeNodes[longestStart:longestEnd]

            # We are going to recreate a small VISTA network from ourGTFSNodes and then match up the stops to that.
            # First, prepare the small VISTA network:
            vistaSubset = graph.GraphLib(vistaNetwork.gps.latCtr, vistaNetwork.gps.lngCtr)
            vistaNodePrior = None
            "@type vistaNodePrior: graph.GraphNode"

            # Build a list of links:
            outLinkIDList = []
            "@type outLinkIDList: list<int>"

            # Plop in the start node:
            vistaNodePrior = graph.GraphNode(ourGTFSNodes[0].pointOnLink.link.origNode.id,
                ourGTFSNodes[0].pointOnLink.link.origNode.gpsLat, ourGTFSNodes[0].pointOnLink.link.origNode.gpsLng)
            vistaSubset.addNode(vistaNodePrior)
            outLinkIDList.append(ourGTFSNodes[0].pointOnLink.link.id)

            # Link together nodes as we traverse through them:
            for ourGTFSNode in ourGTFSNodes:
                "@type ourGTFSNode: path_engine.PathEnd"
                # There should only be one destination link per VISTA node because this comes from our tree.
                # If there is no link or we're repeating the first one, then there were no new links assigned.
                if (len(ourGTFSNode.routeInfo) < 1) or ((len(outLinkIDList) == 1)
                        and (ourGTFSNode.routeInfo[0].id == ourGTFSNodes[0].pointOnLink.link.id)):
                    continue
                for link in ourGTFSNode.routeInfo:
                    "@type link: graph.GraphLink"
                    if link.id not in vistaNetwork.linkMap:
                        print("WARNING: In finding bus route links, link ID %d is not found in the VISTA network."
                            % link.id, file=sys.stderr)
                        continue
                    origVistaLink = vistaNetwork.linkMap[link.id]
                    "@type origVistaLink: graph.GraphLink"
                    if origVistaLink.origNode.id not in vistaSubset.nodeMap:
                        # Create a new node:
                        vistaNode = graph.GraphNode(origVistaLink.origNode.id, origVistaLink.origNode.gpsLat,
                            origVistaLink.origNode.gpsLng)
                        vistaSubset.addNode(vistaNode)
                    else:
                        # The path evidently crosses over itself. Reuse an existing node.
                        vistaNode = vistaSubset.nodeMap[origVistaLink.origNode.id]

                    # We shall label our links as indices into the stage we're at in ourGTFSNodes links. This will
                    # allow for access later.
                    if outLinkIDList[-1] not in vistaSubset.linkMap:
                        vistaSubset.addLink(graph.GraphLink(outLinkIDList[-1], vistaNodePrior, vistaNode))
                    vistaNodePrior = vistaNode
                    outLinkIDList.append(link.id)

            # And then finish off the graph with the last link:
            if ourGTFSNode.pointOnLink.link.destNode.id not in vistaSubset.nodeMap:
                vistaNode = graph.GraphNode(ourGTFSNode.pointOnLink.link.destNode.id,
                    ourGTFSNode.pointOnLink.link.destNode.gpsLat, ourGTFSNode.pointOnLink.link.destNode.gpsLng)
                vistaSubset.addNode(vistaNode)
            if outLinkIDList[-1] not in vistaSubset.linkMap:
                vistaSubset.addLink(graph.GraphLink(outLinkIDList[-1], vistaNodePrior, vistaNode))

            # Then, prepare the stops as GTFS shapes entries:
            print("INFO: Mapping stops to VISTA network...", file=sys.stderr)
            gtfsShapes = []
            gtfsStopsLookup = {}
            "@type gtfsStopsLookup: dict<int, gtfs.StopTimesEntry>"

            # Append an initial dummy shape to force routing through the path start:
            gtfsShapes.append(gtfs.ShapesEntry(-1, -1, ourGTFSNodes[0].pointOnLink.link.origNode.gpsLat,
                ourGTFSNodes[0].pointOnLink.link.origNode.gpsLng))

            # Append all of the stops:
            for gtfsStopTime in stopTimes:
                "@type gtfsStopTime: gtfs.StopTimesEntry"
                gtfsShapes.append(gtfs.ShapesEntry(-1, gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat,
                    gtfsStopTime.stop.gpsLng))
                gtfsStopsLookup[gtfsStopTime.stopSeq] = gtfsStopTime

            # Append a trailing dummy shape to force routing through the path end:
            gtfsShapes.append(gtfs.ShapesEntry(-1, -1, ourGTFSNodes[-1].pointOnLink.link.destNode.gpsLat,
                ourGTFSNodes[-1].pointOnLink.link.destNode.gpsLng))

            # Find a path through our prepared node map subset:
            resultTree = pathEngine.constructPath(gtfsShapes, vistaSubset)
            "@type resultTree: list<path_engine.PathEnd>"

            # Strip off the dummy ends:
            del resultTree[-1]
            del resultTree[0]
            if len(resultTree) > 0:
                resultTree[0].prevTreeNode = None

            # So now we should have one tree entry per matched stop.

            # Deal with Problem Report:
            # TODO: The Problem Report will include all nodes on each path regardless of valid time interval;
            # however, we will not have gotten here if the trip was entirely outside of it.
            if problemReport:
                revisedNodeList = {}
                prevNode = None
                "@type revisedNodeList: dict<int, path_engine.PathEnd>"
                for stopNode in resultTree:
                    # Reconstruct a tree node in terms of the original network.
                    newShape = gtfs.ShapesEntry(gtfsTrips[tripID].shapeEntries[0].shapeID,
                        stopNode.shapeEntry.shapeSeq, stopNode.shapeEntry.lat, stopNode.shapeEntry.lng, False)
                    origLink = vistaNetwork.linkMap[stopNode.pointOnLink.link.id]
                    newPointOnLink = graph.PointOnLink(origLink, stopNode.pointOnLink.dist,
                        stopNode.pointOnLink.nonPerpPenalty, stopNode.pointOnLink.refDist)
                    newNode = path_engine.PathEnd(newShape, newPointOnLink)
                    newNode.restart = False
                    newNode.totalCost = stopNode.totalCost
                    newNode.totalDist = stopNode.totalDist
                    newNode.routeInfo = []
                    for link in stopNode.routeInfo:
                        newNode.routeInfo.append(vistaNetwork.linkMap[link.id])
                    newNode.prevTreeNode = prevNode
                    prevNode = newNode
                    revisedNodeList[stopNode.shapeEntry.shapeSeq] = newNode
                problemReportNodes[gtfsTrips[tripID].shapeEntries[0].shapeID] = revisedNodeList

            # Walk through our output link list and see where the resultTree entries occur:
            resultIndex = 0
            stopMatches = []
            "@type stopMatches: list<StopMatch>"
            rejectFlag = False
            for linkID in outLinkIDList:
                curResultIndex = resultIndex
                # This routine will advance resultIndex only if a stop is found for linkID, and will exit out when
                # no more stops are found for linkID.
                stopMatch = StopMatch(linkID)
                "@type stopMatch: StopMatch"
                stopMatches.append(stopMatch)
                while curResultIndex < len(resultTree):
                    if resultTree[curResultIndex].pointOnLink.link.id == linkID:
                        # Only pay attention to this stop if it is within the valid time range:
                        gtfsStopTime = gtfsStopsLookup[resultTree[resultIndex].shapeEntry.shapeSeq]
                        if excludeBegin and gtfsStopTime.arrivalTime < startTime \
                                or excludeEnd and gtfsStopTime.arrivalTime > endTime:
                            # Throw away this entire route because it is excluded and part of it falls outside:
                            print("INFO: Excluded because of activity outside of the valid time range.",
                                file=sys.stderr)
                            del stopMatches[:]
                            rejectFlag = True
                            break
                        elif (widenBegin or gtfsStopTime.arrivalTime >= startTime) \
                                and (widenEnd or gtfsStopTime.arrivalTime <= endTime):
                            if (stopMatch.bestTreeEntry is None) \
                                    or (resultTree[resultIndex].pointOnLink.refDist
                                        < stopMatch.bestTreeEntry.pointOnLink.refDist):
                                # Log the best match:
                                stopMatch.bestTreeEntry = resultTree[resultIndex]
                            stopMatch.matchCtr += 1
                            resultIndex = curResultIndex + 1
                        curResultIndex += 1
                        if (stopMatch and stopMatch.matchCtr == 0) \
                                or ((curResultIndex < len(resultTree))
                                    and (resultTree[resultIndex].pointOnLink.link.id == linkID)):
                            continue
                    # We have gotten to the end of matched link(s).
                    break
                if rejectFlag:
                    break

            # Then, output the results if we are supposed to.
            foundStopSet = set()
            "@type foundStopSet: set<int>"
            if not rejectFlag:
                outSeqCtr = longestStart
                minTime = warmupStartTime
                maxTime = cooldownEndTime
                foundValidStop = False
                for stopMatch in stopMatches:
                    if stopMatch.matchCtr > 1:
                        # Report duplicates:
                        print("WARNING: %d stops have been matched for TripID %d, LinkID %d. Keeping Stop %d, Stop Seq %d"
                            % (stopMatch.matchCtr, tripID, stopMatch.linkID,
                               gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                               stopMatch.bestTreeEntry.shapeEntry.shapeSeq), file=sys.stderr)
                        # TODO: This is a problem because VISTA only allows one stop per link. So, the stop that is
                        # closest to the link is the one that is the winner and the rest are ignored. We don't yet
                        # do anything intelligent with dwell times, etc.
                    if stopMatch.matchCtr > 0:
                        # Report the best match:
                        foundStopSet.add(stopMatch.bestTreeEntry.shapeEntry.shapeSeq)  # Check off this stop sequence.
                        foundValidStop = True
                        print('"%d","%d","%d","%d","%d",' % (tripID, outSeqCtr, stopMatch.linkID,
                            gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                            DWELLTIME_DEFAULT), file=outFile)
                        if gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID in ret \
                                and ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID].link.id \
                                    != stopMatch.bestTreeEntry.pointOnLink.link.id:
                            print("WARNING: stopID %d is attempted to be assigned to linkID %d, but it had already been assigned to linkID %d."
                                % (gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                                   stopMatch.bestTreeEntry.pointOnLink.link.id,
                                   ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID].link.id),
                                file=sys.stderr)
                            # TODO: This is a tricky problem. This means that among multiple bus routes, the same stop
                            # had been found to best fit two different links. I don't exactly know the best way to
                            # resolve this, other than (for NMC analyses) to create a "fake" stop that's tied with the
                            # new link.
                        else:
                            ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID] = \
                                stopMatch.bestTreeEntry.pointOnLink
                        # Check on the minimum/maximum time range:
                        gtfsStopTime = gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq]
                        minTime = min(gtfsStopTime.arrivalTime, minTime)
                        maxTime = max(gtfsStopTime.arrivalTime, maxTime)
                    else:
                        # The linkID has nothing to do with any points in consideration. Report it without a stop:
                        if foundValidStop or not excludeUpstream:
                            print('"%d","%d","%d",,,' % (tripID, outSeqCtr, stopMatch.linkID), file=outFile)
                    outSeqCtr += 1
                # TODO: For start time estimation (as reported in the public.bus_frequency.csv output), it may be
                # ideal to keep track of linear distance traveled before the first valid stop.

                # Widen out the valid interval if needed:
                warmupStartTime = min(minTime, warmupStartTime)
                cooldownEndTime = max(maxTime, cooldownEndTime)

                # Are there any stops left over? If so, report them to say that they aren't in the output file.
                startGap = -1
                endGap = -1
                for gtfsStopTime in stopTimes:
                    "@type gtfsStopTime: gtfs.StopTimesEntry"
                    flag = False
                    if gtfsStopTime.stopSeq not in foundStopSet:
                        # This stop is unaccounted for:
                        if startGap < 0:
                            startGap = gtfsStopTime.stopSeq
                        endGap = gtfsStopTime.stopSeq
                        # Old message is very annoying, especially if the underlying topology is a subset of shapefile
                        # geographic area and there's a ton of them. That's why there is the new range message as shown below.
                        # print("WARNING: Trip tripID %d, stopID %d stop seq. %d will not be in the bus_route_link file." % (tripID,
                        #     gtfsStopTime.stop.stopID, gtfsStopTime.stopSeq), file=sys.stderr)
                        if problemReport:
                            revisedNodeList = problemReportNodes[gtfsTrips[tripID].shapeEntries[0].shapeID]
                            if gtfsStopTime.stopSeq not in revisedNodeList:
                                # Make a dummy "error" node for reporting.
                                newShape = gtfs.ShapesEntry(gtfsTrips[tripID].shapeEntries[0].shapeID,
                                    gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat, gtfsStopTime.stop.gpsLng, False)
                                newPointOnLink = graph.PointOnLink(None, 0)
                                newPointOnLink.pointX = gtfsStopTime.stop.pointX
                                newPointOnLink.pointY = gtfsStopTime.stop.pointY
                                newNode = path_engine.PathEnd(newShape, newPointOnLink)
                                newNode.restart = True
                                revisedNodeList[gtfsStopTime.stopSeq] = newNode
                    else:
                        flag = True
                    if (flag or gtfsStopTime.stopSeq == stopTimes[-1].stopSeq) and startGap >= 0:
                        subStr = "Seqs. %d-%d" % (startGap, endGap) if startGap != endGap else "Seq. %d" % startGap
                        print("WARNING: Trip ID %d, Stop %s will not be in the bus_route_link file." % (tripID, subStr),
                            file=sys.stderr)
                        startGap = -1
        else:
            print("WARNING: No links for tripID %d." % tripID, file=sys.stderr)

    # Deal with Problem Report:
    if problemReport:
        print("INFO: Output problem report CSV...", file=sys.stderr)
        problemReportNodesOut = {}
        for shapeID in problemReportNodes:
            seqs = compat.listkeys(problemReportNodes[shapeID])
            seqs.sort()
            ourTgtList = []
            for seq in seqs:
                ourTgtList.append(problemReportNodes[shapeID][seq])
            problemReportNodesOut[shapeID] = ourTgtList
        problem_report.problemReport(problemReportNodesOut, vistaNetwork)

    return (ret, warmupStartTime, cooldownEndTime)
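# NOTE (illustrative sketch, not from the original source): dumpBusRouteLinks() relies on a StopMatch helper
# that is defined elsewhere in this project. Based only on how it is used above (constructed as
# StopMatch(linkID) and read via .linkID, .bestTreeEntry, and .matchCtr), its shape is presumably something
# like the minimal class sketched below; it is kept commented out so as not to clash with the real definition.
#
# class StopMatch:
#     """Tracks the best resultTree entry matched to one VISTA link, plus a count of candidate matches."""
#     def __init__(self, linkID):
#         self.linkID = linkID        # VISTA link this record describes
#         self.bestTreeEntry = None   # path_engine.PathEnd whose pointOnLink.refDist is smallest for this link
#         self.matchCtr = 0           # number of in-range stops that landed on this link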