def test_gtfs(self):
    # Read the feed straight from the agency's URL, with distances in miles.
    self.feed = gtfs_kit.read_feed(
        "http://iportal.sacrt.com/gtfs/SRTD/google_transit.zip",
        dist_units="mi",
    )
    self.assertEqual(self.feed.agency["agency_id"][0], "SRTD")
    # Route type 0 = tram / light rail.
    self.type_zero_stops = get_stops_by_route_type(self.feed, 0)
    self.assertEqual(len(self.type_zero_stops), 102)
    plot_schedule(self.feed, "533", "1")
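# The test above relies on a project helper that is not shown here. A
# minimal sketch of what get_stops_by_route_type might look like, assuming
# it walks the standard GTFS joins routes -> trips -> stop_times -> stops
# (the real implementation may differ):
def get_stops_by_route_type(feed, route_type):
    route_ids = feed.routes.loc[feed.routes["route_type"] == route_type, "route_id"]
    trip_ids = feed.trips.loc[feed.trips["route_id"].isin(route_ids), "trip_id"]
    stop_ids = feed.stop_times.loc[
        feed.stop_times["trip_id"].isin(trip_ids), "stop_id"
    ].unique()
    return feed.stops[feed.stops["stop_id"].isin(stop_ids)]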
from pathlib import Path
from tempfile import NamedTemporaryFile

import gtfs_kit as gk
from requests import get


def download_gtfs(url: str) -> gk.feed.Feed:
    # Stream the zip into a temporary file, read it with gtfs-kit,
    # then remove the file. _GTFS_UNITS is a module-level constant
    # holding the feed's distance units (e.g. "km").
    tf = NamedTemporaryFile(delete=False)
    with get(url, stream=True) as req:
        for chunk in req.iter_content(chunk_size=128):
            tf.write(chunk)
    tf.close()
    gtfs = gk.read_feed(tf.name, dist_units=_GTFS_UNITS)
    Path(tf.name).unlink()
    return gtfs
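# Example use of the downloader above (the URL is borrowed from the test
# snippet earlier in this collection):
feed = download_gtfs("http://iportal.sacrt.com/gtfs/SRTD/google_transit.zip")
print(feed.describe())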
import logging

import gtfs_kit as gt


def import_gtfs_feed(gtfs_filename, transfer_restriction=None, folder=None):
    """
    Import a GTFS feed from the given file.

    Also check that the stop times are ordered and that the transfers are
    symmetrical.

    :param gtfs_filename: name of a GTFS file
    :param transfer_restriction: duration restriction on the transfers
    :param folder: folder where the GTFS is stored. Default is the
        GTFS_FEEDS_FOLDER.
    :return: gtfs-kit Feed object
    """
    # Read the GTFS feed using gtfs-kit.
    if folder is None:
        folder = gtfs_feeds_folder()
    path = folder + gtfs_filename
    feed = gt.read_feed(path, dist_units="km")

    # Additional operations and validations.
    # Order stop_times by trip_id and stop_sequence.
    stop_times = feed.stop_times
    if stop_times is not None:
        feed.stop_times = stop_times.sort_values(by=["trip_id", "stop_sequence"])

    # Check that the foot-path transfers are symmetrical.
    if feed.transfers is not None:
        transfer_table = feed.transfers.copy()
        transfer_table = transfer_table[
            transfer_table["from_stop_id"] != transfer_table["to_stop_id"]
        ]
        transfer_table["stop_A"] = transfer_table[
            ["from_stop_id", "to_stop_id"]
        ].apply(min, axis=1)
        transfer_table["stop_B"] = transfer_table[
            ["from_stop_id", "to_stop_id"]
        ].apply(max, axis=1)
        # A symmetrical table has exactly two arcs (one per direction)
        # for every unordered stop pair.
        count = transfer_table.groupby(
            ["stop_A", "stop_B"], as_index=False
        ).agg(["count"])
        counts_not_equal_to_2 = count[count["min_transfer_time"]["count"] != 2]
        if not counts_not_equal_to_2.empty:
            logging.warning(
                "Transfer table of {} is not symmetrical "
                "(in terms of arcs, not transfer times)".format(gtfs_filename)
            )
        if not is_transitive(feed.transfers) and transfer_restriction is not None:
            feed.transfers = transitively_closed_transfers(
                feed.transfers, transfer_restriction
            )
    else:
        logging.warning("The given GTFS has no transfer table")
    return feed
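# is_transitive and transitively_closed_transfers are project helpers that
# are not shown here. A rough sketch of the check, assuming transitivity
# means every two-leg transfer A -> B -> C also exists directly as A -> C
# (the real implementation may differ):
import pandas as pd


def is_transitive(transfers: pd.DataFrame) -> bool:
    arcs = set(zip(transfers["from_stop_id"], transfers["to_stop_id"]))
    return all(
        (a, c) in arcs
        for a, b in arcs
        for b2, c in arcs
        if b == b2 and a != c
    )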
def getPolygon():
    publicTransportDict = getPublicTransportation()
    listOfStops = []
    for route in publicTransportDict:
        for node in publicTransportDict[route]:
            listOfStops.append(node[0])
    path = Path(r'Data/networks/GTFS/gtfs.zip')
    feed = gk.read_feed(path, dist_units='km')
    # Convex hull of the stops collected above.
    return gk.compute_convex_hull(feed, stop_ids=listOfStops)
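# compute_convex_hull returns a Shapely polygon in WGS84 lon/lat, so the
# result can be inspected directly, e.g.:
polygon = getPolygon()
print(polygon.bounds)  # (min_lon, min_lat, max_lon, max_lat)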
import gtfs_kit
from requests.exceptions import MissingSchema


def build_gtfs_representation(dataset_infos):
    try:
        dataset = gtfs_kit.read_feed(dataset_infos.zip_path, dist_units="km")
    except TypeError as te:
        raise TypeError(
            f"Exception '{te}' occurred while reading the GTFS dataset with the GTFS kit library. "
            f"The dataset must be a valid GTFS zip file or URL.\n"
        )
    except MissingSchema as ms:
        raise MissingSchema(
            f"Exception '{ms}' occurred while opening the GTFS dataset with the GTFS kit library. "
            f"The dataset must be a valid GTFS zip file or URL.\n"
        )
    metadata = GtfsMetadata(dataset_infos)
    representation = GtfsRepresentation(
        dataset_infos.entity_code, dataset, metadata
    )
    return representation
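# Minimal illustration of the input build_gtfs_representation expects: an
# object exposing zip_path and entity_code (attribute names taken from the
# function body; the concrete dataset_infos class and the values below are
# placeholders, not part of the original):
from types import SimpleNamespace

dataset_infos = SimpleNamespace(
    zip_path="google_transit.zip",
    entity_code="SRTD",
)
representation = build_gtfs_representation(dataset_infos)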
import json
from datetime import datetime
from pathlib import Path

import gtfs_kit as gk


def storeGTFSasJson(routeType):
    path = Path(r'Data/networks/GTFS/gtfs.zip')
    feed = gk.read_feed(path, dist_units='km')
    if gk.valid_date('20200706'):
        date = '20200706'
    feed = gk.drop_zombies(feed)

    # Get all the necessary dataframes.
    routesDf = feed.get_routes(date)
    routesDf = routesDf.filter(items=['route_id', 'route_type'])
    # Active routes on Monday of the given mode of transportation.
    routesDf = routesDf[routesDf['route_type'] == routeType]
    tripsDf = feed.get_trips(date)
    tripsDf = tripsDf.filter(items=['route_id', 'service_id', 'trip_id'])
    stop_timesDf = feed.get_stop_times(date)
    stop_timesDf = stop_timesDf.filter(items=[
        'trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence'
    ])
    stopsDf = feed.get_stops(date)
    stopsDf = stopsDf.filter(items=['stop_id', 'stop_name', 'stop_lat', 'stop_lon'])

    # Store the route ids in a list and create the result dict.
    routes = [row['route_id'] for index, row in routesDf.iterrows()]
    publicTransport = {route: [] for route in routes}

    # Iterate through all the routes, creating nodes and edges.
    print('routes of route_type ' + str(routeType) + ' are getting initialized...')
    for route in publicTransport:
        # Get all the trips and stop times of the route.
        tripsOfRouteDf = tripsDf[tripsDf['route_id'] == route]
        trips = [row['trip_id'] for index, row in tripsOfRouteDf.iterrows()]
        stop_timesOfRouteDf = stop_timesDf[stop_timesDf.trip_id.isin(trips)]

        # Dictionary of dictionaries for the edges.
        edges = {}
        # Find the longest trip so that all edges of the route are covered.
        maxLength = 0
        maxTrip_id = ''
        for trip in trips:
            stops_inTrips = stop_timesOfRouteDf[stop_timesOfRouteDf['trip_id'] == trip]
            if len(stops_inTrips) > maxLength:
                maxLength = len(stops_inTrips)
                maxTrip_id = trip
        # Filter the stop times by the trip with the most stops and order
        # them by stop sequence.
        stop_timesOfTrip = stop_timesOfRouteDf[stop_timesOfRouteDf['trip_id'] == maxTrip_id]
        stop_timesOfTrip = stop_timesOfTrip.filter(
            items=['stop_id', 'stop_sequence', 'arrival_time'])
        stop_timesOfTrip = stop_timesOfTrip.sort_values(by=['stop_sequence'])
        # Create a list of tuples to iterate through the stops.
        listOfStop_times = list(map(tuple, stop_timesOfTrip.to_numpy()))

        # Enter the values of each edge in the dictionary: cost, travel time, headway.
        for i in range(0, len(listOfStop_times) - 1):
            # Each edge needs its own value dict; reusing a single dict for
            # every edge would make all edges share the last values written.
            valueEdges = {}
            edges[(listOfStop_times[i][0], listOfStop_times[i + 1][0])] = valueEdges
            valueEdges['cost'] = 0
            try:
                timeDestination = datetime.strptime(listOfStop_times[i + 1][2], '%H:%M:%S')
                timeStart = datetime.strptime(listOfStop_times[i][2], '%H:%M:%S')
                travelTimedelta = timeDestination - timeStart
                travelTime = travelTimedelta.total_seconds() / 60
            except ValueError:
                # GTFS hours can be > 24 for trips running past midnight:
                # convert these values by hand.
                hourDestination = int(listOfStop_times[i + 1][2][0:2])
                hourStart = int(listOfStop_times[i][2][0:2])
                minuteDestination = int(listOfStop_times[i + 1][2][3:5])
                minuteStart = int(listOfStop_times[i][2][3:5])
                secDestination = int(listOfStop_times[i + 1][2][6:8])
                secStart = int(listOfStop_times[i][2][6:8])
                travelTime = (hourDestination * 60 + minuteDestination
                              + secDestination / 60) \
                    - (hourStart * 60 + minuteStart + secStart / 60)
            if travelTime == 0:
                travelTime = 0.1
            valueEdges['travelTime'] = travelTime
            valueEdges['headway'] = getHeadway(routeType)

        # Get the nodes from the edges: iterate through the edges and store
        # the stop ids in a set.
        setNodeids = set()
        for nodetuple in edges:
            setNodeids.add(nodetuple[0])
            setNodeids.add(nodetuple[1])
        stops = stopsDf[stopsDf.stop_id.isin(setNodeids)].filter(
            items=['stop_id', 'stop_lat', 'stop_lon'])
        listOfNodes = list(map(tuple, stops.to_numpy()))
        publicTransport[route].append(listOfNodes)

        # Create the edges in both directions.
        edgesComplete = {}
        for key in edges:
            edgesComplete[key] = edges[key]
            edgesComplete[(key[1], key[0])] = edges[key]
        # The keys need to be strings in order to store the dict as JSON.
        edgesString = {}
        for key in edgesComplete:
            edgesString[key[0] + ":" + key[1]] = edgesComplete[key]
        publicTransport[route].append(edgesString)

    print('number of active routes: ' + str(len(publicTransport)))
    with open('data/networks/' + str(routeType) + 'gtfs.json', 'w') as dataFile:
        json.dump(publicTransport, dataFile)
    with open('data/networks/' + str(routeType) + 'gtfs.json', 'r') as dataFile:
        result = dataFile.read()
    print('route ' + str(routeType) + ' initialized')
    return result
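# Example invocation; route_type 0 is tram/light rail in the GTFS spec,
# and getHeadway is a project helper that is not shown here:
networkJson = storeGTFSasJson(0)
network = json.loads(networkJson)
print(list(network)[:5])  # the first few active route ids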
import os
import sys
from pathlib import Path
import importlib

sys.path.insert(0, os.path.abspath(".."))

import geopandas as gpd
import pandas as pd
import numpy as np
import gtfs_kit
import pytest

# Load/create test feeds
DATA_DIR = Path("data")
sample = gtfs_kit.read_feed(DATA_DIR / "sample_gtfs.zip", dist_units="km")
cairns = gtfs_kit.read_feed(DATA_DIR / "cairns_gtfs.zip", dist_units="km")
cairns_shapeless = cairns.copy()
cairns_shapeless.shapes = None
t = cairns_shapeless.trips
t["shape_id"] = np.nan
cairns_shapeless.trips = t
week = cairns.get_first_week()
cairns_dates = [week[0], week[2]]
cairns_trip_stats = pd.read_csv(
    DATA_DIR / "cairns_trip_stats.csv", dtype=gtfs_kit.DTYPE
)
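# A minimal example test in the style of this module, using the feeds
# loaded above (the assertions are illustrative, not from the original
# test suite):
def test_read_feed_dist_units():
    assert sample.dist_units == "km"
    assert cairns.dist_units == "km"
    assert cairns_shapeless.shapes is None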
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

# %%
# List feed
path = DATA_DIR / 'gtfs-germany.zip'
gk.list_feed(path)

# %%
# Read feed and describe
path = DATA_DIR / 'gtfs-germany.zip'
feed = gk.read_feed(path, dist_units='km')
feed.describe()

# %%
# Validate
feed.validate()

# %%
# Append shape_dist_traveled column to stop times
display(feed.stop_times.head().T)
feed = feed.append_dist_to_stop_times()
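# %%
# A possible next cell (an assumption, not part of the original notebook):
# with shape_dist_traveled appended, compute per-trip stats
trip_stats = feed.compute_trip_stats()
trip_stats.head().T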
def open_gtfs_feed(context, original_filepath):
    context.log.debug(f"Opening GTFS {original_filepath}")
    feed = gk.read_feed(original_filepath, dist_units='km')
    return feed
def read_gtfs(path: Path) -> gk.feed.Feed:
    return gk.read_feed(path, dist_units=_GTFS_UNITS)
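# _GTFS_UNITS is a module constant referenced by read_gtfs and
# download_gtfs above; "km" is an assumed value consistent with the other
# snippets in this collection:
_GTFS_UNITS = "km"
feed = read_gtfs(Path("Data/networks/GTFS/gtfs.zip"))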