def ParseFile(gtfs_filename, output_prefix):
    # Find the busiest date
    date, service_ids = ptg.read_busiest_date(gtfs_filename)
    print("Service ids chosen = {0}".format(service_ids))
    # Load the feed separately for each generator so these functions
    # don't modify a shared copy
    trips = GenerateTrips(ptg.load_geo_feed(gtfs_filename), date, service_ids)
    stops = GenerateStops(ptg.load_geo_feed(gtfs_filename))
    stop_times = GenerateStopTimes(ptg.load_geo_feed(gtfs_filename))
    # road_segs, seg_props = GenerateRoadSegments(ptg.load_geo_feed(gtfs_filename))
    trips.to_csv(output_prefix + "_trips.csv", index=False)
    stops.to_csv(output_prefix + "_stops.csv", index=False)
    stop_times.to_csv(output_prefix + "_stop_times.csv", index=False)
def update_output(list_of_contents, list_of_names, list_of_dates):
    for content, name, date in zip(list_of_contents, list_of_names, list_of_dates):
        # The content needs to be split: it contains the content type and the real content
        content_type, content_string = content.split(',')
        # Decode the base64 string
        content_decoded = base64.b64decode(content_string)
        # Use BytesIO to handle the decoded content
        zip_str = io.BytesIO(content_decoded)
        # Now ZipFile can read the BytesIO stream
        zip_obj = ZipFile(zip_str, 'r')
        with tempfile.TemporaryDirectory() as tmpdirname:
            zip_obj.extractall(tmpdirname)
            children = 'created temporary directory ' + tmpdirname
            service_ids = ptg.read_busiest_date(tmpdirname)[1]
            view = {'trips.txt': {'service_id': service_ids}}
            feed = ptg.load_geo_feed(tmpdirname, view)
            routes = feed.routes
            trips = feed.trips
            stop_times = feed.stop_times
            stops = feed.stops
            shapes = feed.shapes
            return str(routes.loc[0, 'route_short_name'])
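# A minimal sketch of how update_output might be wired into a Dash app.
# Assumptions not present in the original snippet: the app layout and the
# component ids 'upload-data' and 'output-data-upload' are illustrative.
from dash import Dash, dcc, html
from dash.dependencies import Input, Output, State

app = Dash(__name__)
app.layout = html.Div([
    dcc.Upload(id='upload-data', children=html.Div('Drop a GTFS .zip here'), multiple=True),
    html.Div(id='output-data-upload'),
])

# multiple=True makes Dash pass lists, matching update_output's signature
app.callback(
    Output('output-data-upload', 'children'),
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename'), State('upload-data', 'last_modified')],
)(update_output)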
def get_transfers(self, stop_ids):
    view = {'transfers.txt': {'from_stop_id': stop_ids}}
    df = ptg.load_geo_feed(self.gtfs_path, view).transfers
    df = df.set_index(['from_stop_id', 'to_stop_id'])
    return df
def get_stops(self, stop_ids):
    view = {'stops.txt': {'stop_id': stop_ids}}
    df = ptg.load_geo_feed(self.gtfs_path, view).stops
    df = df.set_index('stop_id')
    return df
def get_stop_times(self, trip_ids):
    view = {'stop_times.txt': {'trip_id': trip_ids}}
    df = ptg.load_geo_feed(self.gtfs_path, view).stop_times
    df = df.set_index(['trip_id', 'stop_sequence'])
    return df
def get_trips(self, service_ids):
    view = {'trips.txt': {'service_id': service_ids}}
    df = ptg.load_geo_feed(self.gtfs_path, view).trips
    df = df.set_index('trip_id')
    return df
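# The four accessors above read like methods of a small wrapper class that
# holds a gtfs_path. A minimal, hypothetical sketch (the GtfsIndex name and
# the feed path are illustrative, not from the original source):
import partridge as ptg

class GtfsIndex:
    def __init__(self, gtfs_path):
        self.gtfs_path = gtfs_path

    # reuse the module-level functions above as methods
    get_transfers = get_transfers
    get_stops = get_stops
    get_stop_times = get_stop_times
    get_trips = get_trips

index = GtfsIndex("data/gtfs.zip")
_, service_ids = ptg.read_busiest_date("data/gtfs.zip")
trips = index.get_trips(service_ids)                   # indexed by trip_id
stop_times = index.get_stop_times(list(trips.index))   # indexed by (trip_id, stop_sequence)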
def test_load_geo_feed_empty():
    gpd = pytest.importorskip("geopandas")
    feed = ptg.load_geo_feed(fixture("empty"))
    assert isinstance(feed.shapes, gpd.GeoDataFrame)
    assert isinstance(feed.stops, gpd.GeoDataFrame)
    assert feed.shapes.empty
    assert feed.stops.empty
def GetExtents(gtfs_filename):
    """Returns a bounding box containing all of the feed's stops.

    Returns:
        [minlon, minlat, maxlon, maxlat]
    """
    gtfs = ptg.load_geo_feed(gtfs_filename)
    bounds = gtfs.stops.total_bounds
    if np.isnan(bounds).any():
        raise Exception("Extents: bounds had a nan!")
    return bounds.tolist()
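# Hypothetical usage: GetExtents returns [minlon, minlat, maxlon, maxlat],
# e.g. for fitting a map viewport to the feed (the path is illustrative).
minlon, minlat, maxlon, maxlat = GetExtents("data/gtfs.zip")
print(f"feed covers ({minlat}, {minlon}) to ({maxlat}, {maxlon})")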
def get_feed_df(inpath):
    print(inpath)
    _date, service_ids = ptg.read_busiest_date(inpath)
    # assume it'll be a typical weekday; GO rail is the same every weekday
    view = {
        'trips.txt': {
            'service_id': service_ids
        },
    }
    feed = ptg.load_geo_feed(inpath, view)
    return feed
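# Illustrative call: the view restricts trips.txt to the busiest date's
# service_ids before loading, so feed.trips contains only that day's trips.
# The feed path below is an assumption for demonstration.
feed = get_feed_df("data/go_gtfs.zip")
print(feed.trips.groupby("route_id").size().sort_values(ascending=False).head())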
def __init__(self, agency: config.Agency):
    self.agency = agency
    self.agency_id = agency_id = agency.id
    gtfs_cache_dir = f'{util.get_data_dir()}/gtfs-{agency_id}'
    download_gtfs_data(agency, gtfs_cache_dir)
    self.feed = ptg.load_geo_feed(gtfs_cache_dir, {})
    self.stop_times_by_trip = None
    self.stops_df = None
    self.trips_df = None
    self.routes_df = None
    self.stop_times_df = None
    self.shapes_df = None
    self.gtfs_stop_ids_map = None
    self.stops_map = None
def test_load_geo_feed():
    gpd = pytest.importorskip("geopandas")
    feed = ptg.load_geo_feed(fixture("amazon-2017-08-06"))
    assert isinstance(feed.shapes, gpd.GeoDataFrame)
    assert isinstance(feed.stops, gpd.GeoDataFrame)
    assert {"LineString"} == set(feed.shapes.geom_type)
    assert {"Point"} == set(feed.stops.geom_type)
    assert feed.shapes.crs == {"init": "EPSG:4326"}
    assert feed.stops.crs == {"init": "EPSG:4326"}
    assert ["shape_id", "geometry"] == list(feed.shapes.columns)
    assert [
        "stop_id",
        "stop_code",
        "stop_name",
        "stop_desc",
        "zone_id",
        "stop_url",
        "location_type",
        "parent_station",
        "stop_timezone",
        "wheelchair_boarding",
        "geometry",
    ] == list(feed.stops.columns)
def __init__(self, inpath, agency, version):
    self.inpath = inpath
    self.agency = agency
    self.version = version
    self.feed = ptg.load_geo_feed(self.inpath, {})
def save_routes_for_agency(agency: config.Agency, save_to_s3=True):
    agency_id = agency.id
    gtfs_cache_dir = f'{util.get_data_dir()}/gtfs-{agency_id}'
    download_gtfs_data(agency, gtfs_cache_dir)
    feed = ptg.load_geo_feed(gtfs_cache_dir, {})
    print(f"Loading {agency_id} routes...")
    routes_df = feed.routes
    if agency.gtfs_agency_id is not None:
        routes_df = routes_df[routes_df.agency_id == agency.gtfs_agency_id]
    routes_data = []
    print(f"Loading {agency_id} trips...")
    trips_df = feed.trips
    trips_df['direction_id'] = trips_df['direction_id'].astype(str)
    print(f"Loading {agency_id} stop times...")
    stop_times_df = feed.stop_times
    print(f"Loading {agency_id} shapes...")
    shapes_df = feed.shapes
    print(f"Loading {agency_id} stops...")
    stops_df = feed.stops

    # gtfs_stop_ids_map allows looking up row from stops.txt via GTFS stop_id
    gtfs_stop_ids_map = {stop.stop_id: stop for stop in stops_df.itertuples()}
    stop_id_gtfs_field = agency.stop_id_gtfs_field

    # get OpenTransit stop ID for GTFS stop_id (may be the same)
    def normalize_gtfs_stop_id(gtfs_stop_id):
        if stop_id_gtfs_field != 'stop_id':
            return getattr(gtfs_stop_ids_map[gtfs_stop_id], stop_id_gtfs_field)
        else:
            return gtfs_stop_id

    # stops_map allows looking up row from stops.txt via OpenTransit stop ID
    if stop_id_gtfs_field != 'stop_id':
        stops_map = {getattr(stop, stop_id_gtfs_field): stop for stop in stops_df.itertuples()}
    else:
        stops_map = gtfs_stop_ids_map

    if agency.provider == 'nextbus':
        nextbus_route_order = [route.id for route in nextbus.get_route_list(agency.nextbus_id)]

    for route in routes_df.itertuples():
        gtfs_route_id = route.route_id
        short_name = route.route_short_name
        long_name = route.route_long_name
        if isinstance(short_name, str) and isinstance(long_name, str):
            title = f'{short_name} - {long_name}'
        elif isinstance(short_name, str):
            title = short_name
        else:
            title = long_name
        type = int(route.route_type) if hasattr(route, 'route_type') else None
        url = route.route_url if hasattr(route, 'route_url') and isinstance(route.route_url, str) else None
        #color = route.route_color
        #text_color = route.route_text_color
        route_id = getattr(route, agency.route_id_gtfs_field)
        if agency.provider == 'nextbus':
            # hack to handle muni route IDs where e.g. GTFS has "T-OWL" but nextbus has "T_OWL"
            route_id = route_id.replace('-', '_')
            try:
                nextbus_route_config = nextbus.get_route_config(agency.nextbus_id, route_id)
                title = nextbus_route_config.title
            except Exception as ex:
                print(ex)
                continue
            try:
                sort_order = nextbus_route_order.index(route_id)
            except ValueError as ex:
                print(ex)
                sort_order = None
        else:
            sort_order = int(route.route_sort_order) if hasattr(route, 'route_sort_order') else None

        print(f'route {route_id} {title}')
        route_data = {
            'id': route_id,
            'title': title,
            'url': url,
            'type': type,
            #'color': color,
            #'text_color': text_color,
            'gtfs_route_id': gtfs_route_id,
            'sort_order': sort_order,
            'stops': {},
            'directions': [],
        }
        directions = []
        route_directions_df = feed.get('route_directions.txt')  # unofficial trimet gtfs extension
        if not route_directions_df.empty:
            route_directions_df = route_directions_df[route_directions_df['route_id'] == gtfs_route_id]
        else:
            route_directions_df = None
        routes_data.append(route_data)
        route_trips_df = trips_df[trips_df['route_id'] == gtfs_route_id]
        route_direction_id_values = route_trips_df['direction_id'].values

        def add_custom_direction(custom_direction_info):
            direction_id = custom_direction_info['id']
            print(f' custom direction = {direction_id}')
            gtfs_direction_id = custom_direction_info['gtfs_direction_id']
            direction_trips_df = route_trips_df[route_direction_id_values == gtfs_direction_id]
            included_stop_ids = custom_direction_info.get('included_stop_ids', [])
            excluded_stop_ids = custom_direction_info.get('excluded_stop_ids', [])
            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id
            )

            def contains_included_stops(shape_stop_ids):
                min_index = 0
                for stop_id in included_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id, min_index)
                    except ValueError:
                        return False
                    min_index = index + 1  # stops must appear in same order as in included_stop_ids
                return True

            def contains_excluded_stop(shape_stop_ids):
                for stop_id in excluded_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id)
                        return True
                    except ValueError:
                        pass
                return False

            matching_shapes = []
            for shape in shapes:
                shape_stop_ids = shape['stop_ids']
                if contains_included_stops(shape_stop_ids) and not contains_excluded_stop(shape_stop_ids):
                    matching_shapes.append(shape)
            if len(matching_shapes) != 1:
                matching_shape_ids = [shape['shape_id'] for shape in matching_shapes]
                error_message = f'{len(matching_shapes)} shapes found for route {route_id} with GTFS direction ID {gtfs_direction_id}'
                if len(included_stop_ids) > 0:
                    error_message += f" including {','.join(included_stop_ids)}"
                if len(excluded_stop_ids) > 0:
                    error_message += f" excluding {','.join(excluded_stop_ids)}"
                if len(matching_shape_ids) > 0:
                    error_message += f": {','.join(matching_shape_ids)}"
                raise Exception(error_message)
            matching_shape = matching_shapes[0]
            matching_shape_id = matching_shape['shape_id']
            matching_shape_count = matching_shape['count']
            print(f' matching shape = {matching_shape_id} ({matching_shape_count} times)')
            add_direction(
                id=direction_id,
                gtfs_shape_id=matching_shape_id,
                gtfs_direction_id=gtfs_direction_id,
                stop_ids=matching_shape['stop_ids'],
                title=custom_direction_info.get('title', None)
            )

        def add_default_direction(direction_id):
            print(f' default direction = {direction_id}')
            direction_trips_df = route_trips_df[route_direction_id_values == direction_id]
            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id)
            best_shape = shapes[0]
            best_shape_id = best_shape['shape_id']
            best_shape_count = best_shape['count']
            print(f' most common shape = {best_shape_id} ({best_shape_count} times)')
            add_direction(
                id=direction_id,
                gtfs_shape_id=best_shape_id,
                gtfs_direction_id=direction_id,
                stop_ids=best_shape['stop_ids']
            )

        def add_direction(id, gtfs_shape_id, gtfs_direction_id, stop_ids, title=None):
            if title is None:
                default_direction_info = agency.default_directions.get(gtfs_direction_id, {})
                title_prefix = default_direction_info.get('title_prefix', None)
                last_stop_id = stop_ids[-1]
                last_stop = stops_map[last_stop_id]
                if title_prefix is not None:
                    title = f"{title_prefix} to {last_stop.stop_name}"
                else:
                    title = f"To {last_stop.stop_name}"
            print(f' title = {title}')
            dir_data = {
                'id': id,
                'title': title,
                'gtfs_shape_id': gtfs_shape_id,
                'gtfs_direction_id': gtfs_direction_id,
                'stops': stop_ids,
                'stop_geometry': {},
            }
            route_data['directions'].append(dir_data)
            for stop_id in stop_ids:
                stop = stops_map[stop_id]
                stop_data = {
                    'id': stop_id,
                    'lat': round(stop.geometry.y, 5),  # stop_lat in gtfs
                    'lon': round(stop.geometry.x, 5),  # stop_lon in gtfs
                    'title': stop.stop_name,
                    'url': stop.stop_url if hasattr(stop, 'stop_url') and isinstance(stop.stop_url, str) else None,
                }
                route_data['stops'][stop_id] = stop_data
            geometry = shapes_df[shapes_df['shape_id'] == gtfs_shape_id]['geometry'].values[0]
            # partridge returns GTFS geometries for each shape_id as a shapely LineString
            # (https://shapely.readthedocs.io/en/stable/manual.html#linestrings).
            # Each coordinate is an array in [lon,lat] format (note: longitude first, latitude second)
            dir_data['coords'] = [
                {
                    'lat': round(coord[1], 5),
                    'lon': round(coord[0], 5)
                }
                for coord in geometry.coords
            ]
            if agency.provider == 'nextbus':
                # match nextbus direction IDs with GTFS direction IDs
                best_nextbus_dir_info, best_terminal_dist = match_nextbus_direction(nextbus_route_config, geometry)
                print(f' {id} = {best_nextbus_dir_info.id} (terminal_dist={int(best_terminal_dist)}) {" (questionable match)" if best_terminal_dist > 300 else ""}')
                # dir_data['title'] = best_nextbus_dir_info.title
                dir_data['nextbus_direction_id'] = best_nextbus_dir_info.id
            start_lat = geometry.coords[0][1]
            start_lon = geometry.coords[0][0]
            #print(f" start_lat = {start_lat} start_lon = {start_lon}")
            deg_lat_dist = util.haver_distance(start_lat, start_lon, start_lat - 0.1, start_lon) * 10
            deg_lon_dist = util.haver_distance(start_lat, start_lon, start_lat, start_lon - 0.1) * 10

            # projection function from lon/lat coordinates in degrees (z ignored) to x/y coordinates in meters,
            # satisfying the interface of shapely.ops.transform (https://shapely.readthedocs.io/en/stable/manual.html#shapely.ops.transform).
            # This makes it possible to use shapely methods to calculate the distance in meters between geometries
            def project_xy(lon, lat, z=None):
                return (round((lon - start_lon) * deg_lon_dist, 1), round((lat - start_lat) * deg_lat_dist, 1))

            xy_geometry = shapely.ops.transform(project_xy, geometry)
            shape_lon_lat = np.array(geometry).T
            shape_lon = shape_lon_lat[0]
            shape_lat = shape_lon_lat[1]
            shape_prev_lon = np.r_[shape_lon[0], shape_lon[:-1]]
            shape_prev_lat = np.r_[shape_lat[0], shape_lat[:-1]]
            # shape_cumulative_dist[i] is the cumulative distance in meters along the shape geometry from 0th to ith coordinate
            shape_cumulative_dist = np.cumsum(util.haver_distance(shape_lon, shape_lat, shape_prev_lon, shape_prev_lat))
            shape_lines_xy = [shapely.geometry.LineString(xy_geometry.coords[i:i+2]) for i in range(0, len(xy_geometry.coords) - 1)]
            # this is the total distance of the GTFS shape, which may not be exactly the same as the
            # distance along the route between the first and last Nextbus stop
            dir_data['distance'] = int(shape_cumulative_dist[-1])
            print(f" distance = {dir_data['distance']}")
            # Find each stop along the route shape, so that the frontend can draw line segments between stops along the shape
            start_index = 0
            for stop_id in stop_ids:
                stop_info = route_data['stops'][stop_id]
                # Need to project lon/lat coords to x/y in order for shapely to determine the distance between
                # a point and a line (shapely doesn't support distance for lon/lat coords)
                stop_xy = shapely.geometry.Point(project_xy(stop_info['lon'], stop_info['lat']))
                stop_geometry = get_stop_geometry(stop_xy, shape_lines_xy, shape_cumulative_dist, start_index)
                if stop_geometry['offset'] > 100:
                    print(f" !! bad geometry for stop {stop_id}: {stop_geometry['offset']} m from route line segment")
                    continue
                dir_data['stop_geometry'][stop_id] = stop_geometry
                start_index = stop_geometry['after_index']

        if route_id in agency.custom_directions:
            for custom_direction_info in agency.custom_directions[route_id]:
                add_custom_direction(custom_direction_info)
        else:
            for direction_id in np.unique(route_direction_id_values):
                add_default_direction(direction_id)

    if routes_data[0]['sort_order'] is not None:
        sort_key = lambda route_data: route_data['sort_order']
    else:
        sort_key = lambda route_data: route_data['id']
    routes_data = sorted(routes_data, key=sort_key)

    data_str = json.dumps({
        'version': routeconfig.DefaultVersion,
        'routes': routes_data
    }, separators=(',', ':'))
    cache_path = routeconfig.get_cache_path(agency_id)
    with open(cache_path, "w") as f:
        f.write(data_str)

    if save_to_s3:
        s3 = boto3.resource('s3')
        s3_path = routeconfig.get_s3_path(agency_id)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        )
def bus_peak_frequencies(
    gtfs_path: str,
    test_date: typing.Optional[datetime.date] = None,
    am_peak: typing.Optional[typing.Tuple[int, int]] = None,
    pm_peak: typing.Optional[typing.Tuple[int, int]] = None,
) -> geopandas.GeoDataFrame:
    """
    Compute AM and PM peak frequencies for all the lines in a GTFS feed.

    Parameters
    ==========
    gtfs_path: str
        The path (or URL) to a GTFS feed.
    test_date: datetime.date
        The test date for which to compute frequencies. Defaults to
        February 18th, 2020, an unremarkable weekday in February.
    am_peak: tuple of integers
        The two hours (out of 24) demarcating the AM peak period.
    pm_peak: tuple of integers
        The two hours (out of 24) demarcating the PM peak period.
    """
    # Set default values
    test_date = test_date or TEST_DATE
    am_peak = am_peak or (6, 9)
    pm_peak = pm_peak or (15, 19)
    am_duration = am_peak[1] - am_peak[0]
    pm_duration = pm_peak[1] - pm_peak[0]
    assert am_duration > 0
    assert pm_duration > 0

    # Download and read the GTFS feed
    with fsspec.open(gtfs_path, "rb") as infile:
        data = infile.read()
    with open(GTFS_FILE, "wb") as outfile:
        outfile.write(data)
    service_by_date = partridge.read_service_ids_by_date(GTFS_FILE)
    feed = partridge.load_geo_feed(GTFS_FILE)

    # Get the service for the test date
    try:
        test_service = next(v for k, v in service_by_date.items() if k == test_date)
    except StopIteration:
        raise ValueError(f"Could not find service for {test_date}")

    test_trips = feed.trips[feed.trips.service_id.isin(test_service)]
    test_stops = feed.stop_times[feed.stop_times.trip_id.isin(test_trips.trip_id)]

    # Get the departure, arrival, and mean time for each trip
    trip_timings = test_stops.groupby(test_stops.trip_id).agg({
        "departure_time": min,
        "arrival_time": max,
    })
    trip_timings = trip_timings.assign(
        mean_time=trip_timings.departure_time
        + (trip_timings.arrival_time - trip_timings.departure_time) / 2.0
    )

    # Find all of the trips that fall within the AM and PM peak times.
    am_peak_trips = trip_timings[
        (trip_timings.mean_time > am_peak[0] * 60 * 60)
        & (trip_timings.mean_time < am_peak[1] * 60 * 60)
    ]
    pm_peak_trips = trip_timings[
        (trip_timings.mean_time > pm_peak[0] * 60 * 60)
        & (trip_timings.mean_time < pm_peak[1] * 60 * 60)
    ]
    am_peak_trips = test_trips.merge(
        am_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )
    pm_peak_trips = test_trips.merge(
        pm_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )

    # Compute the peak frequency (average minutes between trips)
    am_peak_frequency = (
        am_peak_trips
        .groupby([am_peak_trips.route_id, am_peak_trips.direction_id])
        .size()
        .to_frame("am_peak_trips")
    )
    am_peak_frequency = am_peak_frequency.assign(
        am_peak_frequency=am_duration * 60 / am_peak_frequency.am_peak_trips
    )
    pm_peak_frequency = (
        pm_peak_trips
        .groupby([pm_peak_trips.route_id, pm_peak_trips.direction_id])
        .size()
        .to_frame("pm_peak_trips")
    )
    pm_peak_frequency = pm_peak_frequency.assign(
        pm_peak_frequency=pm_duration * 60 / pm_peak_frequency.pm_peak_trips
    )
    peak_frequency = pandas.concat(
        [am_peak_frequency, pm_peak_frequency], axis=1, sort=False
    )

    # Add the route short name for easier legibility.
    peak_frequency = peak_frequency.join(
        feed.routes[["route_id", "route_short_name"]].set_index("route_id"),
        how="left",
        on="route_id",
    )

    # Grab the most popular shape as the official one.
    route_shapes = (
        test_trips
        .groupby("route_id")
        .agg({"shape_id": lambda s: s.value_counts().index[0]})
        .reset_index()
        .merge(feed.shapes, how="left", on="shape_id")
        .set_index("route_id")
        .drop(columns=["shape_id"])
    )
    peak_frequency = peak_frequency.merge(
        route_shapes, how="left", right_index=True, left_index=True
    ).assign(agency=feed.agency.agency_name.iloc[0])

    gdf = geopandas.GeoDataFrame(peak_frequency, geometry="geometry")
    gdf.crs = f"EPSG:{WGS84}"
    return gdf
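# A hedged usage sketch, assuming TEST_DATE, GTFS_FILE, and WGS84 are
# module-level constants as the function implies; the feed URL is illustrative.
import datetime

gdf = bus_peak_frequencies(
    "https://example.com/gtfs.zip",
    test_date=datetime.date(2020, 2, 18),
    am_peak=(6, 9),
    pm_peak=(15, 19),
)
# one row per (route_id, direction_id); the *_frequency columns are headways in minutes
print(gdf[["route_short_name", "am_peak_frequency", "pm_peak_frequency"]].head())
gdf.to_file("peak_frequencies.geojson", driver="GeoJSON")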