def route_config():
    """Flask handler returning the configuration of one sf-muni route as JSON.

    Query params:
        route_id: Nextbus route ID (required).

    The response contains the route title, its directions (each with an
    ordered list of stop IDs), and a map of stop ID -> title/lat/lon.
    Responds 400 if route_id is missing and 404 if it does not resolve to a
    route config.
    """
    route_id = request.args.get('route_id')

    # Validate up front: previously a missing/unknown route_id made
    # route.title raise AttributeError, surfacing as an opaque 500 error.
    if route_id is None:
        return Response(
            json.dumps({'params': {'route_id': route_id}, 'error': "Missing route_id"}, indent=2),
            status=400, mimetype='application/json')

    route = nextbus.get_route_config('sf-muni', route_id)
    if route is None:
        return Response(
            json.dumps({'params': {'route_id': route_id}, 'error': f"Invalid route ID {route_id}"}, indent=2),
            status=404, mimetype='application/json')

    data = {
        'id': route_id,
        'title': route.title,
        'directions': [{
            'id': dir.id,
            'title': dir.title,
            'name': dir.name,
            'stops': dir.get_stop_ids()
        } for dir in route.get_direction_infos()],
        'stops': {
            stop.id: {
                'title': stop.title,
                'lat': stop.lat,
                'lon': stop.lon
            } for stop in route.get_stop_infos()
        }
    }
    return Response(json.dumps(data, indent=2), mimetype='application/json')
def get_arrival_history(as_of_date: datetime, time_zone: pytz.timezone,
                        increment: timedelta, agency: str, route_ids: list,
                        start_hour: int, continue_index, s3=False):
    """Compute and save arrival histories for the given routes.

    Fetches raw vehicle state for the window starting at `start_hour` local
    time on `as_of_date` and lasting `increment`, then derives and saves an
    arrival history per route.

    Args:
        as_of_date: date whose year/month/day anchor the window.
        time_zone: pytz timezone used to localize the window.
        increment: length of the window (e.g. timedelta(days=1)).
        agency: agency ID passed to trynapi/nextbus.
        route_ids: route IDs to process.
        start_hour: local hour-of-day at which the window starts.
        continue_index: if not None, skip routes before this index (resume support).
        s3: whether to also save the history to S3 (new keyword; defaults to
            False so existing callers are unaffected).
    """
    start_dt = time_zone.localize(
        datetime(as_of_date.year, as_of_date.month, as_of_date.day, hour=start_hour))
    end_dt = start_dt + increment

    start_time = int(start_dt.timestamp())
    end_time = int(end_dt.timestamp())

    print(f"time = [{start_dt}, {end_dt})")

    t1 = time.time()
    state = trynapi.get_state(agency, as_of_date, start_time, end_time, route_ids)
    print(f'retrieved state in {round(time.time()-t1,1)} sec')

    for i, route_id in enumerate(route_ids):
        # resume support: skip routes already processed in a previous run
        if continue_index is not None and i < continue_index:
            continue

        route_state = state.get_for_route(route_id)
        if route_state is None:
            print(f'no state for route {route_id}')
            continue

        route_config = nextbus.get_route_config(agency, route_id)

        t1 = time.time()
        arrivals_df = eclipses.find_arrivals(route_state, route_config, as_of_date, time_zone)
        history = arrival_history.from_data_frame(agency, route_id, arrivals_df, start_time, end_time)
        print(f'{route_id}: {round(time.time()-t1,1)} saving arrival history')
        # Fixed: previously referenced the undefined names `d` and `args.s3`
        # (NameError at runtime); use this function's own arguments instead.
        arrival_history.save_for_date(history, as_of_date, s3)
        print(f'{route_id}: {round(time.time()-t1,2)} done')
def route_config():
    """Return the stop/direction configuration of one sf-muni route as JSON.

    Validates the `route_id` query parameter (400 if absent, 404 if unknown)
    and, outside DEBUG mode, marks the response cacheable for an hour.
    """
    route_id = request.args.get('route_id')
    params = {'route_id': route_id}

    # Guard clauses: reject a missing or unknown route before building data.
    if route_id is None:
        return make_error_response(params, "Missing route_id", 400)

    route = nextbus.get_route_config('sf-muni', route_id)
    if route is None:
        return make_error_response(params, f"Invalid route ID {route_id}", 404)

    direction_data = []
    for direction in route.get_direction_infos():
        direction_data.append({
            'id': direction.id,
            'title': direction.title,
            'name': direction.name,
            'stops': direction.get_stop_ids(),
        })

    stop_data = {}
    for stop in route.get_stop_infos():
        stop_data[stop.id] = {
            'title': stop.title,
            'lat': stop.lat,
            'lon': stop.lon,
        }

    data = {
        'id': route_id,
        'title': route.title,
        'directions': direction_data,
        'stops': stop_data,
    }

    res = Response(
        json.dumps(data),
        mimetype='application/json')  # no prettyprinting to save bandwidth

    if not DEBUG:
        res.headers['Cache-Control'] = 'max-age=3600'

    return res
def compute_arrivals_for_date(d: date, start_hour: int, tz: pytz.timezone, agency: str, route_ids: list, s3=False):
    """Fetch one day of raw vehicle state and save arrival histories per route.

    The day runs from `start_hour` local time on `d` to the same hour the
    next day. Routes with no recorded state are skipped with a message.
    When `s3` is true the saved history is also uploaded to S3.
    """
    day_start = tz.localize(datetime(d.year, d.month, d.day, hour=start_hour))
    day_end = day_start + timedelta(days=1)

    start_time = int(day_start.timestamp())
    end_time = int(day_end.timestamp())

    print(f"time = [{day_start}, {day_end})")

    fetch_started = time.time()
    state = trynapi.get_state(agency, d, start_time, end_time, route_ids)
    print(f'retrieved state in {round(time.time()-fetch_started,1)} sec')

    for route_id in route_ids:
        route_state = state.get_for_route(route_id)
        if route_state is None:
            print(f'no state for route {route_id}')
            continue

        route_config = nextbus.get_route_config(agency, route_id)

        route_started = time.time()
        arrivals_df = eclipses.find_arrivals(route_state, route_config, d, tz)
        history = arrival_history.from_data_frame(
            agency, route_id, arrivals_df, start_time, end_time)

        print(f'{route_id}: {round(time.time()-route_started,1)} saving arrival history')
        arrival_history.save_for_date(history, d, s3)
        print(f'{route_id}: {round(time.time()-route_started,2)} done')
def metrics_page():
    """Flask handler returning JSON metrics for a stop (and optional end stop)
    on an sf-muni route: wait time, trip time, and headway statistics.

    Query params: route_id, start_stop_id, end_stop_id, direction_id,
    start_date/end_date (or single date), start_time, end_time.
    Responds 400 on validation errors and 404 when arrival history is missing.
    """
    metrics_start = time.time()

    # Defaults make the endpoint browsable without parameters.
    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')

    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')
    # 'date' is a shorthand for a single-day range and overrides start/end.
    if date_str is not None:
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-04-08'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get('start_time')  # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get('end_time')  # e.g. "18:00" or "03:00+1" (24h time of day)

    # Echoed back in every response (success or error) for debuggability.
    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }
    data = {
        'params': params
    }

    try:
        route_config = nextbus.get_route_config('sf-muni', route_id)

        # Validate stops against the route; errors become 400 responses below.
        start_stop_info = route_config.get_stop_info(start_stop_id)
        if start_stop_info is None:
            raise errors.ValidationError(f"Stop {start_stop_id} is not on route {route_id}")
        data['start_stop_title'] = start_stop_info.title

        if end_stop_id:
            end_stop_info = route_config.get_stop_info(end_stop_id)
            if end_stop_info is None:
                raise errors.ValidationError(f"Stop {end_stop_id} is not on route {route_id}")
            data['end_stop_title'] = end_stop_info.title

        rng = metrics.Range(
            util.get_dates_in_range(start_date_str, end_date_str),
            start_time_str,
            end_time_str,
            pytz.timezone('US/Pacific')
        )

        route_metrics = metrics.RouteMetrics('sf-muni', route_id)

        # Statistic keys requested from each metrics computation.
        keys = ['count','avg','min','median','max','percentiles','histogram']

        data['wait_times'] = route_metrics.get_wait_time_stats(
            direction_id, start_stop_id, rng, keys
        )
        data['trip_times'] = route_metrics.get_trip_time_stats(
            direction_id, start_stop_id, end_stop_id, rng, keys
        )
        data['headway_min'] = route_metrics.get_headway_min_stats(
            direction_id, start_stop_id, rng, keys
        )
    except errors.ArrivalHistoryNotFoundError as ex:
        return make_error_response(params, str(ex), 404)
    except errors.ValidationError as ex:
        return make_error_response(params, str(ex), 400)

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    res = Response(json.dumps(data, indent=2), mimetype='application/json')
    # Short client-side cache outside DEBUG, since metrics change slowly.
    if not DEBUG:
        res.headers['Cache-Control'] = 'max-age=60'
    return res
def compute_trip_times(d: date, tz, agency_id, routes, save_to_s3=True, stat_ids=None):
    """Precompute trip-time statistics for all routes on date `d` and cache
    them as JSON, one file per (time interval, stat group), optionally
    uploading each to S3.

    Args:
        d: date to process.
        tz: timezone used to localize the interval boundary times.
        agency_id: agency whose routes are processed.
        routes: iterable of route objects (each with an `.id`).
        save_to_s3: also upload each cached file to S3 when True.
        stat_ids: stat group IDs to compute; defaults to all of `stat_groups`.
    """
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    print(d)

    # Intervals of local time within the day; an extra (None, None) entry
    # at the end represents "the entire day" (no time filtering).
    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # all_trip_time_stats[interval_index][stat_id][route_id] accumulates results.
    all_trip_time_stats = {}
    for interval_index, _ in enumerate(timestamp_intervals):
        all_trip_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_trip_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()
        route_config = nextbus.get_route_config(agency_id, route_id)
        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival history recorded for this route/date; skip it.
            print(ex)
            continue
        route_df = history.get_data_frame().sort_values('TRIP', axis=0)
        add_trip_time_stats_for_route(all_trip_time_stats, timestamp_intervals,
                                      stat_ids, route_config, route_df)
        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    # Write one compact JSON file per (interval, stat group) to the local
    # cache, and optionally to S3 (gzip-compressed, publicly readable).
    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]
        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps(
                {
                    'version': trip_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_trip_time_stats[interval_index][stat_id]
                }, separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str, end_time_str)
            print(cache_path)

            cache_dir = Path(cache_path).parent
            if not cache_dir.exists():
                cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = trip_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = trip_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                object = s3.Object(s3_bucket, s3_path)
                object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                           CacheControl='max-age=86400',
                           ContentType='application/json',
                           ContentEncoding='gzip',
                           ACL='public-read')
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...)
# call and ends mid-way through a get_data_frame(...) call; the surrounding
# code is outside the visible source, so only comments were added here.
# It appears to be the tail of a CLI script that prints arrival records for
# one vehicle on one route/date — TODO confirm against the full file.
help='hh:mm of first local time to include each day')
parser.add_argument('--end-time', help='hh:mm of first local time to exclude each day')

args = parser.parse_args()

route_id = args.route
date_str = args.date
vid = args.vid

agency = 'sf-muni'

start_time_str = args.start_time
end_time_str = args.end_time

route_config = nextbus.get_route_config('sf-muni', route_id)

tz = pytz.timezone('US/Pacific')

dates = util.get_dates_in_range(args.date, args.date)

print(f"Date: {', '.join([str(date) for date in dates])}")
print(f"Time of Day: [{start_time_str}, {end_time_str})")
print(f"Route: {route_id} ({route_config.title})")
print(f"Vehicle: {vid}")

for d in dates:
    history = arrival_history.get_by_date(agency, route_id, d)
    # call continues past the end of the visible source
    df = history.get_data_frame(vehicle_id=vid, tz=tz,
# Script fragment: resolve CLI arguments into a route config and a pair of
# stops (s1, s2), validate both stops, and find the directions they share.
version = args.version
if version is None:
    version = arrival_history.DefaultVersion

route_id = args.route
date_str = args.date
s1 = args.s1
s2 = args.s2

agency = 'sf-muni'

start_time_str = args.start_time
end_time_str = args.end_time

route_config = nextbus.get_route_config(agency, route_id)

s1_info = route_config.get_stop_info(s1)
s1_dirs = route_config.get_directions_for_stop(s1)
if len(s1_dirs) == 0 or s1_info is None:
    raise Exception(f"invalid stop id {s1}")

s2_info = route_config.get_stop_info(s2)
s2_dirs = route_config.get_directions_for_stop(s2)
# Fixed copy-paste bug: this check previously re-tested s1_dirs, so a stop
# s2 that exists but serves no directions was not reported as invalid.
if len(s2_dirs) == 0 or s2_info is None:
    raise Exception(f"invalid stop id {s2}")

if s1 == s2:
    raise Exception(f"stop {s1} and {s2} are the same")

# Directions serving both stops, preserving s1's ordering.
common_dirs = [dir for dir in s1_dirs if dir in s2_dirs]
def compute_wait_times(agency_id, d: date, routes, tz, stat_ids, save_to_s3=False):
    """Precompute wait-time statistics for all routes on date `d` and cache
    them as JSON, one file per (time interval, stat group), optionally
    uploading each to S3.

    Args:
        agency_id: agency whose routes are processed.
        d: date to process.
        routes: iterable of route objects (each with an `.id`).
        tz: timezone used to localize the interval boundary times.
        stat_ids: stat group IDs (keys of `stat_groups`) to compute.
        save_to_s3: also upload each cached file to S3 when True.
    """
    print(d)
    # all_wait_time_stats[interval_index][stat_id][route_id][dir_id] -> stats per stop
    all_wait_time_stats = {}

    # Intervals of local time within the day; an extra (None, None) entry
    # at the end represents "the entire day" (no time filtering).
    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    for interval_index, _ in enumerate(timestamp_intervals):
        all_wait_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_wait_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        route_config = nextbus.get_route_config(agency_id, route_id)
        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival history recorded for this route/date; skip it.
            print(ex)
            continue

        for interval_index, _ in enumerate(timestamp_intervals):
            for stat_id in stat_ids:
                all_wait_time_stats[interval_index][stat_id][route_id] = {}

        df = history.get_data_frame()
        df = df.sort_values('TIME', axis=0)

        for dir_info in route_config.get_direction_infos():
            dir_id = dir_info.id
            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    all_wait_time_stats[interval_index][stat_id][route_id][
                        dir_id] = {}
            stop_ids = dir_info.get_stop_ids()
            sid_values = df['SID'].values
            for i, stop_id in enumerate(stop_ids):
                # Arrival times at this stop (already sorted by TIME above).
                stop_df = df[sid_values == stop_id]
                all_time_values = stop_df['TIME'].values
                for interval_index, (
                        start_time, end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(
                        all_time_values, start_time, end_time)
                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)
            # Roll up per-stop stats into a per-direction median.
            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        all_wait_time_stats[interval_index][stat_id][route_id][dir_id],
                        stat_id)

    # Write one compact JSON file per (interval, stat group) to the local
    # cache, and optionally to S3 (gzip-compressed, publicly readable).
    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]
        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps(
                {
                    'version': wait_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_wait_time_stats[interval_index][stat_id]
                }, separators=(',', ':'))

            cache_path = wait_times.get_cache_path(agency_id, d, stat_id,
                                                    start_time_str, end_time_str)

            cache_dir = Path(cache_path).parent
            if not cache_dir.exists():
                cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = wait_times.get_s3_path(agency_id, d, stat_id,
                                                  start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                object = s3.Object(s3_bucket, s3_path)
                object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                           CacheControl='max-age=86400',
                           ContentType='application/json',
                           ContentEncoding='gzip',
                           ACL='public-read')
def save_routes_for_agency(agency: config.Agency, save_to_s3=True):
    """Build and cache the route configuration for one agency from its GTFS feed.

    Downloads the agency's GTFS data, derives per-route metadata, directions
    (with their stop lists, shape coordinates, and per-stop geometry along the
    shape), writes the result as compact JSON to the local cache, and
    optionally uploads it to S3.

    Args:
        agency: agency configuration (provider, GTFS field mappings, custom
            directions, default direction titles, ...).
        save_to_s3: also upload the JSON to S3 when True.
    """
    agency_id = agency.id
    gtfs_cache_dir = f'{util.get_data_dir()}/gtfs-{agency_id}'

    download_gtfs_data(agency, gtfs_cache_dir)

    feed = ptg.load_geo_feed(gtfs_cache_dir, {})

    print(f"Loading {agency_id} routes...")
    routes_df = feed.routes
    if agency.gtfs_agency_id is not None:
        routes_df = routes_df[routes_df.agency_id == agency.gtfs_agency_id]

    routes_data = []

    print(f"Loading {agency_id} trips...")
    trips_df = feed.trips
    # direction_id is compared as a string throughout this function
    trips_df['direction_id'] = trips_df['direction_id'].astype(str)

    print(f"Loading {agency_id} stop times...")
    stop_times_df = feed.stop_times
    print(f"Loading {agency_id} shapes...")
    shapes_df = feed.shapes
    print(f"Loading {agency_id} stops...")
    stops_df = feed.stops

    # gtfs_stop_ids_map allows looking up row from stops.txt via GTFS stop_id
    gtfs_stop_ids_map = {stop.stop_id: stop for stop in stops_df.itertuples()}

    stop_id_gtfs_field = agency.stop_id_gtfs_field

    # get OpenTransit stop ID for GTFS stop_id (may be the same)
    def normalize_gtfs_stop_id(gtfs_stop_id):
        if stop_id_gtfs_field != 'stop_id':
            return getattr(gtfs_stop_ids_map[gtfs_stop_id], stop_id_gtfs_field)
        else:
            return gtfs_stop_id

    # stops_map allows looking up row from stops.txt via OpenTransit stop ID
    if stop_id_gtfs_field != 'stop_id':
        stops_map = {getattr(stop, stop_id_gtfs_field): stop for stop in stops_df.itertuples()}
    else:
        stops_map = gtfs_stop_ids_map

    if agency.provider == 'nextbus':
        # nextbus route order is used for sort_order below
        nextbus_route_order = [route.id for route in nextbus.get_route_list(agency.nextbus_id)]

    for route in routes_df.itertuples():

        gtfs_route_id = route.route_id

        short_name = route.route_short_name
        long_name = route.route_long_name

        if isinstance(short_name, str) and isinstance(long_name, str):
            title = f'{short_name} - {long_name}'
        elif isinstance(short_name, str):
            title = short_name
        else:
            title = long_name

        type = int(route.route_type) if hasattr(route, 'route_type') else None
        url = route.route_url if hasattr(route, 'route_url') and isinstance(route.route_url, str) else None
        #color = route.route_color
        #text_color = route.route_text_color

        route_id = getattr(route, agency.route_id_gtfs_field)

        if agency.provider == 'nextbus':
            route_id = route_id.replace('-', '_') # hack to handle muni route IDs where e.g. GTFS has "T-OWL" but nextbus has "T_OWL"
            try:
                nextbus_route_config = nextbus.get_route_config(agency.nextbus_id, route_id)
                title = nextbus_route_config.title
            except Exception as ex:
                # route exists in GTFS but not in nextbus: skip it entirely
                print(ex)
                continue

            try:
                sort_order = nextbus_route_order.index(route_id)
            except ValueError as ex:
                print(ex)
                sort_order = None
        else:
            sort_order = int(route.route_sort_order) if hasattr(route, 'route_sort_order') else None

        print(f'route {route_id} {title}')

        route_data = {
            'id': route_id,
            'title': title,
            'url': url,
            'type': type,
            #'color': color,
            #'text_color': text_color,
            'gtfs_route_id': gtfs_route_id,
            'sort_order': sort_order,
            'stops': {},
            'directions': [],
        }

        directions = []

        route_directions_df = feed.get('route_directions.txt') # unofficial trimet gtfs extension
        if not route_directions_df.empty:
            route_directions_df = route_directions_df[route_directions_df['route_id'] == gtfs_route_id]
        else:
            route_directions_df = None

        routes_data.append(route_data)

        route_trips_df = trips_df[trips_df['route_id'] == gtfs_route_id]

        route_direction_id_values = route_trips_df['direction_id'].values

        # Build a direction from a manually-configured direction definition,
        # selecting the single shape that contains all included stops (in
        # order) and none of the excluded stops.
        def add_custom_direction(custom_direction_info):
            direction_id = custom_direction_info['id']
            print(f' custom direction = {direction_id}')

            gtfs_direction_id = custom_direction_info['gtfs_direction_id']

            direction_trips_df = route_trips_df[route_direction_id_values == gtfs_direction_id]

            included_stop_ids = custom_direction_info.get('included_stop_ids', [])
            excluded_stop_ids = custom_direction_info.get('excluded_stop_ids', [])

            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id
            )

            def contains_included_stops(shape_stop_ids):
                min_index = 0
                for stop_id in included_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id, min_index)
                    except ValueError:
                        return False
                    min_index = index + 1 # stops must appear in same order as in included_stop_ids
                return True

            def contains_excluded_stop(shape_stop_ids):
                for stop_id in excluded_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id)
                        return True
                    except ValueError:
                        pass
                return False

            matching_shapes = []
            for shape in shapes:
                shape_stop_ids = shape['stop_ids']
                if contains_included_stops(shape_stop_ids) and not contains_excluded_stop(shape_stop_ids):
                    matching_shapes.append(shape)

            # exactly one shape must match, otherwise the custom direction
            # configuration is ambiguous or wrong
            if len(matching_shapes) != 1:
                matching_shape_ids = [shape['shape_id'] for shape in matching_shapes]
                error_message = f'{len(matching_shapes)} shapes found for route {route_id} with GTFS direction ID {gtfs_direction_id}'
                if len(included_stop_ids) > 0:
                    error_message += f" including {','.join(included_stop_ids)}"

                if len(excluded_stop_ids) > 0:
                    error_message += f" excluding {','.join(excluded_stop_ids)}"

                if len(matching_shape_ids) > 0:
                    error_message += f": {','.join(matching_shape_ids)}"

                raise Exception(error_message)

            matching_shape = matching_shapes[0]
            matching_shape_id = matching_shape['shape_id']
            matching_shape_count = matching_shape['count']

            print(f' matching shape = {matching_shape_id} ({matching_shape_count} times)')

            add_direction(
                id=direction_id,
                gtfs_shape_id=matching_shape_id,
                gtfs_direction_id=gtfs_direction_id,
                stop_ids=matching_shape['stop_ids'],
                title=custom_direction_info.get('title', None)
            )

        # Build a direction directly from a GTFS direction_id, using the most
        # common shape among that direction's trips.
        def add_default_direction(direction_id):
            print(f' default direction = {direction_id}')

            direction_trips_df = route_trips_df[route_direction_id_values == direction_id]

            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id)

            best_shape = shapes[0]
            best_shape_id = best_shape['shape_id']
            best_shape_count = best_shape['count']

            print(f' most common shape = {best_shape_id} ({best_shape_count} times)')

            add_direction(
                id=direction_id,
                gtfs_shape_id=best_shape_id,
                gtfs_direction_id=direction_id,
                stop_ids=best_shape['stop_ids']
            )

        # Append one direction (title, stops, shape coordinates, per-stop
        # geometry along the shape) to route_data.
        def add_direction(id, gtfs_shape_id, gtfs_direction_id, stop_ids, title = None):
            if title is None:
                default_direction_info = agency.default_directions.get(gtfs_direction_id, {})
                title_prefix = default_direction_info.get('title_prefix', None)

                last_stop_id = stop_ids[-1]
                last_stop = stops_map[last_stop_id]

                if title_prefix is not None:
                    title = f"{title_prefix} to {last_stop.stop_name}"
                else:
                    title = f"To {last_stop.stop_name}"

            print(f' title = {title}')

            dir_data = {
                'id': id,
                'title': title,
                'gtfs_shape_id': gtfs_shape_id,
                'gtfs_direction_id': gtfs_direction_id,
                'stops': stop_ids,
                'stop_geometry': {},
            }
            route_data['directions'].append(dir_data)

            for stop_id in stop_ids:
                stop = stops_map[stop_id]
                stop_data = {
                    'id': stop_id,
                    'lat': round(stop.geometry.y, 5), # stop_lat in gtfs
                    'lon': round(stop.geometry.x, 5), # stop_lon in gtfs
                    'title': stop.stop_name,
                    'url': stop.stop_url if hasattr(stop, 'stop_url') and isinstance(stop.stop_url, str) else None,
                }
                route_data['stops'][stop_id] = stop_data

            geometry = shapes_df[shapes_df['shape_id'] == gtfs_shape_id]['geometry'].values[0]

            # partridge returns GTFS geometries for each shape_id as a shapely LineString
            # (https://shapely.readthedocs.io/en/stable/manual.html#linestrings).
            # Each coordinate is an array in [lon,lat] format (note: longitude first, latitude second)
            dir_data['coords'] = [
                {
                    'lat': round(coord[1], 5),
                    'lon': round(coord[0], 5)
                } for coord in geometry.coords
            ]

            if agency.provider == 'nextbus':
                # match nextbus direction IDs with GTFS direction IDs
                # NOTE(review): `direction_id` here is a free variable resolved
                # from the enclosing function scope (assigned by the
                # add_default_direction loop below); if add_custom_direction is
                # the caller it may be unbound — verify against the full file.
                best_nextbus_dir_info, best_terminal_dist = match_nextbus_direction(nextbus_route_config, geometry)

                print(f' {direction_id} = {best_nextbus_dir_info.id} (terminal_dist={int(best_terminal_dist)}) {" (questionable match)" if best_terminal_dist > 300 else ""}')

                # dir_data['title'] = best_nextbus_dir_info.title
                dir_data['nextbus_direction_id'] = best_nextbus_dir_info.id

            start_lat = geometry.coords[0][1]
            start_lon = geometry.coords[0][0]

            #print(f" start_lat = {start_lat} start_lon = {start_lon}")

            # approximate meters per 0.1 degree near the shape's start, used
            # to build a local equirectangular projection
            deg_lat_dist = util.haver_distance(start_lat, start_lon, start_lat-0.1, start_lon)*10
            deg_lon_dist = util.haver_distance(start_lat, start_lon, start_lat, start_lon-0.1)*10

            # projection function from lon/lat coordinates in degrees (z ignored) to x/y coordinates in meters.
            # satisfying the interface of shapely.ops.transform (https://shapely.readthedocs.io/en/stable/manual.html#shapely.ops.transform).
            # This makes it possible to use shapely methods to calculate the distance in meters between geometries
            def project_xy(lon, lat, z=None):
                return (round((lon - start_lon) * deg_lon_dist, 1), round((lat - start_lat) * deg_lat_dist, 1))

            xy_geometry = shapely.ops.transform(project_xy, geometry)

            shape_lon_lat = np.array(geometry).T
            shape_lon = shape_lon_lat[0]
            shape_lat = shape_lon_lat[1]

            shape_prev_lon = np.r_[shape_lon[0], shape_lon[:-1]]
            shape_prev_lat = np.r_[shape_lat[0], shape_lat[:-1]]

            # shape_cumulative_dist[i] is the cumulative distance in meters along the shape geometry from 0th to ith coordinate
            shape_cumulative_dist = np.cumsum(util.haver_distance(shape_lon, shape_lat, shape_prev_lon, shape_prev_lat))

            shape_lines_xy = [shapely.geometry.LineString(xy_geometry.coords[i:i+2]) for i in range(0, len(xy_geometry.coords) - 1)]

            # this is the total distance of the GTFS shape, which may not be exactly the same as the
            # distance along the route between the first and last Nextbus stop
            dir_data['distance'] = int(shape_cumulative_dist[-1])

            print(f" distance = {dir_data['distance']}")

            # Find each stop along the route shape, so that the frontend can draw line segments between stops along the shape
            start_index = 0
            for stop_id in stop_ids:
                stop_info = route_data['stops'][stop_id]

                # Need to project lon/lat coords to x/y in order for shapely to determine the distance between
                # a point and a line (shapely doesn't support distance for lon/lat coords)
                stop_xy = shapely.geometry.Point(project_xy(stop_info['lon'], stop_info['lat']))

                stop_geometry = get_stop_geometry(stop_xy, shape_lines_xy, shape_cumulative_dist, start_index)

                if stop_geometry['offset'] > 100:
                    print(f" !! bad geometry for stop {stop_id}: {stop_geometry['offset']} m from route line segment")
                    continue

                dir_data['stop_geometry'][stop_id] = stop_geometry

                start_index = stop_geometry['after_index']

        # custom directions (if configured) replace the default per-GTFS-direction ones
        if route_id in agency.custom_directions:
            for custom_direction_info in agency.custom_directions[route_id]:
                add_custom_direction(custom_direction_info)
        else:
            for direction_id in np.unique(route_direction_id_values):
                add_default_direction(direction_id)

    # sort routes by sort_order when available, otherwise by ID
    if routes_data[0]['sort_order'] is not None:
        sort_key = lambda route_data: route_data['sort_order']
    else:
        sort_key = lambda route_data: route_data['id']

    routes_data = sorted(routes_data, key=sort_key)

    data_str = json.dumps({
        'version': routeconfig.DefaultVersion,
        'routes': routes_data
    }, separators=(',', ':'))

    cache_path = routeconfig.get_cache_path(agency_id)
    with open(cache_path, "w") as f:
        f.write(data_str)

    if save_to_s3:
        s3 = boto3.resource('s3')
        s3_path = routeconfig.get_s3_path(agency_id)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        )
def metrics_page():
    """Flask handler computing arrival metrics for a stop (and optional end
    stop) on an sf-muni route over a date range.

    Returns JSON with headway, wait-time, and (when an end stop in the same
    direction is given) trip-time statistics. Responds 400 for an invalid date
    range and 404 for unknown stops or missing arrival history.
    """
    metrics_start = time.time()

    # Defaults make the endpoint browsable without parameters.
    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')

    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')
    # 'date' is a shorthand for a single-day range and overrides start/end.
    if date_str is not None:
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-02-01'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get(
        'start_time')  # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get(
        'end_time')  # e.g. "18:00" (24h time of day)

    # Echoed back in every response (success or error) for debuggability.
    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }

    try:
        dates = util.get_dates_in_range(start_date_str, end_date_str)
    except Exception as ex:
        return Response(json.dumps({
            'params': params,
            'error': str(ex),
        }, indent=2), status=400, mimetype='application/json')

    tz = pytz.timezone('US/Pacific')

    route_config = nextbus.get_route_config('sf-muni', route_id)
    start_stop_info = route_config.get_stop_info(start_stop_id)
    end_stop_info = route_config.get_stop_info(
        end_stop_id) if end_stop_id else None

    # 404 if the given stop isn't on the route
    # TODO: what should be done for the case where the start stop id is valid but the end stop id isn't?
    if start_stop_info is None:
        return Response(json.dumps(
            {
                'params': params,
                'error': f"Stop {start_stop_id} is not on route {route_id}",
            }, indent=2), status=404, mimetype='application/json')

    if direction_id is not None:
        dir_info = route_config.get_direction_info(direction_id)
        if dir_info is not None:
            dir_infos = [dir_info]
        else:
            dir_infos = []
    else:
        # TODO: validation for end_stop_id directions if given (see trips.py)
        dirs = route_config.get_directions_for_stop(start_stop_id)
        dir_infos = [
            route_config.get_direction_info(direction) for direction in dirs
        ]

    if end_stop_id:
        end_stop_dirs = route_config.get_directions_for_stop(end_stop_id)
        # trip times are only meaningful if both stops are served by the
        # requested direction
        both_stops_same_dir = direction_id in end_stop_dirs

    directions = [{
        'id': dir_info.id,
        'title': dir_info.title
    } for dir_info in dir_infos]

    headway_min_arr = []
    waits = []
    if end_stop_id:
        completed_trips = []
    for d in dates:
        try:
            history = arrival_history.get_by_date('sf-muni', route_id, d)

            df = history.get_data_frame(start_stop_id, tz=tz,
                                        direction_id=direction_id,
                                        start_time_str=start_time_str,
                                        end_time_str=end_time_str)

            # get all headways for the selected stop (arrival time minus previous arrival time), computed separately for each day
            df['headway_min'] = metrics.compute_headway_minutes(df)

            # temporarily skip calculation of wait times until data is shown in front end
            waits.append(
                wait_times.get_waits(df, start_stop_info, d, tz, route_id,
                                     start_time_str, end_time_str))
            if end_stop_id and both_stops_same_dir:
                trips = trip_times.get_trip_times(df, history, tz,
                                                  start_stop_id, end_stop_id)
                completed_trips.append(
                    trips.trip_min[trips.trip_min.notnull()])

            headway_min = df.headway_min[df.headway_min.notnull(
            )]  # remove NaN row (first bus of the day)
            # Fixed: previously appended the unfiltered df.headway_min, so the
            # NaN row removed above still entered the aggregated headway stats.
            headway_min_arr.append(headway_min)
        except FileNotFoundError as ex:
            return Response(json.dumps(
                {
                    'params': params,
                    'error': f"Arrival history not found for route {route_id} on {d.isoformat()}",
                }, indent=2), status=404, mimetype='application/json')
        except IndexError as ex:
            return Response(json.dumps(
                {
                    'params': params,
                    'error': f"No arrivals found for stop {start_stop_id} on route {route_id} in direction {direction_id} on {d.isoformat()}",
                }, indent=2), status=404, mimetype='application/json')

    headway_min = pd.concat(headway_min_arr)
    waits = pd.concat(waits)
    if end_stop_id and both_stops_same_dir:
        completed_trips = pd.concat(completed_trips)

    if headway_min.empty:
        return Response(json.dumps(
            {
                'params': params,
                'error': f"No arrivals for stop {start_stop_id} on route {route_id}",
            }, indent=2), status=404, mimetype='application/json')

    data = {
        'params': params,
        'route_title': route_config.title,
        'start_stop_title': start_stop_info.title if start_stop_info else None,
        'end_stop_title': end_stop_info.title if end_stop_info else None,
        'directions': directions,
        'headway_min': metrics.get_headways_stats(headway_min),
        'wait_times': metrics.get_wait_times_stats(waits, tz),
        'trip_times': metrics.get_trip_times_stats(completed_trips, start_stop_id, end_stop_id)
            if end_stop_id and both_stops_same_dir else None,
    }

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    return Response(json.dumps(data, indent=2), mimetype='application/json')