def add_headway_and_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, df):
    """Add median headway and median wait time stats for one route to ``all_stats``.

    For each direction of the route and each stop in that direction, the
    departure times at the stop are passed, per time interval, to
    ``metrics.compute_headway_minutes`` and ``wait_times.get_stats``. The
    per-stop medians are collected per interval and then reduced with
    ``get_median_or_none`` into one value per direction.

    Args:
        all_stats: Nested mapping that is mutated in place. It is indexed as
            ``all_stats[stat_id][interval_index][route_id]['directions'][dir_id]``.
            Under ``StatIds.MedianTripTimes`` the per-stop median wait times are
            stored in the ``'medianWaitTimes'`` mapping keyed by stop id; under
            ``StatIds.Combined`` the keys ``'medianWaitTime'`` and
            ``'medianHeadway'`` are set for the direction.
        timestamp_intervals: Re-iterable collection of ``(start_time, end_time)``
            pairs; it is traversed once per stop and once more per direction.
        route_config: Object exposing ``id`` and ``get_direction_infos()``; each
            direction info exposes ``id`` and ``get_stop_ids()``.
        df: DataFrame with at least the columns ``'DEPARTURE_TIME'``, ``'SID'``
            and ``'DID'``. The caller's frame is not modified; a copy sorted by
            ``'DEPARTURE_TIME'`` is used.

    Returns:
        None. Results are written into ``all_stats``.
    """
    route_id = route_config.id

    # Sort once so the per-stop slices below are already in departure order.
    df = df.sort_values('DEPARTURE_TIME', axis=0)
    sid_values = df['SID'].values
    did_values = df['DID'].values
    departure_times = df['DEPARTURE_TIME'].values

    for dir_info in route_config.get_direction_infos():
        dir_id = dir_info.id

        # The direction mask does not depend on the stop, so build it once.
        in_direction = did_values == dir_id

        # interval_index -> list of per-stop medians for this direction
        median_headways = collections.defaultdict(list)
        median_wait_times = collections.defaultdict(list)

        for stop_id in dir_info.get_stop_ids():
            # Mask the numpy array directly rather than building a filtered
            # DataFrame just to read a single column from it.
            stop_times = departure_times[(sid_values == stop_id) & in_direction]

            for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
                headways = metrics.compute_headway_minutes(
                    stop_times, start_time, end_time)
                if len(headways) > 0:
                    median_headways[interval_index].append(np.median(headways))

                wait_time_stats = wait_times.get_stats(
                    stop_times, start_time, end_time)
                median_wait_time = wait_time_stats.get_quantile(0.5)
                if median_wait_time is not None:
                    median_wait_times[interval_index].append(median_wait_time)

                    trip_time_dir_stats = all_stats[StatIds.MedianTripTimes][
                        interval_index][route_id]['directions'][dir_id]
                    trip_time_dir_stats['medianWaitTimes'][stop_id] = round(
                        median_wait_time, 1)

        # Reduce the per-stop medians to one value per interval and direction.
        for interval_index, _ in enumerate(timestamp_intervals):
            dir_stats = all_stats[StatIds.Combined][interval_index][route_id][
                'directions'][dir_id]
            dir_stats['medianWaitTime'] = get_median_or_none(
                median_wait_times[interval_index])
            dir_stats['medianHeadway'] = get_median_or_none(
                median_headways[interval_index])
get_history_ms += t2 - t1 get_data_frame_ms += t3 - t2 df = pd.concat(route_dfs) if df.empty: print(f"no arrival times found for stop {stop_id} on {date_str}") continue df = df.sort_values('TIME', axis=0) t4 = time.time() * 1000 df['headway_min'] = np.r_[ np.nan, metrics.compute_headway_minutes(df['TIME'].values)] df['DATE_TIME'] = df.TIME.apply( lambda t: datetime.fromtimestamp(t, tz)) t5 = time.time() * 1000 for row in df.itertuples(): did = row.DID dir_info = [ dir_info for dir_info in dir_infos if dir_info.id == did ][0] dist_str = f'{row.DIST}'.rjust(3) dwell_time = util.render_dwell_time(row.DEPARTURE_TIME - row.TIME) headway_str = f'{round(row.headway_min, 1)}'.rjust(4) print( f"{row.DATE_TIME.date()} {row.DATE_TIME.time()} ({row.TIME}) {dwell_time} vid:{row.VID} {dist_str}m {headway_str} min ({row.ROUTE} - {dir_info.title})"
timetable = timetables.get_by_date(agency_id, route_id, d) route_config = agency.get_route_config(route_id) tz = agency.tz direction_id = args.dir timetable_df = timetable.get_data_frame( stop_id=stop_id, direction_id=direction_id).sort_values('TIME') early_sec = early_min * 60 late_sec = late_min * 60 timetable_df['scheduled_headway'] = np.r_[ np.nan, metrics.compute_headway_minutes(timetable_df['TIME'].values)] if comparison: history = arrival_history.get_by_date(agency_id, route_id, d) arrivals_df = history.get_data_frame(stop_id=stop_id, direction_id=direction_id) comparison_df = timetables.match_schedule_to_actual_times( timetable_df['TIME'].values, arrivals_df['TIME'].values, early_sec=early_sec, late_sec=late_sec) timetable_df = pd.concat([timetable_df, comparison_df], axis=1) timetable_df['DATE_TIME'] = timetable_df['TIME'].apply(
route_dfs.append(route_df) get_history_ms += t2 - t1 get_data_frame_ms += t3 - t2 df = pd.concat(route_dfs) if df.empty: print(f"no arrival times found for stop {stop_id} on {date_str}") continue df = df.sort_values('TIME', axis=0) t4 = time.time() * 1000 df['headway_min'] = metrics.compute_headway_minutes(df) t5 = time.time() * 1000 for index, row in df.iterrows(): did = row.DID dir_info = [ dir_info for dir_info in dir_infos if dir_info.id == did ][0] print( f"t={row.DATE_STR} {row.TIME_STR} ({row.TIME}) v:{row.VID} {round(row.headway_min,1)} min ({row.ROUTE} - {dir_info.title})" ) t6 = time.time() * 1000 day_headways = df.headway_min[df.headway_min.notnull()]
def metrics_page():
    """Return headway, wait time and trip time statistics for a stop as JSON.

    Query parameters (read from ``request.args``):
        route_id: Route to report on; defaults to ``'12'``.
        start_stop_id: Stop whose arrivals are analysed; defaults to ``'3476'``.
        end_stop_id: Optional destination stop. Trip times are only computed
            when it is given and ``direction_id`` is one of the directions
            returned by ``route_config.get_directions_for_stop(end_stop_id)``.
        direction_id: Optional direction filter.
        date: Optional single date; when given it overrides both
            ``start_date`` and ``end_date``.
        start_date: Defaults to ``'2019-02-01'`` when ``date`` is absent.
        end_date: Defaults to ``start_date`` when ``date`` is absent.
        start_time, end_time: Optional time-of-day bounds, e.g. ``"14:00"``
            (24h time of day).

    Returns:
        A JSON ``Response``:
        * status 400 when ``util.get_dates_in_range`` raises,
        * status 404 when the start stop is not on the route, when arrival
          history is missing for a date, or when no arrivals/headways are
          found,
        * otherwise the statistics payload, including ``processing_time`` in
          seconds.
    """
    metrics_start = time.time()

    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')

    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')
    if date_str is not None:
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-02-01'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get('start_time')  # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get('end_time')  # e.g. "18:00" (24h time of day)

    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }

    def error_response(status, message):
        # Every error body echoes the request params next to the message.
        return Response(json.dumps({
            'params': params,
            'error': message,
        }, indent=2), status=status, mimetype='application/json')

    try:
        dates = util.get_dates_in_range(start_date_str, end_date_str)
    except Exception as ex:
        return error_response(400, str(ex))

    tz = pytz.timezone('US/Pacific')

    route_config = nextbus.get_route_config('sf-muni', route_id)
    start_stop_info = route_config.get_stop_info(start_stop_id)
    end_stop_info = route_config.get_stop_info(
        end_stop_id) if end_stop_id else None

    # 404 if the given stop isn't on the route
    # TODO: what should be done for the case where the start stop id is valid but the end stop id isn't?
    if start_stop_info is None:
        return error_response(
            404, f"Stop {start_stop_id} is not on route {route_id}")

    if direction_id is not None:
        dir_info = route_config.get_direction_info(direction_id)
        dir_infos = [dir_info] if dir_info is not None else []
    else:
        # TODO: validation for end_stop_id directions if given (see trips.py)
        dirs = route_config.get_directions_for_stop(start_stop_id)
        dir_infos = [
            route_config.get_direction_info(direction) for direction in dirs
        ]

    # Trip times need an end stop that is served by the requested direction.
    both_stops_same_dir = False
    if end_stop_id:
        end_stop_dirs = route_config.get_directions_for_stop(end_stop_id)
        both_stops_same_dir = direction_id in end_stop_dirs

    directions = [{
        'id': dir_info.id,
        'title': dir_info.title
    } for dir_info in dir_infos]

    headway_min_arr = []
    waits = []
    completed_trips = []

    for d in dates:
        try:
            history = arrival_history.get_by_date('sf-muni', route_id, d)

            df = history.get_data_frame(start_stop_id,
                                        tz=tz,
                                        direction_id=direction_id,
                                        start_time_str=start_time_str,
                                        end_time_str=end_time_str)

            # get all headways for the selected stop (arrival time minus previous arrival time), computed separately for each day
            df['headway_min'] = metrics.compute_headway_minutes(df)

            waits.append(
                wait_times.get_waits(df, start_stop_info, d, tz, route_id,
                                     start_time_str, end_time_str))

            if both_stops_same_dir:
                trips = trip_times.get_trip_times(df, history, tz,
                                                  start_stop_id, end_stop_id)
                completed_trips.append(
                    trips.trip_min[trips.trip_min.notnull()])

            # Drop the NaN row (first bus of the day) before collecting, so
            # it neither skews the statistics nor defeats the emptiness
            # check below.
            headway_min_arr.append(df.headway_min[df.headway_min.notnull()])
        except FileNotFoundError:
            return error_response(
                404,
                f"Arrival history not found for route {route_id} on {d.isoformat()}"
            )
        except IndexError:
            return error_response(
                404,
                f"No arrivals found for stop {start_stop_id} on route {route_id} in direction {direction_id} on {d.isoformat()}"
            )

    # pd.concat raises ValueError on an empty list, so treat "no dates
    # processed" the same as "no headways found".
    headway_min = pd.concat(headway_min_arr) if headway_min_arr else None
    if headway_min is None or headway_min.empty:
        return error_response(
            404, f"No arrivals for stop {start_stop_id} on route {route_id}")

    waits = pd.concat(waits)
    if both_stops_same_dir:
        completed_trips = pd.concat(completed_trips)

    data = {
        'params': params,
        'route_title': route_config.title,
        'start_stop_title': start_stop_info.title if start_stop_info else None,
        'end_stop_title': end_stop_info.title if end_stop_info else None,
        'directions': directions,
        'headway_min': metrics.get_headways_stats(headway_min),
        'wait_times': metrics.get_wait_times_stats(waits, tz),
        'trip_times': metrics.get_trip_times_stats(
            completed_trips, start_stop_id, end_stop_id)
        if both_stops_same_dir else None,
    }

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    return Response(json.dumps(data, indent=2), mimetype='application/json')