start_time_str = args.start_time end_time_str = args.end_time route_config = nextbus.get_route_config('sf-muni', route_id) tz = pytz.timezone('US/Pacific') dates = util.get_dates_in_range(args.date, args.date) print(f"Date: {', '.join([str(date) for date in dates])}") print(f"Time of Day: [{start_time_str}, {end_time_str})") print(f"Route: {route_id} ({route_config.title})") print(f"Vehicle: {vid}") for d in dates: history = arrival_history.get_by_date(agency, route_id, d) df = history.get_data_frame(vehicle_id=vid, tz=tz, start_time_str=start_time_str, end_time_str=end_time_str) if df.empty: print(f"no arrival times found for vehicle {vid} on {date_str}") continue df = df.sort_values('TIME', axis=0) for index, row in df.iterrows(): stop_id = row.SID stop_info = route_config.get_stop_info(stop_id)
def compute_trip_times(d: date, tz, agency_id, routes, save_to_s3=True, stat_ids=None):
    """Compute trip time statistics for each route on one date and save them.

    For each configured time-of-day interval (plus an extra 07:00-19:00
    interval and an all-day (None, None) interval), aggregates trip time
    stats across all routes, then writes one compact JSON document per
    (interval, stat group) to the local cache and optionally to S3.

    Args:
        d: the service date to compute stats for.
        tz: tzinfo used to localize the interval boundary times.
        agency_id: id of the agency whose routes are processed.
        routes: iterable of route objects exposing an `id` attribute.
        save_to_s3: when True, also upload each JSON document to S3.
        stat_ids: stat group ids to compute; defaults to all of stat_groups.
    """
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    # Convert each (start, end) time-of-day pair into epoch timestamps for date d.
    timestamp_intervals = [(
        int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
        int(util.get_localized_datetime(d, end_time_str, tz).timestamp())
    ) for start_time_str, end_time_str in time_str_intervals]

    # (None, None) represents the whole day with no time filtering.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_trip_time_stats = {}
    for interval_index, _ in enumerate(timestamp_intervals):
        all_trip_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_trip_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival data for this route on this date; skip the route.
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(all_trip_time_stats, timestamp_intervals, stat_ids, route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    # Create the S3 resource once, rather than once per saved file.
    s3 = boto3.resource('s3') if save_to_s3 else None

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]

            data_str = json.dumps({
                'version': trip_times.DefaultVersion,
                'start_time': start_time,
                'end_time': end_time,
                'stat': stat,
                'routes': all_trip_time_stats[interval_index][stat_id]
            }, separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency_id, d, stat_id, start_time_str, end_time_str)
            print(cache_path)

            # exist_ok=True makes the former exists() pre-check unnecessary
            # (and removes the exists/mkdir race).
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3_path = trip_times.get_s3_path(agency_id, d, stat_id, start_time_str, end_time_str)
                s3_bucket = trip_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # renamed from `object`, which shadowed the builtin
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(
                    Body=gzip.compress(data_str.encode('utf-8')),
                    CacheControl='max-age=86400',
                    ContentType='application/json',
                    ContentEncoding='gzip',
                    ACL='public-read')
route_config = agency.get_route_config(route_id) tz = agency.tz dates = util.get_dates_in_range(args.date, args.date) print(f"Date: {', '.join([str(date) for date in dates])}") print(f"Time of Day: [{start_time_str}, {end_time_str})") print(f"Route: {route_id} ({route_config.title})") print(f"Vehicle: {vid}") num_stops = 0 for d in dates: history = arrival_history.get_by_date(agency.id, route_id, d, version) start_time = util.get_timestamp_or_none(d, start_time_str, tz) end_time = util.get_timestamp_or_none(d, end_time_str, tz) df = history.get_data_frame(vehicle_id=vid, direction_id=direction_id, start_time=start_time, end_time=end_time) if df.empty: print(f"no arrival times found for vehicle {vid} on {date_str}") continue df = df.sort_values(['TIME', 'TRIP'], axis=0) df['DATE_TIME'] = df['TIME'].apply(
dates = util.get_dates_in_range(args.date, args.date) print(f"Date: {', '.join([str(date) for date in dates])}") print(f"Time of Day: [{start_time_str}, {end_time_str})") def render_distance(dist): return '----' if np.isnan(dist) else ('%3dm' % dist) for route_id in route_ids: route_config = agency.get_route_config(route_id) df = pd.concat([ arrival_history.get_by_date(agency.id, route_id, d, version) \ .get_data_frame( start_time = util.get_timestamp_or_none(d, start_time_str, tz), end_time = util.get_timestamp_or_none(d, end_time_str, tz) ) for d in dates ]) print(f"Route: {route_id} ({route_config.title})") dir_infos = route_config.get_direction_infos() for dir_info in dir_infos: print(f"Direction: {dir_info.title} ({dir_info.id})") prev_stop_info = None for dir_index, stop_id in enumerate(dir_info.get_stop_ids()):
def compute_wait_times(agency_id, d: date, routes, tz, stat_ids, save_to_s3=False):
    """Compute wait time statistics per route/direction/stop for one date.

    For each configured time-of-day interval (plus an extra 07:00-19:00
    interval and an all-day (None, None) interval), computes wait time stats
    for every stop of every direction of every route, adds per-direction
    median stats, then writes one compact JSON document per
    (interval, stat group) to the local cache and optionally to S3.

    Args:
        agency_id: id of the agency whose routes are processed.
        d: the service date to compute stats for.
        routes: iterable of route objects exposing an `id` attribute.
        tz: tzinfo used to localize the interval boundary times.
        stat_ids: stat group ids to compute.
        save_to_s3: when True, also upload each JSON document to S3.
    """
    print(d)
    all_wait_time_stats = {}

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    # Convert each (start, end) time-of-day pair into epoch timestamps for date d.
    timestamp_intervals = [(
        int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
        int(util.get_localized_datetime(d, end_time_str, tz).timestamp())
    ) for start_time_str, end_time_str in time_str_intervals]

    # (None, None) represents the whole day with no time filtering.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    for interval_index, _ in enumerate(timestamp_intervals):
        all_wait_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_wait_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)

        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival data for this route on this date; skip the route.
            print(ex)
            continue

        for interval_index, _ in enumerate(timestamp_intervals):
            for stat_id in stat_ids:
                all_wait_time_stats[interval_index][stat_id][route_id] = {}

        df = history.get_data_frame()
        df = df.sort_values('TIME', axis=0)

        # df does not change per direction; extract the SID column once per route
        # (it was previously recomputed inside the direction loop).
        sid_values = df['SID'].values

        for dir_info in route_config.get_direction_infos():
            dir_id = dir_info.id

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    all_wait_time_stats[interval_index][stat_id][route_id][dir_id] = {}

            stop_ids = dir_info.get_stop_ids()

            for stop_id in stop_ids:
                stop_df = df[sid_values == stop_id]
                all_time_values = stop_df['TIME'].values

                for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(all_time_values, start_time, end_time)
                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        all_wait_time_stats[interval_index][stat_id][route_id][dir_id], stat_id)

    # Create the S3 resource once, rather than once per saved file.
    s3 = boto3.resource('s3') if save_to_s3 else None

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]

            data_str = json.dumps({
                'version': wait_times.DefaultVersion,
                'start_time': start_time,
                'end_time': end_time,
                'stat': stat,
                'routes': all_wait_time_stats[interval_index][stat_id]
            }, separators=(',', ':'))

            cache_path = wait_times.get_cache_path(agency_id, d, stat_id, start_time_str, end_time_str)

            # exist_ok=True makes the former exists() pre-check unnecessary
            # (and removes the exists/mkdir race).
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3_path = wait_times.get_s3_path(agency_id, d, stat_id, start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # renamed from `object`, which shadowed the builtin
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(
                    Body=gzip.compress(data_str.encode('utf-8')),
                    CacheControl='max-age=86400',
                    ContentType='application/json',
                    ContentEncoding='gzip',
                    ACL='public-read')
def compute_stats(d: date, agency: config.Agency, routes, save_to_s3=True):
    """Compute and persist per-route stats (trip times, wait times, schedule
    adherence) for one agency and date.

    Builds the standard time-of-day intervals (plus 07:00-19:00 and an
    all-day (None, None) interval), fills a nested
    all_stats[stat_id][interval_index][route_id] structure via the
    add_*_stats_for_route helpers, and saves one document per
    (stat group, interval) through precomputed_stats.save_stats.

    Args:
        d: the service date to compute stats for.
        agency: agency config object providing tz, id, and route configs.
        routes: iterable of route objects exposing an `id` attribute.
        save_to_s3: forwarded to precomputed_stats.save_stats.
    """
    tz = agency.tz
    stat_ids = all_stat_ids

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00','19:00'))

    # Localize each (start, end) time-of-day pair to date d as epoch seconds.
    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_str, tz).timestamp()))
        for start_str, end_str in time_str_intervals
    ]

    # (None, None) stands for the entire day, unfiltered by time.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_stats = {
        stat_id: {interval_index: {} for interval_index in range(len(timestamp_intervals))}
        for stat_id in stat_ids
    }

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = agency.get_route_config(route_id)

        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival data for this route/date; skip the route.
            print(ex)
            continue

        try:
            timetable = timetables.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            # No timetable for this route/date; skip the route.
            print(ex)
            continue

        timetable_df = timetable.get_data_frame()
        history_df = history.get_data_frame()

        # Seed an empty per-direction dict for this route in every
        # (stat group, interval) slot; the helpers below fill it in place.
        for stat_id in stat_ids:
            for interval_index in range(len(timestamp_intervals)):
                all_stats[stat_id][interval_index][route_id] = {
                    'directions': {
                        dir_info.id: collections.defaultdict(dict)
                        for dir_info in route_config.get_direction_infos()
                    }
                }

        add_trip_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_schedule_adherence_stats_for_route(all_stats, timestamp_intervals, route_config, history_df, timetable_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    for stat_id in stat_ids:
        for interval_index, _ in enumerate(timestamp_intervals):
            start_time_str, end_time_str = time_str_intervals[interval_index]
            data = {
                'routes': all_stats[stat_id][interval_index],
            }
            precomputed_stats.save_stats(agency.id, stat_id, d, start_time_str, end_time_str, data, save_to_s3)
stop_rows = [] dates = util.get_dates_in_range(args.date, args.date) print(f"Date: {', '.join([str(date) for date in dates])}") print(f"Time of Day: [{start_time_str}, {end_time_str})") def render_distance(dist): return '----' if np.isnan(dist) else ('%3dm' % dist) for route_id in route_ids: route_config = nextbus.get_route_config(agency_id, route_id) df = pd.concat([ arrival_history.get_by_date(agency_id, route_id, d, version) \ .get_data_frame(start_time_str = start_time_str, end_time_str = end_time_str, tz = tz) for d in dates ]) print(f"Route: {route_id} ({route_config.title})") dir_infos = route_config.get_direction_infos() for dir_info in dir_infos: print(f"Direction: {dir_info.title} ({dir_info.id})") prev_stop_info = None for dir_index, stop_id in enumerate(dir_info.get_stop_ids()): stop_info = route_config.get_stop_info(stop_id)
if direction_id: dir_info = route_configs[route_id].get_direction_info( direction_id) if direction_id else None print( f"Direction: {dir_info.title if dir_info else '?'} ({direction_id})" ) if stop_id: stop_info = route_configs[route_id].get_stop_info( stop_id) if route_id else None print(f"Stop: {stop_info.title if stop_info else '?'} ({stop_id})") for d in dates: for route_id in route_ids: base_history = arrival_history.get_by_date(agency_id, route_id, d, base_version) other_history = arrival_history.get_by_date( agency_id, route_id, d, other_version) base_df = base_history.get_data_frame( stop_id=stop_id, direction_id=direction_id).sort_values('TIME', axis=0) other_df = other_history.get_data_frame( stop_id=stop_id, direction_id=direction_id).sort_values('TIME', axis=0) base_trips += len(np.unique(base_df['TRIP'])) other_trips += len(np.unique(other_df['TRIP'])) def find_other_arrival_time(row): other_time = other_history.find_closest_arrival_time(
def metrics_page():
    """Flask view returning JSON metrics for a stop on an SF Muni route.

    Computes headway stats and wait time stats for the start stop over the
    requested date range; also computes trip time stats when an end stop on
    the same direction is given.

    Query params: route_id, start_stop_id, end_stop_id, direction_id,
    date (or start_date/end_date), start_time/end_time ("HH:MM", 24h).
    Returns 400 for an invalid date range and 404 when the stop or its
    arrival data cannot be found.
    """
    metrics_start = time.time()

    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')
    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')
    if date_str is not None:
        # A single 'date' param overrides any start/end range.
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-02-01'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get('start_time')  # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get('end_time')  # e.g. "18:00" (24h time of day)

    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }

    try:
        dates = util.get_dates_in_range(start_date_str, end_date_str)
    except Exception as ex:
        return Response(json.dumps({
            'params': params,
            'error': str(ex),
        }, indent=2), status=400, mimetype='application/json')

    tz = pytz.timezone('US/Pacific')

    route_config = nextbus.get_route_config('sf-muni', route_id)
    start_stop_info = route_config.get_stop_info(start_stop_id)
    end_stop_info = route_config.get_stop_info(end_stop_id) if end_stop_id else None

    # 404 if the given stop isn't on the route
    # TODO: what should be done for the case where the start stop id is valid but the end stop id isn't?
    if start_stop_info is None:
        return Response(json.dumps({
            'params': params,
            'error': f"Stop {start_stop_id} is not on route {route_id}",
        }, indent=2), status=404, mimetype='application/json')

    if direction_id is not None:
        dir_info = route_config.get_direction_info(direction_id)
        if dir_info is not None:
            dir_infos = [dir_info]
        else:
            dir_infos = []
    else:
        # TODO: validation for end_stop_id directions if given (see trips.py)
        dirs = route_config.get_directions_for_stop(start_stop_id)
        dir_infos = [
            route_config.get_direction_info(direction) for direction in dirs
        ]

    # Fix: default to False so this name is always defined before use; trip
    # times are computed only when both stops share the requested direction.
    both_stops_same_dir = False
    if end_stop_id:
        end_stop_dirs = route_config.get_directions_for_stop(end_stop_id)
        both_stops_same_dir = direction_id in end_stop_dirs

    directions = [{
        'id': dir_info.id,
        'title': dir_info.title
    } for dir_info in dir_infos]

    headway_min_arr = []
    waits = []
    completed_trips = []

    for d in dates:
        try:
            history = arrival_history.get_by_date('sf-muni', route_id, d)
            df = history.get_data_frame(start_stop_id,
                                        tz=tz,
                                        direction_id=direction_id,
                                        start_time_str=start_time_str,
                                        end_time_str=end_time_str)

            # get all headways for the selected stop (arrival time minus previous arrival time), computed separately for each day
            df['headway_min'] = metrics.compute_headway_minutes(df)

            waits.append(
                wait_times.get_waits(df, start_stop_info, d, tz, route_id,
                                     start_time_str, end_time_str))

            if end_stop_id and both_stops_same_dir:
                trips = trip_times.get_trip_times(df, history, tz, start_stop_id, end_stop_id)
                completed_trips.append(trips.trip_min[trips.trip_min.notnull()])

            # remove NaN row (first bus of the day). Fix: append the filtered
            # series — the previous code computed it but appended the
            # unfiltered df.headway_min, leaking NaNs into the stats.
            day_headways = df.headway_min[df.headway_min.notnull()]
            headway_min_arr.append(day_headways)
        except FileNotFoundError:
            return Response(json.dumps({
                'params': params,
                'error': f"Arrival history not found for route {route_id} on {d.isoformat()}",
            }, indent=2), status=404, mimetype='application/json')
        except IndexError:
            return Response(json.dumps({
                'params': params,
                'error': f"No arrivals found for stop {start_stop_id} on route {route_id} in direction {direction_id} on {d.isoformat()}",
            }, indent=2), status=404, mimetype='application/json')

    headway_min = pd.concat(headway_min_arr)
    waits = pd.concat(waits)
    if end_stop_id and both_stops_same_dir:
        completed_trips = pd.concat(completed_trips)

    if headway_min.empty:
        return Response(json.dumps({
            'params': params,
            'error': f"No arrivals for stop {start_stop_id} on route {route_id}",
        }, indent=2), status=404, mimetype='application/json')

    data = {
        'params': params,
        'route_title': route_config.title,
        'start_stop_title': start_stop_info.title if start_stop_info else None,
        'end_stop_title': end_stop_info.title if end_stop_info else None,
        'directions': directions,
        'headway_min': metrics.get_headways_stats(headway_min),
        'wait_times': metrics.get_wait_times_stats(waits, tz),
        'trip_times': metrics.get_trip_times_stats(completed_trips, start_stop_id, end_stop_id)
            if end_stop_id and both_stops_same_dir else None,
    }

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    return Response(json.dumps(data, indent=2), mimetype='application/json')