def compute_trip_times(d: date, tz, agency_id, routes, save_to_s3=True, stat_ids=None):
    """Compute trip time statistics for each route on date `d`, for each
    configured time interval and stat group, then write the JSON results to a
    local cache file and (optionally) to S3.

    Params:
        d: the service date to compute stats for
        tz: timezone used to localize the interval boundary times
        agency_id: agency whose routes/arrival history are loaded
        routes: iterable of route objects (each with an `id` attribute)
        save_to_s3: if True, also upload each result gzipped to S3
        stat_ids: stat group ids to compute; defaults to all of `stat_groups`
    """
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    # (None, None) represents the unbounded "entire day" interval.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # all_trip_time_stats[interval_index][stat_id] -> per-route stats dict,
    # filled in by add_trip_time_stats_for_route.
    all_trip_time_stats = {}
    for interval_index, _ in enumerate(timestamp_intervals):
        all_trip_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_trip_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()
        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival data for this route/date; skip it (best-effort).
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(
            all_trip_time_stats, timestamp_intervals, stat_ids,
            route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    # Create the S3 resource once instead of once per interval/stat iteration.
    s3 = boto3.resource('s3') if save_to_s3 else None

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps(
                {
                    'version': trip_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_trip_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = trip_times.get_cache_path(
                agency_id, d, stat_id, start_time_str, end_time_str)
            print(cache_path)

            # exist_ok=True already tolerates an existing directory, so no
            # exists() pre-check is needed (and skipping it avoids a race).
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3_path = trip_times.get_s3_path(
                    agency_id, d, stat_id, start_time_str, end_time_str)
                s3_bucket = trip_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # Renamed from `object` to avoid shadowing the builtin.
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(
                    Body=gzip.compress(bytes(data_str, 'utf-8')),
                    CacheControl='max-age=86400',
                    ContentType='application/json',
                    ContentEncoding='gzip',
                    ACL='public-read')
def compute_stats(d: date, agency: config.Agency, routes, save_to_s3=True):
    """Compute trip-time, wait-time, and schedule-adherence statistics for
    every route of `agency` on date `d`, across all stat groups and time
    intervals, and persist the results via precomputed_stats.save_stats.
    """
    tz = agency.tz
    stat_ids = all_stat_ids

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00','19:00'))

    timestamp_intervals = [
        (
            int(util.get_localized_datetime(d, begin_str, tz).timestamp()),
            int(util.get_localized_datetime(d, finish_str, tz).timestamp()),
        )
        for begin_str, finish_str in time_str_intervals
    ]

    # The (None, None) entry stands for the whole day (no interval bounds).
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # all_stats[stat_id][interval_index] -> {route_id: {...}}
    all_stats = {
        stat_id: {idx: {} for idx in range(len(timestamp_intervals))}
        for stat_id in stat_ids
    }

    for route in routes:
        route_id = route.id
        print(route_id)
        route_start = time.time()

        route_config = agency.get_route_config(route_id)

        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        try:
            timetable = timetables.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        timetable_df = timetable.get_data_frame()
        history_df = history.get_data_frame()

        # Pre-create the nested per-direction containers for this route.
        for stat_id in stat_ids:
            for idx in range(len(timestamp_intervals)):
                route_entry = {'directions': {}}
                all_stats[stat_id][idx][route_id] = route_entry
                for dir_info in route_config.get_direction_infos():
                    route_entry['directions'][dir_info.id] = collections.defaultdict(dict)

        add_trip_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_schedule_adherence_stats_for_route(all_stats, timestamp_intervals, route_config, history_df, timetable_df)

        route_end = time.time()
        print(f' {round(route_end-route_start, 2)} sec')

    # Persist one result set per (stat group, interval) pair.
    for stat_id in stat_ids:
        for idx, (start_time, end_time) in enumerate(timestamp_intervals):
            start_time_str, end_time_str = time_str_intervals[idx]
            data = {
                'routes': all_stats[stat_id][idx],
            }
            precomputed_stats.save_stats(
                agency.id, stat_id, d, start_time_str, end_time_str,
                data, save_to_s3)
def compute_wait_times(agency_id, d: date, routes, tz, stat_ids, save_to_s3=False):
    """Compute wait time statistics per route/direction/stop on date `d`, for
    each configured time interval and stat group, then write the JSON results
    to a local cache file and (optionally) gzipped to S3.

    Params:
        agency_id: agency whose routes/arrival history are loaded
        d: the service date to compute stats for
        routes: iterable of route objects (each with an `id` attribute)
        tz: timezone used to localize the interval boundary times
        stat_ids: stat group ids to compute
        save_to_s3: if True, also upload each result to S3
    """
    print(d)

    # all_wait_time_stats[interval_index][stat_id][route_id][dir_id][stop_id]
    all_wait_time_stats = {}

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    # (None, None) represents the unbounded "entire day" interval.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    for interval_index, _ in enumerate(timestamp_intervals):
        all_wait_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_wait_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No arrival data for this route/date; skip it (best-effort).
            print(ex)
            continue

        for interval_index, _ in enumerate(timestamp_intervals):
            for stat_id in stat_ids:
                all_wait_time_stats[interval_index][stat_id][route_id] = {}

        df = history.get_data_frame()
        df = df.sort_values('TIME', axis=0)

        # The SID column does not change per direction; compute it once per
        # route instead of once per direction.
        sid_values = df['SID'].values

        for dir_info in route_config.get_direction_infos():
            dir_id = dir_info.id

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    all_wait_time_stats[interval_index][stat_id][route_id][dir_id] = {}

            # Iterate stop ids directly; the positional index was unused.
            for stop_id in dir_info.get_stop_ids():
                stop_df = df[sid_values == stop_id]
                all_time_values = stop_df['TIME'].values

                for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(
                        all_time_values, start_time, end_time)
                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        all_wait_time_stats[interval_index][stat_id][route_id][dir_id],
                        stat_id)

    # Create the S3 resource once instead of once per interval/stat iteration.
    s3 = boto3.resource('s3') if save_to_s3 else None

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps(
                {
                    'version': wait_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_wait_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = wait_times.get_cache_path(
                agency_id, d, stat_id, start_time_str, end_time_str)

            # exist_ok=True already tolerates an existing directory, so no
            # exists() pre-check is needed (and skipping it avoids a race).
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3_path = wait_times.get_s3_path(
                    agency_id, d, stat_id, start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # Renamed from `object` to avoid shadowing the builtin.
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(
                    Body=gzip.compress(bytes(data_str, 'utf-8')),
                    CacheControl='max-age=86400',
                    ContentType='application/json',
                    ContentEncoding='gzip',
                    ACL='public-read')
# Resolve CLI arguments into concrete route ids, date, and a local time
# window, then fetch vehicle state from trynapi for that window.
route_ids = args.route
agency_id = 'sf-muni'

if route_ids is None:
    # No routes given on the command line: default to every route.
    route_ids = [route.id for route in nextbus.get_route_list(agency_id)]

date_str = args.date
d = util.parse_date(date_str)

# Default window runs from 3am to 3am the next day.
start_time_str = '03:00' if args.start_time is None else args.start_time
end_time_str = '03:00+1' if args.end_time is None else args.end_time

tz = pytz.timezone('US/Pacific')
local_start = util.get_localized_datetime(d, start_time_str, tz)
local_end = util.get_localized_datetime(d, end_time_str, tz)

print(f"route_ids = {route_ids}")
print(f"start = {local_start}")
print(f"end = {local_end}")

state = trynapi.get_state(
    agency_id, d, local_start.timestamp(), local_end.timestamp(), route_ids)