def compute_stats_for_dates(dates, agency: config.Agency, scheduled=False, save_to_s3=True):
    """Computes and saves precomputed stats for each date. With scheduled=True,
    stats are computed once per unique schedule date key, so dates that share a
    timetable are only processed once."""
    routes = agency.get_route_list()

    if scheduled:
        computed_date_keys = {}
        for d in dates:
            date_key = timetables.get_date_key(agency.id, d)
            if date_key not in computed_date_keys:
                computed_date_keys[date_key] = True
                schedule_date = util.parse_date(date_key)
                compute_stats(schedule_date, agency, routes, scheduled=True, save_to_s3=save_to_s3)
    else:
        for d in dates:
            compute_stats(d, agency, routes, scheduled=False, save_to_s3=save_to_s3)
def compute_arrivals_for_date_and_start_hour(d: date, start_hour: int, agency: config.Agency, route_ids: list, save_to_s3=True):
    """Fetches raw vehicle state for a 24-hour window starting at start_hour
    (agency local time), computes arrivals for each route, and saves the
    arrival history, optionally to S3."""
    tz = agency.tz

    start_dt = tz.localize(datetime(d.year, d.month, d.day, hour=start_hour))
    end_dt = start_dt + timedelta(days=1)

    start_time = int(start_dt.timestamp())
    end_time = int(end_dt.timestamp())

    print(f"time = [{start_dt}, {end_dt})")

    t1 = time.time()

    state = trynapi.get_state(agency.id, d, start_time, end_time, route_ids)

    print(f'retrieved state in {round(time.time()-t1,1)} sec')

    for route_id in route_ids:
        route_state = state.get_for_route(route_id)

        if route_state is None:
            print(f'no state for route {route_id}')
            continue

        route_config = agency.get_route_config(route_id)

        t1 = time.time()

        arrivals_df = eclipses.find_arrivals(agency, route_state, route_config, d)

        history = arrival_history.from_data_frame(agency.id, route_id, arrivals_df, start_time, end_time)

        print(f'{route_id}: {round(time.time()-t1,1)} saving arrival history')

        arrival_history.save_for_date(history, d, save_to_s3)

        print(f'{route_id}: {round(time.time()-t1,2)} done')
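# Example driver (a sketch, not part of the original module): loops over several
# dates and calls compute_arrivals_for_date_and_start_hour for every route of the
# agency. The name compute_arrivals_for_dates and the start_hour default of 3 are
# assumptions made for illustration; only the calls into the function above are
# taken from this module.
def compute_arrivals_for_dates(dates, agency: config.Agency, start_hour=3, save_to_s3=True):
    # Use every route defined for the agency unless a caller narrows the list.
    route_ids = [route.id for route in agency.get_route_list()]
    for d in dates:
        # Each call covers a 24-hour window beginning at start_hour local time,
        # so consecutive dates produce contiguous, non-overlapping windows.
        compute_arrivals_for_date_and_start_hour(d, start_hour, agency, route_ids, save_to_s3=save_to_s3)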
def compute_stats(d: date, agency: config.Agency, routes, scheduled=False, save_to_s3=True):
    """Computes wait time, trip time, and schedule adherence stats for every route
    over the default time intervals (plus 07:00-19:00 and an unbounded all-day
    interval), then saves one precomputed stats file per stat_id and interval.

    The scheduled flag is accepted so compute_stats_for_dates can pass it; as
    written, stats are computed from the saved arrival history in either case."""
    tz = agency.tz
    stat_ids = all_stat_ids

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [(
            int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
            int(util.get_localized_datetime(d, end_time_str, tz).timestamp())
        )
        for start_time_str, end_time_str in time_str_intervals
    ]

    # (None, None) represents the entire day, with no start/end bounds.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_stats = {}

    for stat_id in stat_ids:
        all_stats[stat_id] = {}
        for interval_index, _ in enumerate(timestamp_intervals):
            all_stats[stat_id][interval_index] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = agency.get_route_config(route_id)

        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        try:
            timetable = timetables.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        timetable_df = timetable.get_data_frame()
        history_df = history.get_data_frame()

        # Initialize the nested stats structure for this route before the
        # per-stat helpers fill it in.
        for stat_id in stat_ids:
            for interval_index, _ in enumerate(timestamp_intervals):
                all_stats[stat_id][interval_index][route_id] = {'directions': {}}
                for dir_info in route_config.get_direction_infos():
                    dir_id = dir_info.id
                    all_stats[stat_id][interval_index][route_id]['directions'][dir_id] = collections.defaultdict(dict)

        add_trip_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_schedule_adherence_stats_for_route(all_stats, timestamp_intervals, route_config, history_df, timetable_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    for stat_id in stat_ids:
        for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
            start_time_str, end_time_str = time_str_intervals[interval_index]

            data = {
                'routes': all_stats[stat_id][interval_index],
            }

            precomputed_stats.save_stats(agency.id, stat_id, d, start_time_str, end_time_str, data, save_to_s3)
def compute_trip_times(d: date, agency: config.Agency, routes, save_to_s3=True, stat_ids=None):
    """Computes trip time stats for each route over the default time intervals
    (plus 07:00-19:00 and an unbounded all-day interval), writes one compact JSON
    cache file per stat group and interval, and optionally uploads a gzipped copy
    to S3."""
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    tz = agency.tz

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [(
            int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
            int(util.get_localized_datetime(d, end_time_str, tz).timestamp())
        )
        for start_time_str, end_time_str in time_str_intervals
    ]

    # (None, None) represents the entire day, with no start/end bounds.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_trip_time_stats = {}

    for interval_index, _ in enumerate(timestamp_intervals):
        all_trip_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_trip_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = agency.get_route_config(route_id)

        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(all_trip_time_stats, timestamp_intervals, stat_ids, route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]

            data_str = json.dumps({
                'version': trip_times.DefaultVersion,
                'start_time': start_time,
                'end_time': end_time,
                'stat': stat,
                'routes': all_trip_time_stats[interval_index][stat_id]
            }, separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency.id, d, stat_id, start_time_str, end_time_str)
            print(cache_path)

            cache_dir = Path(cache_path).parent
            if not cache_dir.exists():
                cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')

            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = trip_times.get_s3_path(agency.id, d, stat_id, start_time_str, end_time_str)
                s3_bucket = config.s3_bucket
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(
                    Body=gzip.compress(bytes(data_str, 'utf-8')),
                    CacheControl='max-age=86400',
                    ContentType='application/json',
                    ContentEncoding='gzip',
                    ACL='public-read'
                )
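# End-to-end sketch (not part of the original module; assumptions flagged below):
# compute raw arrivals for each day, then the trip time caches, then the
# precomputed stats for the whole range. config.get_agency() and
# util.get_dates_in_range() are assumed helpers; the agency id 'muni', the date
# strings, and the start hour of 3 are placeholders for illustration.
if __name__ == '__main__':
    agency = config.get_agency('muni')  # assumed helper returning a config.Agency
    routes = agency.get_route_list()
    route_ids = [route.id for route in routes]
    dates = util.get_dates_in_range('2019-12-01', '2019-12-07')  # assumed helper returning a list of dates

    for d in dates:
        # Raw arrivals first; the trip time and stats steps read the saved arrival history.
        compute_arrivals_for_date_and_start_hour(d, 3, agency, route_ids, save_to_s3=False)
        compute_trip_times(d, agency, routes, save_to_s3=False)

    compute_stats_for_dates(dates, agency, scheduled=False, save_to_s3=False)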