def compute_stats_for_dates(dates,
                            agency: config.Agency,
                            scheduled=False,
                            save_to_s3=True):
    """Run ``compute_stats`` for the given dates of one agency.

    The agency's route list is fetched once and reused for every call.

    When ``scheduled`` is false, ``compute_stats`` is called once per entry
    of ``dates``. When ``scheduled`` is true, each date is first mapped to a
    date key via ``timetables.get_date_key``; stats are computed only the
    first time a given key is seen, using the date obtained from
    ``util.parse_date(date_key)``.

    ``save_to_s3`` is passed through unchanged to ``compute_stats``.
    """
    routes = agency.get_route_list()

    if not scheduled:
        for d in dates:
            compute_stats(d,
                          agency,
                          routes,
                          scheduled=False,
                          save_to_s3=save_to_s3)
        return

    # Several dates can share one date key; handle each key only once.
    seen_date_keys = set()
    for d in dates:
        date_key = timetables.get_date_key(agency.id, d)
        if date_key in seen_date_keys:
            continue
        seen_date_keys.add(date_key)

        schedule_date = util.parse_date(date_key)
        compute_stats(schedule_date,
                      agency,
                      routes,
                      scheduled=True,
                      save_to_s3=save_to_s3)
Exemple #2
0
def compute_arrivals_for_date_and_start_hour(d: date,
                                             start_hour: int,
                                             agency: config.Agency,
                                             route_ids: list,
                                             save_to_s3=True):
    """Compute and save arrival history for each route in ``route_ids``.

    The time window starts at ``start_hour`` on date ``d``, localized with
    ``agency.tz``, and ends ``timedelta(days=1)`` after that start. Vehicle
    state for the window is fetched once for all routes via
    ``trynapi.get_state``; routes for which the state has no data are
    reported and skipped.

    For every remaining route, arrivals are computed with
    ``eclipses.find_arrivals``, wrapped into an arrival history, and saved
    with ``arrival_history.save_for_date`` (``save_to_s3`` is passed through).
    Progress and timings are printed to stdout.
    """
    window_start = agency.tz.localize(
        datetime(d.year, d.month, d.day, hour=start_hour))
    window_end = window_start + timedelta(days=1)

    start_time = int(window_start.timestamp())
    end_time = int(window_end.timestamp())

    print(f"time = [{window_start}, {window_end})")

    fetch_started = time.time()
    state = trynapi.get_state(agency.id, d, start_time, end_time, route_ids)
    print(f'retrieved state in {round(time.time()-fetch_started,1)} sec')

    for route_id in route_ids:
        route_state = state.get_for_route(route_id)
        if route_state is None:
            print(f'no state for route {route_id}')
            continue

        route_config = agency.get_route_config(route_id)

        # Per-route timer; it starts after the route config lookup.
        route_started = time.time()

        arrivals = eclipses.find_arrivals(agency, route_state, route_config,
                                          d)
        history = arrival_history.from_data_frame(agency.id, route_id,
                                                  arrivals, start_time,
                                                  end_time)

        print(f'{route_id}: {round(time.time()-route_started,1)} saving arrival history')

        arrival_history.save_for_date(history, d, save_to_s3)

        print(f'{route_id}: {round(time.time()-route_started,2)} done')
def compute_stats(d: date, agency: config.Agency, routes, save_to_s3=True):
    """Compute precomputed stats for date ``d`` and save them per interval.

    Stats are gathered for every stat id in ``all_stat_ids`` and for every
    time interval: the default intervals from
    ``constants.DEFAULT_TIME_STR_INTERVALS``, an extra 07:00-19:00 interval,
    and a final ``(None, None)`` entry (presumably meaning "no time
    restriction" -- confirm against the ``add_*_stats_for_route`` helpers).

    Routes whose arrival history or timetable is missing
    (``FileNotFoundError``) are reported on stdout and skipped. One
    ``precomputed_stats.save_stats`` call is made per (stat id, interval)
    pair, with ``save_to_s3`` passed through.
    """
    tz = agency.tz
    stat_ids = all_stat_ids

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00','19:00'))

    # Convert each (start, end) time string pair into epoch seconds on date d.
    timestamp_intervals = []
    for start_str, end_str in time_str_intervals:
        start_ts = int(util.get_localized_datetime(d, start_str, tz).timestamp())
        end_ts = int(util.get_localized_datetime(d, end_str, tz).timestamp())
        timestamp_intervals.append((start_ts, end_ts))

    # Keep both lists index-aligned when adding the unbounded entry.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    interval_indexes = range(len(timestamp_intervals))

    # all_stats[stat_id][interval_index][route_id] -> per-route stats dict
    all_stats = {
        stat_id: {interval_index: {} for interval_index in interval_indexes}
        for stat_id in stat_ids
    }

    for route in routes:
        route_id = route.id
        print(route_id)

        route_started = time.time()

        route_config = agency.get_route_config(route_id)

        # Both inputs are required; a missing file for either skips the route.
        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
            timetable = timetables.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        timetable_df = timetable.get_data_frame()
        history_df = history.get_data_frame()

        # Pre-create an empty per-direction container for every stat/interval
        # before the add_* helpers are called for this route.
        for stat_id in stat_ids:
            for interval_index in interval_indexes:
                all_stats[stat_id][interval_index][route_id] = {
                    'directions': {
                        dir_info.id: collections.defaultdict(dict)
                        for dir_info in route_config.get_direction_infos()
                    }
                }

        add_trip_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_schedule_adherence_stats_for_route(all_stats, timestamp_intervals, route_config, history_df, timetable_df)

        print(f' {round(time.time()-route_started, 2)} sec')

    for stat_id in stat_ids:
        stats_by_interval = all_stats[stat_id]
        for interval_index, (start_time_str, end_time_str) in enumerate(time_str_intervals):
            data = {
                'routes': stats_by_interval[interval_index],
            }
            precomputed_stats.save_stats(agency.id, stat_id, d, start_time_str, end_time_str, data, save_to_s3)
Exemple #4
0
def compute_trip_times(d: date, agency: config.Agency, routes, save_to_s3=True, stat_ids=None):
    """Compute trip time stats for date ``d`` and save one file per interval/stat.

    Args:
        d: The date whose arrival history is processed.
        agency: Agency providing the time zone, id and route configs.
        routes: Iterable of route objects; only their ``id`` is read here.
        save_to_s3: When true, each result is also gzip-compressed and
            uploaded to ``config.s3_bucket`` with a public-read ACL.
        stat_ids: Stat group ids to compute; defaults to every key of
            ``stat_groups``.

    Intervals are the defaults from ``constants.DEFAULT_TIME_STR_INTERVALS``
    plus 07:00-19:00 and a final ``(None, None)`` entry (presumably meaning
    "no time restriction" -- confirm against
    ``add_trip_time_stats_for_route``). Routes without arrival history
    (``FileNotFoundError``) are reported on stdout and skipped.

    Each (interval, stat) result is serialized as compact JSON and written to
    the local cache path from ``trip_times.get_cache_path``; parent
    directories are created as needed.
    """
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    tz = agency.tz

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00','19:00'))

    # Epoch-second bounds for each (start, end) time string pair on date d.
    timestamp_intervals = [(
            int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
            int(util.get_localized_datetime(d, end_time_str, tz).timestamp())
        ) for start_time_str, end_time_str in time_str_intervals
    ]

    # Keep both lists index-aligned when adding the unbounded entry.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # all_trip_time_stats[interval_index][stat_id] -> per-route stats
    all_trip_time_stats = {
        interval_index: {stat_id: {} for stat_id in stat_ids}
        for interval_index in range(len(timestamp_intervals))
    }

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = agency.get_route_config(route_id)

        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(all_trip_time_stats,
            timestamp_intervals, stat_ids, route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    # Create the S3 resource once instead of once per (interval, stat) file.
    s3 = boto3.resource('s3') if save_to_s3 else None

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps({
                'version': trip_times.DefaultVersion,
                'start_time': start_time,
                'end_time': end_time,
                'stat': stat,
                'routes': all_trip_time_stats[interval_index][stat_id]
            }, separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency.id, d, stat_id, start_time_str, end_time_str)

            print(cache_path)

            # exist_ok=True already covers an existing directory, so no
            # separate (race-prone) exists() check is needed.
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3_path = trip_times.get_s3_path(agency.id, d, stat_id, start_time_str, end_time_str)
                s3_bucket = config.s3_bucket
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # Named s3_object to avoid shadowing the builtin ``object``.
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(
                    Body=gzip.compress(bytes(data_str, 'utf-8')),
                    CacheControl='max-age=86400',
                    ContentType='application/json',
                    ContentEncoding='gzip',
                    ACL='public-read'
                )