# Imports needed to run these examples on their own. Project-specific modules
# (constants, util, nextbus, arrival_history, trip_times, wait_times,
# timetables, precomputed_stats, trynapi, config) and helpers such as
# stat_groups, all_stat_ids and the add_*_stats_for_* functions are assumed to
# be provided by the surrounding repository.
import collections
import gzip
import json
import time
from datetime import date
from pathlib import Path

import boto3
import pytz


def compute_trip_times(d: date,
                       tz,
                       agency_id,
                       routes,
                       save_to_s3=True,
                       stat_ids=None):
    # Computes trip time statistics for each route on the given date, writes a
    # JSON cache file per (stat, time interval), and optionally uploads each
    # file to S3.
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    print(d)
    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    # A (None, None) entry represents the unfiltered "all day" interval.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_trip_time_stats = {}
    for interval_index, _ in enumerate(timestamp_intervals):
        all_trip_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_trip_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(all_trip_time_stats, timestamp_intervals,
                                      stat_ids, route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    for interval_index, (start_time,
                         end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps(
                {
                    'version': trip_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_trip_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            print(cache_path)

            cache_dir = Path(cache_path).parent
            cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = trip_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = trip_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                obj = s3.Object(s3_bucket, s3_path)
                obj.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                        CacheControl='max-age=86400',
                        ContentType='application/json',
                        ContentEncoding='gzip',
                        ACL='public-read')
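A minimal sketch of how compute_trip_times might be invoked, reusing the sf-muni defaults from Example #4; the date string is purely illustrative.

# Hypothetical invocation of compute_trip_times (the date is illustrative).
d = util.parse_date('2019-12-10')
tz = pytz.timezone('US/Pacific')
routes = nextbus.get_route_list('sf-muni')
compute_trip_times(d, tz, 'sf-muni', routes, save_to_s3=False)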
Example #2
def compute_stats(d: date, agency: config.Agency, routes, save_to_s3=True):
    # Computes trip time, wait time, and schedule adherence statistics for
    # every route of the agency on the given date, then saves one precomputed
    # stats file per (stat, time interval).
    tz = agency.tz
    stat_ids = all_stat_ids

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00','19:00'))

    timestamp_intervals = [(
            int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
            int(util.get_localized_datetime(d, end_time_str, tz).timestamp())
        ) for start_time_str, end_time_str in time_str_intervals
    ]

    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_stats = {}

    for stat_id in stat_ids:
        all_stats[stat_id] = {}

        for interval_index, _ in enumerate(timestamp_intervals):
            all_stats[stat_id][interval_index] = {}

    for route in routes:
        route_id = route.id
        print(route_id)

        t1 = time.time()

        route_config = agency.get_route_config(route_id)

        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        try:
            timetable = timetables.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        timetable_df = timetable.get_data_frame()

        history_df = history.get_data_frame()

        # Pre-create the nested stats structure: stat_id -> interval -> route -> direction.
        for stat_id in stat_ids:
            for interval_index, _ in enumerate(timestamp_intervals):
                all_stats[stat_id][interval_index][route_id] = {'directions': {}}

                for dir_info in route_config.get_direction_infos():
                    dir_id = dir_info.id

                    all_stats[stat_id][interval_index][route_id]['directions'][dir_id] = collections.defaultdict(dict)

        add_trip_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df)
        add_schedule_adherence_stats_for_route(all_stats, timestamp_intervals, route_config, history_df, timetable_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    for stat_id in stat_ids:
        for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
            start_time_str, end_time_str = time_str_intervals[interval_index]

            data = {
                'routes': all_stats[stat_id][interval_index],
            }
            precomputed_stats.save_stats(agency.id, stat_id, d, start_time_str, end_time_str, data, save_to_s3)
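A sketch of a possible compute_stats call; config.get_agency and agency.get_route_list are assumed helpers for looking up the Agency object and its routes, and are not shown in the examples above.

# Hypothetical invocation of compute_stats; get_agency and get_route_list
# are assumed helpers, not confirmed by the code above.
agency = config.get_agency('sf-muni')
routes = agency.get_route_list()
compute_stats(util.parse_date('2019-12-10'), agency, routes, save_to_s3=False)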
Example #3
def compute_wait_times(agency_id,
                       d: date,
                       routes,
                       tz,
                       stat_ids,
                       save_to_s3=False):
    # Computes wait time statistics per route, direction, and stop for each
    # time interval on the given date, writes a JSON cache file per
    # (stat, interval), and optionally uploads each file to S3.
    print(d)
    all_wait_time_stats = {}

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    for interval_index, _ in enumerate(timestamp_intervals):
        all_wait_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_wait_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id

        print(route_id)
        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        for interval_index, _ in enumerate(timestamp_intervals):
            for stat_id in stat_ids:
                all_wait_time_stats[interval_index][stat_id][route_id] = {}

        df = history.get_data_frame()
        df = df.sort_values('TIME', axis=0)

        for dir_info in route_config.get_direction_infos():

            dir_id = dir_info.id

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    all_wait_time_stats[interval_index][stat_id][route_id][
                        dir_id] = {}

            stop_ids = dir_info.get_stop_ids()
            sid_values = df['SID'].values

            for stop_id in stop_ids:
                stop_df = df[sid_values == stop_id]

                all_time_values = stop_df['TIME'].values

                for interval_index, (
                        start_time,
                        end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(
                        all_time_values, start_time, end_time)

                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        all_wait_time_stats[interval_index][stat_id][route_id]
                        [dir_id], stat_id)

    for interval_index, (start_time,
                         end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]

            data_str = json.dumps(
                {
                    'version': wait_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_wait_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = wait_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            cache_dir = Path(cache_path).parent
            cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = wait_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                obj = s3.Object(s3_bucket, s3_path)
                obj.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                        CacheControl='max-age=86400',
                        ContentType='application/json',
                        ContentEncoding='gzip',
                        ACL='public-read')
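A sketch of a compute_wait_times call; unlike compute_trip_times it has no default for stat_ids, so this passes the keys of the same stat_groups mapping used by compute_trip_times above (the date is illustrative).

# Hypothetical invocation of compute_wait_times.
d = util.parse_date('2019-12-10')
tz = pytz.timezone('US/Pacific')
routes = nextbus.get_route_list('sf-muni')
compute_wait_times('sf-muni', d, routes, tz, list(stat_groups.keys()),
                   save_to_s3=False)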
Example #4
    # Fragment of a script body; `args` is assumed to come from an argparse
    # parser defined earlier in the source file.
    route_ids = args.route

    agency_id = 'sf-muni'

    if route_ids is None:
        route_ids = [route.id for route in nextbus.get_route_list(agency_id)]

    date_str = args.date

    d = util.parse_date(date_str)

    start_time_str = args.start_time
    if start_time_str is None:
        start_time_str = '03:00'

    end_time_str = args.end_time
    if end_time_str is None:
        end_time_str = '03:00+1'

    tz = pytz.timezone('US/Pacific')
    local_start = util.get_localized_datetime(d, start_time_str, tz)
    local_end = util.get_localized_datetime(d, end_time_str, tz)

    print(f"route_ids = {route_ids}")
    print(f"start = {local_start}")
    print(f"end = {local_end}")

    state = trynapi.get_state(agency_id, d, local_start.timestamp(),
                              local_end.timestamp(), route_ids)