Ejemplo n.º 1
0
def add_wait_time_stats_for_route(all_stats, timestamp_intervals, route_config, history_df):
    """Store per-stop median wait times for one route in ``all_stats``.

    For each direction of the route and each stop of that direction, the
    departure times recorded at the stop are handed to
    ``wait_times.get_stats`` once per interval. Whenever a median (0.5
    quantile) is available it is written, rounded to one decimal place, to
    ``all_stats['combined'][interval_index][route_id]['directions'][dir_id]
    ['medianWaitTimes'][stop_id]``. Once every stop of a direction has been
    handled, ``add_median_wait_time_stats_for_direction`` is invoked on that
    direction's stats entry for every interval.

    Args:
        all_stats: Nested mapping that is mutated in place. Only the
            innermost ``stop_id`` key is assigned here, so the enclosing
            entries are expected to be reachable by plain indexing.
        timestamp_intervals: Sequence of ``(start_time, end_time)`` pairs;
            it is iterated once per stop and once more per direction.
        route_config: Object providing ``id`` and ``get_direction_infos()``.
        history_df: DataFrame with ``SID``, ``DID`` and ``DEPARTURE_TIME``
            columns. It is not modified; a sorted copy is used.

    Returns:
        None. Results are written into ``all_stats``.
    """
    route_id = route_config.id
    sorted_df = history_df.sort_values('DEPARTURE_TIME', axis=0)
    stop_column = sorted_df['SID'].values
    direction_column = sorted_df['DID'].values

    for direction in route_config.get_direction_infos():
        dir_id = direction.id

        for stop_id in direction.get_stop_ids():
            # Rows recorded at this stop while serving this direction.
            at_stop = (stop_column == stop_id) & (direction_column == dir_id)
            departures = sorted_df[at_stop]['DEPARTURE_TIME'].values

            for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
                interval_stats = wait_times.get_stats(departures, start_time, end_time)
                median = interval_stats.get_quantile(0.5)
                if median is None:
                    # No median for this stop/interval: leave the entry unset.
                    continue

                per_direction = all_stats['combined'][interval_index][route_id]['directions'][dir_id]
                per_direction['medianWaitTimes'][stop_id] = round(median, 1)

        # Post-process the direction's stats for each interval.
        for interval_index, (_start, _end) in enumerate(timestamp_intervals):
            add_median_wait_time_stats_for_direction(
                all_stats['combined'][interval_index][route_id]['directions'][dir_id]
            )
def add_headway_and_wait_time_stats_for_route(all_stats, timestamp_intervals,
                                              route_config, df):
    """Add median headway and wait-time stats for one route to ``all_stats``.

    For every direction of the route and every stop in that direction, the
    departure times at the stop are evaluated once per interval:

    * ``metrics.compute_headway_minutes`` yields the headways; when there is
      at least one, their median is collected for the interval.
    * ``wait_times.get_stats(...).get_quantile(0.5)`` yields the median wait
      time; when it is not ``None`` it is collected for the interval and also
      stored, rounded to one decimal place, under
      ``all_stats[StatIds.MedianTripTimes][interval_index][route_id]
      ['directions'][dir_id]['medianWaitTimes'][stop_id]``.

    After all stops of a direction are processed, the collected per-stop
    medians are reduced with ``get_median_or_none`` and stored as
    ``medianWaitTime`` and ``medianHeadway`` in
    ``all_stats[StatIds.Combined][interval_index][route_id]['directions']
    [dir_id]``.

    Args:
        all_stats: Nested mapping mutated in place. Only leaf keys are
            assigned here, so the enclosing entries are expected to be
            reachable by plain indexing.
        timestamp_intervals: Sequence of ``(start_time, end_time)`` pairs;
            it is iterated once per stop and once more per direction.
        route_config: Object providing ``id`` and ``get_direction_infos()``.
        df: DataFrame with ``SID``, ``DID`` and ``DEPARTURE_TIME`` columns.
            It is not modified; a sorted copy is used.

    Returns:
        None. Results are written into ``all_stats``.
    """
    route_id = route_config.id
    df = df.sort_values('DEPARTURE_TIME', axis=0)
    sid_values = df['SID'].values
    did_values = df['DID'].values

    for dir_info in route_config.get_direction_infos():
        dir_id = dir_info.id

        # The direction mask is the same for every stop of this direction,
        # so build it once instead of once per stop.
        in_direction = did_values == dir_id

        # interval_index -> list of per-stop medians for this direction.
        all_median_headways = collections.defaultdict(list)
        all_median_wait_times = collections.defaultdict(list)

        for stop_id in dir_info.get_stop_ids():
            stop_df = df[(sid_values == stop_id) & in_direction]
            all_time_values = stop_df['DEPARTURE_TIME'].values

            for interval_index, (start_time,
                                 end_time) in enumerate(timestamp_intervals):
                headways = metrics.compute_headway_minutes(
                    all_time_values, start_time, end_time)

                # Explicit length check: the truth value of a multi-element
                # array would be ambiguous if headways is a NumPy array.
                if len(headways) > 0:
                    all_median_headways[interval_index].append(
                        np.median(headways))

                wait_time_stats = wait_times.get_stats(all_time_values,
                                                       start_time, end_time)

                median_wait_time = wait_time_stats.get_quantile(0.5)
                if median_wait_time is not None:
                    all_median_wait_times[interval_index].append(
                        median_wait_time)
                    # Per-stop values are stored under the MedianTripTimes
                    # stat, not under Combined like the direction aggregates.
                    all_stats[StatIds.MedianTripTimes][interval_index][
                        route_id]['directions'][dir_id]['medianWaitTimes'][
                            stop_id] = round(median_wait_time, 1)

        for interval_index, _ in enumerate(timestamp_intervals):
            dir_stats = all_stats[StatIds.Combined][interval_index][route_id][
                'directions'][dir_id]

            # Indexing the defaultdicts yields an empty list for intervals
            # that collected nothing; get_median_or_none (defined elsewhere)
            # presumably maps that to None.
            dir_stats['medianWaitTime'] = get_median_or_none(
                all_median_wait_times[interval_index])
            dir_stats['medianHeadway'] = get_median_or_none(
                all_median_headways[interval_index])
Ejemplo n.º 3
0
def compute_wait_times(agency_id,
                       d: date,
                       routes,
                       tz,
                       stat_ids,
                       save_to_s3=False):
    """Compute wait-time stats for all routes on one date and cache them.

    The intervals evaluated are ``constants.DEFAULT_TIME_STR_INTERVALS``,
    plus ``('07:00', '19:00')``, plus a final ``(None, None)`` entry that is
    passed to ``wait_times.get_stats`` without bounds. For each route the
    arrival history of date ``d`` is loaded; routes whose history raises
    ``FileNotFoundError`` are reported with ``print`` and skipped. Stats are
    gathered per interval, stat id, route, direction and stop, and one JSON
    document per (interval, stat id) is written to the path returned by
    ``wait_times.get_cache_path``. With ``save_to_s3`` set, the same document
    is also uploaded gzip-compressed to S3 with a public-read ACL.

    Args:
        agency_id: Agency identifier forwarded to the project helpers.
        d: Date whose arrival history is processed.
        routes: Iterable of objects exposing an ``id`` attribute.
        tz: Time zone used to convert the interval time strings of ``d`` to
            timestamps.
        stat_ids: Collection of stat ids; each must be a key of
            ``stat_groups``. It is iterated several times.
        save_to_s3: When true, also upload every generated document to S3.

    Returns:
        None. Output is written to the local cache and optionally to S3.
    """
    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    # Unbounded interval; appended to both lists so their indexes stay
    # aligned.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # interval_index -> stat_id -> route_id -> dir_id -> stats
    all_wait_time_stats = {
        interval_index: {stat_id: {} for stat_id in stat_ids}
        for interval_index, _ in enumerate(timestamp_intervals)
    }

    for route in routes:
        route_id = route.id

        print(route_id)
        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            # No history for this route on this date: report and move on.
            print(ex)
            continue

        for interval_index, _ in enumerate(timestamp_intervals):
            for stat_id in stat_ids:
                all_wait_time_stats[interval_index][stat_id][route_id] = {}

        df = history.get_data_frame()
        df = df.sort_values('TIME', axis=0)

        # Same for every direction of the route, so extract it once.
        sid_values = df['SID'].values

        for dir_info in route_config.get_direction_infos():
            dir_id = dir_info.id

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    all_wait_time_stats[interval_index][stat_id][route_id][
                        dir_id] = {}

            for stop_id in dir_info.get_stop_ids():
                # NOTE(review): rows are matched by stop id only, not by
                # direction, so a stop listed under two directions would get
                # the same arrivals in both -- confirm this is intended.
                stop_df = df[sid_values == stop_id]

                all_time_values = stop_df['TIME'].values

                for interval_index, (
                        start_time,
                        end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(
                        all_time_values, start_time, end_time)

                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        all_wait_time_stats[interval_index][stat_id][route_id]
                        [dir_id], stat_id)

    # One S3 resource is enough for all uploads; none is created when
    # uploading is disabled.
    s3 = boto3.resource('s3') if save_to_s3 else None

    for interval_index, (start_time,
                         end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]

            # Compact separators keep the cached documents small.
            data_str = json.dumps(
                {
                    'version': wait_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_wait_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = wait_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            # exist_ok makes a separate existence check unnecessary.
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3_path = wait_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # Named s3_object so the builtin ``object`` is not shadowed.
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                              CacheControl='max-age=86400',
                              ContentType='application/json',
                              ContentEncoding='gzip',
                              ACL='public-read')
Ejemplo n.º 4
0
            hist = arrival_history.get_by_date(agency, route_id, d)
            arrivals = hist.get_data_frame(stop_id=stop, direction_id=stop_dir)

            start_time = util.get_timestamp_or_none(d, start_time_str, tz)
            end_time = util.get_timestamp_or_none(d, end_time_str, tz)

            departure_times = np.sort(arrivals['DEPARTURE_TIME'].values)
            if len(departure_times) == 0:
                continue

            first_bus_date_times.append(
                datetime.fromtimestamp(departure_times[0], tz))
            last_bus_date_times.append(
                datetime.fromtimestamp(departure_times[-1], tz))

            stats = wait_times.get_stats(departure_times, start_time, end_time)
            average = stats.get_average()
            if average is not None:
                averages.append(average)

            quantiles = stats.get_quantiles([0, 0.1, 0.5, 0.9, 1])
            if quantiles is not None:
                minimums.append(quantiles[0])
                p10s.append(quantiles[1])
                medians.append(quantiles[2])
                p90s.append(quantiles[3])
                maximums.append(quantiles[4])

        print(f"Date: {', '.join([str(date) for date in dates])}")
        print(f"Local Time Range: [{start_time_str}, {end_time_str}]")
        print(f"Route: {route_id} ({route_config.title})")