예제 #1
0
    start_time_str = args.start_time
    end_time_str = args.end_time

    route_config = nextbus.get_route_config('sf-muni', route_id)

    tz = pytz.timezone('US/Pacific')

    dates = util.get_dates_in_range(args.date, args.date)

    print(f"Date: {', '.join([str(date) for date in dates])}")
    print(f"Time of Day: [{start_time_str}, {end_time_str})")
    print(f"Route: {route_id} ({route_config.title})")
    print(f"Vehicle: {vid}")

    for d in dates:
        history = arrival_history.get_by_date(agency, route_id, d)

        df = history.get_data_frame(vehicle_id=vid,
                                    tz=tz,
                                    start_time_str=start_time_str,
                                    end_time_str=end_time_str)

        if df.empty:
            print(f"no arrival times found for vehicle {vid} on {date_str}")
            continue

        df = df.sort_values('TIME', axis=0)

        for index, row in df.iterrows():
            stop_id = row.SID
            stop_info = route_config.get_stop_info(stop_id)
def compute_trip_times(d: date,
                       tz,
                       agency_id,
                       routes,
                       save_to_s3=True,
                       stat_ids=None):
    """Compute trip time stats for one date and save one JSON file per
    (time interval, stat) pair.

    Args:
        d: The date to compute stats for.
        tz: Timezone passed to ``util.get_localized_datetime`` to turn the
            time-of-day strings into timestamps.
        agency_id: Agency identifier used to look up route configs, arrival
            history and the output paths.
        routes: Iterable of route objects; only their ``id`` attribute is
            read here.
        save_to_s3: When true, each JSON document is also gzip-compressed
            and uploaded to S3 with a public-read ACL.
        stat_ids: Stat ids to compute. Defaults to every key of
            ``stat_groups``.

    Routes whose arrival history raises ``FileNotFoundError`` are reported
    on stdout and skipped.
    """
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    print(d)
    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    # Both lists get a trailing (None, None) entry so they stay index-aligned.
    # NOTE(review): presumably this entry means "no time-of-day restriction" --
    # confirm against add_trip_time_stats_for_route / get_cache_path.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # interval index -> stat id -> per-route stats (filled in below)
    all_trip_time_stats = {
        interval_index: {stat_id: {} for stat_id in stat_ids}
        for interval_index in range(len(timestamp_intervals))
    }

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(all_trip_time_stats, timestamp_intervals,
                                      stat_ids, route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    # The S3 resource is created lazily on the first upload and then reused,
    # instead of being rebuilt for every (interval, stat) file.
    s3 = None

    for interval_index, (start_time,
                         end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            data_str = json.dumps(
                {
                    'version': trip_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat_groups[stat_id],
                    'routes': all_trip_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            print(cache_path)

            # exist_ok=True already tolerates an existing directory, so no
            # separate (and racy) exists() check is needed.
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                if s3 is None:
                    s3 = boto3.resource('s3')
                s3_path = trip_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = trip_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # Named s3_object so the builtin `object` is not shadowed.
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                              CacheControl='max-age=86400',
                              ContentType='application/json',
                              ContentEncoding='gzip',
                              ACL='public-read')
예제 #3
0
    route_config = agency.get_route_config(route_id)

    tz = agency.tz

    dates = util.get_dates_in_range(args.date, args.date)

    print(f"Date: {', '.join([str(date) for date in dates])}")
    print(f"Time of Day: [{start_time_str}, {end_time_str})")
    print(f"Route: {route_id} ({route_config.title})")
    print(f"Vehicle: {vid}")

    num_stops = 0

    for d in dates:
        history = arrival_history.get_by_date(agency.id, route_id, d, version)

        start_time = util.get_timestamp_or_none(d, start_time_str, tz)
        end_time = util.get_timestamp_or_none(d, end_time_str, tz)

        df = history.get_data_frame(vehicle_id=vid,
                                    direction_id=direction_id,
                                    start_time=start_time,
                                    end_time=end_time)

        if df.empty:
            print(f"no arrival times found for vehicle {vid} on {date_str}")
            continue

        df = df.sort_values(['TIME', 'TRIP'], axis=0)
        df['DATE_TIME'] = df['TIME'].apply(
예제 #4
0
    dates = util.get_dates_in_range(args.date, args.date)

    print(f"Date: {', '.join([str(date) for date in dates])}")
    print(f"Time of Day: [{start_time_str}, {end_time_str})")

    def render_distance(dist):
        return '----' if np.isnan(dist) else ('%3dm' % dist)

    for route_id in route_ids:
        route_config = agency.get_route_config(route_id)

        df = pd.concat([
            arrival_history.get_by_date(agency.id, route_id, d, version) \
                .get_data_frame(
                    start_time = util.get_timestamp_or_none(d, start_time_str, tz),
                    end_time = util.get_timestamp_or_none(d, end_time_str, tz)
                )
                for d in dates
        ])

        print(f"Route: {route_id} ({route_config.title})")

        dir_infos = route_config.get_direction_infos()

        for dir_info in dir_infos:
            print(f"Direction: {dir_info.title} ({dir_info.id})")

            prev_stop_info = None

            for dir_index, stop_id in enumerate(dir_info.get_stop_ids()):
예제 #5
0
def compute_wait_times(agency_id,
                       d: date,
                       routes,
                       tz,
                       stat_ids,
                       save_to_s3=False):
    """Compute wait time stats for one date and save one JSON file per
    (time interval, stat) pair.

    Args:
        agency_id: Agency identifier used to look up route configs, arrival
            history and the output paths.
        d: The date to compute stats for.
        routes: Iterable of route objects; only their ``id`` attribute is
            read here.
        tz: Timezone passed to ``util.get_localized_datetime`` to turn the
            time-of-day strings into timestamps.
        stat_ids: Stat ids to compute; each must be a key of ``stat_groups``.
        save_to_s3: When true, each JSON document is also gzip-compressed
            and uploaded to S3 with a public-read ACL.

    Routes whose arrival history raises ``FileNotFoundError`` are reported
    on stdout and skipped.
    """
    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    # Both lists get a trailing (None, None) entry so they stay index-aligned.
    # NOTE(review): presumably this entry means "no time-of-day restriction" --
    # confirm against wait_times.get_stats / get_cache_path.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    # interval index -> stat id -> route id -> direction id -> stats
    all_wait_time_stats = {
        interval_index: {stat_id: {} for stat_id in stat_ids}
        for interval_index in range(len(timestamp_intervals))
    }

    for route in routes:
        route_id = route.id

        print(route_id)
        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        for interval_stats in all_wait_time_stats.values():
            for stat_id in stat_ids:
                interval_stats[stat_id][route_id] = {}

        df = history.get_data_frame().sort_values('TIME', axis=0)

        # The stop-id column does not depend on the direction, so extract it
        # once per route rather than once per direction.
        sid_values = df['SID'].values

        for dir_info in route_config.get_direction_infos():
            dir_id = dir_info.id

            for interval_stats in all_wait_time_stats.values():
                for stat_id in stat_ids:
                    interval_stats[stat_id][route_id][dir_id] = {}

            for stop_id in dir_info.get_stop_ids():
                all_time_values = df[sid_values == stop_id]['TIME'].values

                for interval_index, (
                        start_time,
                        end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(
                        all_time_values, start_time, end_time)

                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)

            for interval_stats in all_wait_time_stats.values():
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        interval_stats[stat_id][route_id][dir_id], stat_id)

    # The S3 resource is created lazily on the first upload and then reused,
    # instead of being rebuilt for every (interval, stat) file.
    s3 = None

    for interval_index, (start_time,
                         end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            data_str = json.dumps(
                {
                    'version': wait_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat_groups[stat_id],
                    'routes': all_wait_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = wait_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            # exist_ok=True already tolerates an existing directory, so no
            # separate (and racy) exists() check is needed.
            Path(cache_path).parent.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                if s3 is None:
                    s3 = boto3.resource('s3')
                s3_path = wait_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                # Named s3_object so the builtin `object` is not shadowed.
                s3_object = s3.Object(s3_bucket, s3_path)
                s3_object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                              CacheControl='max-age=86400',
                              ContentType='application/json',
                              ContentEncoding='gzip',
                              ACL='public-read')
예제 #6
0
def compute_stats(d: date, agency: config.Agency, routes, save_to_s3=True):
    """Compute every stat in ``all_stat_ids`` for one date and save the results.

    For each route, trip time, wait time and schedule adherence stats are
    accumulated per time interval, then one result per (stat, interval) pair
    is handed to ``precomputed_stats.save_stats``.

    Args:
        d: The date to compute stats for.
        agency: Agency whose ``tz``, ``id`` and route configs are used.
        routes: Iterable of route objects; only their ``id`` attribute is
            read here.
        save_to_s3: Forwarded unchanged to ``precomputed_stats.save_stats``.

    A route is reported on stdout and skipped when either its arrival
    history or its timetable raises ``FileNotFoundError``.
    """
    tz = agency.tz
    stat_ids = all_stat_ids

    print(d)

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    # Convert each (start, end) time-of-day pair into epoch-second timestamps.
    timestamp_intervals = []
    for begin_str, finish_str in time_str_intervals:
        begin_ts = int(
            util.get_localized_datetime(d, begin_str, tz).timestamp())
        finish_ts = int(
            util.get_localized_datetime(d, finish_str, tz).timestamp())
        timestamp_intervals.append((begin_ts, finish_ts))

    # Keep both lists index-aligned with a trailing (None, None) entry.
    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    num_intervals = len(timestamp_intervals)

    # stat id -> interval index -> per-route stats
    all_stats = {
        stat_id: {idx: {} for idx in range(num_intervals)}
        for stat_id in stat_ids
    }

    for route in routes:
        route_id = route.id
        print(route_id)

        started_at = time.time()

        route_config = agency.get_route_config(route_id)

        # Either lookup failing means the route cannot be processed.
        try:
            history = arrival_history.get_by_date(agency.id, route_id, d)
            timetable = timetables.get_by_date(agency.id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        timetable_df = timetable.get_data_frame()
        history_df = history.get_data_frame()

        # Seed an empty per-direction container for every (stat, interval).
        for stat_id in stat_ids:
            for idx in range(num_intervals):
                all_stats[stat_id][idx][route_id] = {
                    'directions': {
                        dir_info.id: collections.defaultdict(dict)
                        for dir_info in route_config.get_direction_infos()
                    }
                }

        add_trip_time_stats_for_route(
            all_stats, timestamp_intervals, route_config, history_df)
        add_wait_time_stats_for_route(
            all_stats, timestamp_intervals, route_config, history_df)
        add_schedule_adherence_stats_for_route(
            all_stats, timestamp_intervals, route_config, history_df,
            timetable_df)

        finished_at = time.time()
        print(f' {round(finished_at - started_at, 2)} sec')

    for stat_id in stat_ids:
        for idx, (start_time_str,
                  end_time_str) in enumerate(time_str_intervals):
            payload = {
                'routes': all_stats[stat_id][idx],
            }
            precomputed_stats.save_stats(agency.id, stat_id, d,
                                         start_time_str, end_time_str,
                                         payload, save_to_s3)
예제 #7
0
    stop_rows = []

    dates = util.get_dates_in_range(args.date, args.date)

    print(f"Date: {', '.join([str(date) for date in dates])}")
    print(f"Time of Day: [{start_time_str}, {end_time_str})")

    def render_distance(dist):
        return '----' if np.isnan(dist) else ('%3dm' % dist)

    for route_id in route_ids:
        route_config = nextbus.get_route_config(agency_id, route_id)

        df = pd.concat([
            arrival_history.get_by_date(agency_id, route_id, d, version) \
                .get_data_frame(start_time_str = start_time_str, end_time_str = end_time_str, tz = tz)
                for d in dates
        ])

        print(f"Route: {route_id} ({route_config.title})")

        dir_infos = route_config.get_direction_infos()

        for dir_info in dir_infos:
            print(f"Direction: {dir_info.title} ({dir_info.id})")

            prev_stop_info = None

            for dir_index, stop_id in enumerate(dir_info.get_stop_ids()):
                stop_info = route_config.get_stop_info(stop_id)
    if direction_id:
        dir_info = route_configs[route_id].get_direction_info(
            direction_id) if direction_id else None
        print(
            f"Direction: {dir_info.title if dir_info else '?'} ({direction_id})"
        )

    if stop_id:
        stop_info = route_configs[route_id].get_stop_info(
            stop_id) if route_id else None
        print(f"Stop: {stop_info.title if stop_info else '?'} ({stop_id})")

    for d in dates:
        for route_id in route_ids:
            base_history = arrival_history.get_by_date(agency_id, route_id, d,
                                                       base_version)
            other_history = arrival_history.get_by_date(
                agency_id, route_id, d, other_version)

            base_df = base_history.get_data_frame(
                stop_id=stop_id, direction_id=direction_id).sort_values('TIME',
                                                                        axis=0)
            other_df = other_history.get_data_frame(
                stop_id=stop_id, direction_id=direction_id).sort_values('TIME',
                                                                        axis=0)

            base_trips += len(np.unique(base_df['TRIP']))
            other_trips += len(np.unique(other_df['TRIP']))

            def find_other_arrival_time(row):
                other_time = other_history.find_closest_arrival_time(
예제 #9
0
def metrics_page():
    """Return headway, wait time and trip time metrics for a stop as JSON.

    Query parameters read from ``request.args``:
        route_id: Route to report on (defaults to '12').
        start_stop_id: Stop to compute metrics for (defaults to '3476').
        end_stop_id: Optional destination stop for trip times.
        direction_id: Optional direction filter.
        date: Single date; overrides start_date/end_date when given.
        start_date, end_date: Date range (start defaults to '2019-02-01',
            end defaults to start).
        start_time, end_time: Optional 24h time-of-day bounds, e.g. "14:00".

    Returns:
        A JSON ``Response``. Status 400 when the date range cannot be parsed;
        404 when the start stop is not on the route, arrival history is
        missing for a date, or no arrivals/headways are found; otherwise the
        computed metrics plus the processing time in seconds.
    """
    metrics_start = time.time()

    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')

    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')
    if date_str is not None:
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-02-01'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get(
        'start_time')  # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get(
        'end_time')  # e.g. "18:00" (24h time of day)

    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }

    def error_response(message, status):
        # Every error shares the same JSON envelope: the echoed params
        # followed by a human-readable message.
        return Response(json.dumps({
            'params': params,
            'error': message,
        },
                                   indent=2),
                        status=status,
                        mimetype='application/json')

    try:
        dates = util.get_dates_in_range(start_date_str, end_date_str)
    except Exception as ex:
        # Deliberately broad: any failure to interpret the user-supplied
        # dates is reported back to the client as a bad request.
        return error_response(str(ex), 400)

    tz = pytz.timezone('US/Pacific')

    route_config = nextbus.get_route_config('sf-muni', route_id)
    start_stop_info = route_config.get_stop_info(start_stop_id)
    end_stop_info = route_config.get_stop_info(
        end_stop_id) if end_stop_id else None

    # 404 if the given stop isn't on the route
    # TODO: what should be done for the case where the start stop id is valid but the end stop id isn't?
    if start_stop_info is None:
        return error_response(
            f"Stop {start_stop_id} is not on route {route_id}", 404)

    if direction_id is not None:
        dir_info = route_config.get_direction_info(direction_id)
        dir_infos = [dir_info] if dir_info is not None else []
    else:
        # TODO: validation for end_stop_id directions if given (see trips.py)
        dirs = route_config.get_directions_for_stop(start_stop_id)
        dir_infos = [
            route_config.get_direction_info(direction) for direction in dirs
        ]

    # Trip times are only computed when an end stop is given and the
    # requested direction serves it.
    # NOTE(review): when direction_id is omitted this is always False, so
    # trip times are never returned without an explicit direction -- confirm
    # that this is intended.
    include_trip_times = False
    if end_stop_id:
        end_stop_dirs = route_config.get_directions_for_stop(end_stop_id)
        include_trip_times = direction_id in end_stop_dirs

    directions = [{
        'id': dir_info.id,
        'title': dir_info.title
    } for dir_info in dir_infos]

    headway_min_arr = []
    waits = []
    completed_trips = []

    for d in dates:
        try:
            history = arrival_history.get_by_date('sf-muni', route_id, d)

            df = history.get_data_frame(start_stop_id,
                                        tz=tz,
                                        direction_id=direction_id,
                                        start_time_str=start_time_str,
                                        end_time_str=end_time_str)

            # get all headways for the selected stop (arrival time minus previous arrival time), computed separately for each day
            df['headway_min'] = metrics.compute_headway_minutes(df)

            waits.append(
                wait_times.get_waits(df, start_stop_info, d, tz, route_id,
                                     start_time_str, end_time_str))

            if include_trip_times:
                trips = trip_times.get_trip_times(df, history, tz,
                                                  start_stop_id, end_stop_id)
                completed_trips.append(
                    trips.trip_min[trips.trip_min.notnull()])

            # Remove the NaN row (first bus of the day) before collecting.
            # Previously the filtered series was computed but the unfiltered
            # column was appended, leaking NaN into the headway stats.
            headway_min_arr.append(df.headway_min[df.headway_min.notnull()])
        except FileNotFoundError:
            return error_response(
                f"Arrival history not found for route {route_id} on {d.isoformat()}",
                404)
        except IndexError:
            return error_response(
                f"No arrivals found for stop {start_stop_id} on route {route_id} in direction {direction_id} on {d.isoformat()}",
                404)

    headway_min = pd.concat(headway_min_arr)
    waits = pd.concat(waits)
    if include_trip_times:
        completed_trips = pd.concat(completed_trips)

    if headway_min.empty:
        return error_response(
            f"No arrivals for stop {start_stop_id} on route {route_id}", 404)

    data = {
        'params':
        params,
        'route_title':
        route_config.title,
        'start_stop_title':
        start_stop_info.title if start_stop_info else None,
        'end_stop_title':
        end_stop_info.title if end_stop_info else None,
        'directions':
        directions,
        'headway_min':
        metrics.get_headways_stats(headway_min),
        'wait_times':
        metrics.get_wait_times_stats(waits, tz),
        'trip_times':
        metrics.get_trip_times_stats(completed_trips, start_stop_id,
                                     end_stop_id)
        if include_trip_times else None,
    }

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    return Response(json.dumps(data, indent=2), mimetype='application/json')