Example #1
def route_config():
    route_id = request.args.get('route_id')
    route = nextbus.get_route_config('sf-muni', route_id)

    data = {
        'id': route_id,
        'title': route.title,
        'directions': [{
            'id': dir.id,
            'title': dir.title,
            'name': dir.name,
            'stops': dir.get_stop_ids()
        } for dir in route.get_direction_infos()],
        'stops': {
            stop.id: {
                'title': stop.title,
                'lat': stop.lat,
                'lon': stop.lon
            }
            for stop in route.get_stop_infos()
        }
    }
    return Response(json.dumps(data, indent=2), mimetype='application/json')
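For reference, here is a self-contained sketch of the JSON shape this view produces, using stand-in objects instead of the real nextbus route config (all names and values below are made up for illustration):

# Stand-in objects and made-up values, only to show the response shape built above.
import json
from collections import namedtuple

Direction = namedtuple('Direction', ['id', 'title', 'name', 'stop_ids'])
Stop = namedtuple('Stop', ['id', 'title', 'lat', 'lon'])

directions = [Direction('12___I_F00', 'Inbound', 'Inbound', ['3476'])]
stops = [Stop('3476', 'Example St & 1st St', 37.77, -122.42)]

data = {
    'id': '12',
    'title': 'Example Route',
    'directions': [{'id': d.id, 'title': d.title, 'name': d.name, 'stops': d.stop_ids}
                   for d in directions],
    'stops': {s.id: {'title': s.title, 'lat': s.lat, 'lon': s.lon} for s in stops},
}
print(json.dumps(data, indent=2))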
Example #2
def get_arrival_history(as_of_date: datetime, time_zone: pytz.timezone,
                        increment: timedelta, agency: str, route_ids: list,
                        start_hour: int, continue_index):
    start_dt = time_zone.localize(
        datetime(as_of_date.year,
                 as_of_date.month,
                 as_of_date.day,
                 hour=start_hour))
    end_dt = start_dt + increment

    start_time = int(start_dt.timestamp())
    end_time = int(end_dt.timestamp())

    print(f"time = [{start_dt}, {end_dt})")

    t1 = time.time()

    state = trynapi.get_state(agency, as_of_date, start_time, end_time,
                              route_ids)

    print(f'retrieved state in {round(time.time()-t1,1)} sec')

    for i, route_id in enumerate(route_ids):
        if continue_index is not None and i < continue_index:
            continue

        route_state = state.get_for_route(route_id)

        if route_state is None:
            print(f'no state for route {route_id}')
            continue

        route_config = nextbus.get_route_config(agency, route_id)

        t1 = time.time()

        arrivals_df = eclipses.find_arrivals(route_state, route_config,
                                             as_of_date, time_zone)

        history = arrival_history.from_data_frame(agency, route_id,
                                                  arrivals_df, start_time,
                                                  end_time)

        print(f'{route_id}: {round(time.time()-t1,1)} saving arrival history')

        # args: parsed command-line arguments, assumed to be available from the enclosing script
        arrival_history.save_for_date(history, as_of_date, args.s3)

        print(f'{route_id}: {round(time.time()-t1,2)} done')
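The [start_dt, end_dt) bounds above are built with pytz's localize and converted to Unix timestamps. A self-contained sketch of the same arithmetic, assuming start_hour=3 and a 3-hour increment purely for illustration:

# Minimal sketch of the half-open time interval computation, runnable on its own.
from datetime import datetime, timedelta
import pytz

tz = pytz.timezone('US/Pacific')
as_of_date = datetime(2019, 4, 8)          # the service date
start_dt = tz.localize(datetime(as_of_date.year, as_of_date.month, as_of_date.day, hour=3))
end_dt = start_dt + timedelta(hours=3)     # plays the role of `increment` above

start_time = int(start_dt.timestamp())     # Unix timestamps passed to trynapi.get_state
end_time = int(end_dt.timestamp())
print(f"time = [{start_dt}, {end_dt})  ->  timestamps [{start_time}, {end_time})")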
Example #3
def route_config():
    route_id = request.args.get('route_id')
    params = {'route_id': route_id}

    if route_id is None:
        return make_error_response(params, "Missing route_id", 400)

    route = nextbus.get_route_config('sf-muni', route_id)

    if route is None:
        return make_error_response(params, f"Invalid route ID {route_id}", 404)

    data = {
        'id': route_id,
        'title': route.title,
        'directions': [{
            'id': dir.id,
            'title': dir.title,
            'name': dir.name,
            'stops': dir.get_stop_ids()
        } for dir in route.get_direction_infos()],
        'stops': {
            stop.id: {
                'title': stop.title,
                'lat': stop.lat,
                'lon': stop.lon
            }
            for stop in route.get_stop_infos()
        }
    }
    res = Response(
        json.dumps(data),
        mimetype='application/json')  # no prettyprinting to save bandwidth
    if not DEBUG:
        res.headers['Cache-Control'] = 'max-age=3600'
    return res
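make_error_response is not shown in these examples. A hypothetical implementation consistent with how it is called here (params, an error message, and an HTTP status) might look like the sketch below; the project's real helper may differ in details.

# Hypothetical helper, not taken from the original code: echoes the request
# params plus an error message as JSON with the given HTTP status.
from flask import Response
import json

def make_error_response(params, error, status):
    data = {
        'params': params,
        'error': error,
    }
    return Response(json.dumps(data, indent=2), status=status, mimetype='application/json')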
Example #4
def compute_arrivals_for_date(d: date, start_hour: int, tz: pytz.timezone,
                              agency: str, route_ids: list, s3=False):

    start_dt = tz.localize(datetime(d.year, d.month, d.day, hour=start_hour))
    end_dt = start_dt + timedelta(days=1)

    start_time = int(start_dt.timestamp())
    end_time = int(end_dt.timestamp())

    print(f"time = [{start_dt}, {end_dt})")

    t1 = time.time()

    state = trynapi.get_state(agency, d, start_time, end_time, route_ids)

    print(f'retrieved state in {round(time.time()-t1,1)} sec')

    for i, route_id in enumerate(route_ids):
        route_state = state.get_for_route(route_id)

        if route_state is None:
            print(f'no state for route {route_id}')
            continue

        route_config = nextbus.get_route_config(agency, route_id)

        t1 = time.time()

        arrivals_df = eclipses.find_arrivals(route_state, route_config, d, tz)

        history = arrival_history.from_data_frame(agency, route_id, arrivals_df, start_time, end_time)

        print(f'{route_id}: {round(time.time()-t1,1)} saving arrival history')

        arrival_history.save_for_date(history, d, s3)

        print(f'{route_id}: {round(time.time()-t1,2)} done')
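A hypothetical command-line wrapper (argument names and the start hour are assumptions, not the project's actual script) showing how compute_arrivals_for_date could be invoked:

# Hypothetical CLI wrapper; flag names and start_hour=3 are illustrative assumptions.
import argparse
from datetime import date
import pytz

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Compute arrivals for one day')
    parser.add_argument('--date', required=True, help='date in YYYY-MM-DD format')
    parser.add_argument('--route', nargs='+', required=True, help='one or more route IDs')
    parser.add_argument('--s3', action='store_true', help='also save arrival history to S3')
    args = parser.parse_args()

    compute_arrivals_for_date(
        d=date.fromisoformat(args.date),
        start_hour=3,                      # assumed start of the service day
        tz=pytz.timezone('US/Pacific'),
        agency='sf-muni',
        route_ids=args.route,
        s3=args.s3)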
Example #5
def metrics_page():
    metrics_start = time.time()
    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')

    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')

    if date_str is not None:
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-04-08'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get('start_time') # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get('end_time') # e.g. "18:00" or "03:00+1" (24h time of day)

    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }

    data = {
        'params': params
    }

    try:
        route_config = nextbus.get_route_config('sf-muni', route_id)

        start_stop_info = route_config.get_stop_info(start_stop_id)
        if start_stop_info is None:
            raise errors.ValidationError(f"Stop {start_stop_id} is not on route {route_id}")

        data['start_stop_title'] = start_stop_info.title

        if end_stop_id:
            end_stop_info = route_config.get_stop_info(end_stop_id)
            if end_stop_info is None:
                raise errors.ValidationError(f"Stop {end_stop_id} is not on route {route_id}")
            data['end_stop_title'] = end_stop_info.title

        rng = metrics.Range(
            util.get_dates_in_range(start_date_str, end_date_str),
            start_time_str,
            end_time_str,
            pytz.timezone('US/Pacific')
        )

        route_metrics = metrics.RouteMetrics('sf-muni', route_id)

        keys = ['count', 'avg', 'min', 'median', 'max', 'percentiles', 'histogram']

        data['wait_times'] = route_metrics.get_wait_time_stats(
            direction_id, start_stop_id,
            rng, keys
        )

        data['trip_times'] = route_metrics.get_trip_time_stats(
            direction_id, start_stop_id, end_stop_id,
            rng, keys
        )

        data['headway_min'] = route_metrics.get_headway_min_stats(
            direction_id, start_stop_id,
            rng, keys
        )

    except errors.ArrivalHistoryNotFoundError as ex:
        return make_error_response(params, str(ex), 404)
    except errors.ValidationError as ex:
        return make_error_response(params, str(ex), 400)

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    res = Response(json.dumps(data, indent=2), mimetype='application/json')
    if not DEBUG:
        res.headers['Cache-Control'] = 'max-age=60'
    return res
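util.get_dates_in_range is not shown in these examples; a self-contained stand-in with the same calling convention is sketched below (the real helper may add validation or extra options).

# Hypothetical stand-in for util.get_dates_in_range.
from datetime import date, timedelta

def get_dates_in_range(start_date_str, end_date_str):
    start_date = date.fromisoformat(start_date_str)
    end_date = date.fromisoformat(end_date_str)
    num_days = (end_date - start_date).days
    return [start_date + timedelta(days=i) for i in range(num_days + 1)]

# e.g. get_dates_in_range('2019-04-08', '2019-04-10')
#      -> [date(2019, 4, 8), date(2019, 4, 9), date(2019, 4, 10)]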
Example #6
def compute_trip_times(d: date,
                       tz,
                       agency_id,
                       routes,
                       save_to_s3=True,
                       stat_ids=None):
    if stat_ids is None:
        stat_ids = stat_groups.keys()

    print(d)
    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    all_trip_time_stats = {}
    for interval_index, _ in enumerate(timestamp_intervals):
        all_trip_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_trip_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id
        print(route_id)
        t1 = time.time()

        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        route_df = history.get_data_frame().sort_values('TRIP', axis=0)

        add_trip_time_stats_for_route(all_trip_time_stats, timestamp_intervals,
                                      stat_ids, route_config, route_df)

        t2 = time.time()
        print(f' {round(t2-t1, 2)} sec')

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]
            data_str = json.dumps(
                {
                    'version': trip_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_trip_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = trip_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            print(cache_path)

            cache_dir = Path(cache_path).parent
            if not cache_dir.exists():
                cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = trip_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = trip_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                object = s3.Object(s3_bucket, s3_path)
                object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                           CacheControl='max-age=86400',
                           ContentType='application/json',
                           ContentEncoding='gzip',
                           ACL='public-read')
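The cache-then-upload pattern at the end (write the JSON locally, then put a gzipped, publicly readable copy to S3 with caching headers) could be factored into a small helper. A sketch using the same boto3 calls as above:

# Sketch of the save pattern used above, factored into one function.
# The headers mirror the values in the example; bucket and key are passed in.
import gzip
from pathlib import Path
import boto3

def save_json_cache(data_str: str, cache_path: str, s3_bucket: str = None, s3_path: str = None):
    # write the local cache file, creating parent directories as needed
    Path(cache_path).parent.mkdir(parents=True, exist_ok=True)
    with open(cache_path, 'w') as f:
        f.write(data_str)

    # optionally upload a gzipped copy to S3
    if s3_bucket and s3_path:
        s3 = boto3.resource('s3')
        s3.Object(s3_bucket, s3_path).put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read')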
Example #7
                        help='hh:mm of first local time to include each day')
    parser.add_argument('--end-time',
                        help='hh:mm of first local time to exclude each day')

    args = parser.parse_args()

    route_id = args.route
    date_str = args.date
    vid = args.vid

    agency = 'sf-muni'

    start_time_str = args.start_time
    end_time_str = args.end_time

    route_config = nextbus.get_route_config(agency, route_id)

    tz = pytz.timezone('US/Pacific')

    dates = util.get_dates_in_range(args.date, args.date)

    print(f"Date: {', '.join([str(date) for date in dates])}")
    print(f"Time of Day: [{start_time_str}, {end_time_str})")
    print(f"Route: {route_id} ({route_config.title})")
    print(f"Vehicle: {vid}")

    for d in dates:
        history = arrival_history.get_by_date(agency, route_id, d)

        df = history.get_data_frame(vehicle_id=vid,
                                    tz=tz,
Example #8
    version = args.version
    if version is None:
        version = arrival_history.DefaultVersion

    route_id = args.route
    date_str = args.date
    s1 = args.s1
    s2 = args.s2

    agency = 'sf-muni'

    start_time_str = args.start_time
    end_time_str = args.end_time

    route_config = nextbus.get_route_config(agency, route_id)

    s1_info = route_config.get_stop_info(s1)
    s1_dirs = route_config.get_directions_for_stop(s1)
    if len(s1_dirs) == 0 or s1_info is None:
        raise Exception(f"invalid stop id {s1}")

    s2_info = route_config.get_stop_info(s2)
    s2_dirs = route_config.get_directions_for_stop(s2)
    if len(s2_dirs) == 0 or s2_info is None:
        raise Exception(f"invalid stop id {s2}")

    if s1 == s2:
        raise Exception(f"stop {s1} and {s2} are the same")

    common_dirs = [dir for dir in s1_dirs if dir in s2_dirs]
Example #9
def compute_wait_times(agency_id,
                       d: date,
                       routes,
                       tz,
                       stat_ids,
                       save_to_s3=False):
    print(d)
    all_wait_time_stats = {}

    time_str_intervals = constants.DEFAULT_TIME_STR_INTERVALS.copy()
    time_str_intervals.append(('07:00', '19:00'))

    timestamp_intervals = [
        (int(util.get_localized_datetime(d, start_time_str, tz).timestamp()),
         int(util.get_localized_datetime(d, end_time_str, tz).timestamp()))
        for start_time_str, end_time_str in time_str_intervals
    ]

    timestamp_intervals.append((None, None))
    time_str_intervals.append((None, None))

    for interval_index, _ in enumerate(timestamp_intervals):
        all_wait_time_stats[interval_index] = {}
        for stat_id in stat_ids:
            all_wait_time_stats[interval_index][stat_id] = {}

    for route in routes:
        route_id = route.id

        print(route_id)
        route_config = nextbus.get_route_config(agency_id, route_id)

        try:
            history = arrival_history.get_by_date(agency_id, route_id, d)
        except FileNotFoundError as ex:
            print(ex)
            continue

        for interval_index, _ in enumerate(timestamp_intervals):
            for stat_id in stat_ids:
                all_wait_time_stats[interval_index][stat_id][route_id] = {}

        df = history.get_data_frame()
        df = df.sort_values('TIME', axis=0)

        for dir_info in route_config.get_direction_infos():

            dir_id = dir_info.id

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    all_wait_time_stats[interval_index][stat_id][route_id][
                        dir_id] = {}

            stop_ids = dir_info.get_stop_ids()
            sid_values = df['SID'].values

            for i, stop_id in enumerate(stop_ids):
                stop_df = df[sid_values == stop_id]

                all_time_values = stop_df['TIME'].values

                for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
                    wait_time_stats = wait_times.get_stats(
                        all_time_values, start_time, end_time)

                    add_wait_time_stats_for_stop(
                        all_wait_time_stats[interval_index], stat_ids,
                        route_id, dir_id, stop_id, wait_time_stats)

            for interval_index, _ in enumerate(timestamp_intervals):
                for stat_id in stat_ids:
                    add_median_wait_time_stats_for_direction(
                        all_wait_time_stats[interval_index][stat_id][route_id]
                        [dir_id], stat_id)

    for interval_index, (start_time, end_time) in enumerate(timestamp_intervals):
        start_time_str, end_time_str = time_str_intervals[interval_index]

        for stat_id in stat_ids:
            stat = stat_groups[stat_id]

            data_str = json.dumps(
                {
                    'version': wait_times.DefaultVersion,
                    'start_time': start_time,
                    'end_time': end_time,
                    'stat': stat,
                    'routes': all_wait_time_stats[interval_index][stat_id]
                },
                separators=(',', ':'))

            cache_path = wait_times.get_cache_path(agency_id, d, stat_id,
                                                   start_time_str,
                                                   end_time_str)

            cache_dir = Path(cache_path).parent
            if not cache_dir.exists():
                cache_dir.mkdir(parents=True, exist_ok=True)

            print(f'saving to {cache_path}')
            with open(cache_path, "w") as f:
                f.write(data_str)

            if save_to_s3:
                s3 = boto3.resource('s3')
                s3_path = wait_times.get_s3_path(agency_id, d, stat_id,
                                                 start_time_str, end_time_str)
                s3_bucket = wait_times.get_s3_bucket()
                print(f'saving to s3://{s3_bucket}/{s3_path}')
                object = s3.Object(s3_bucket, s3_path)
                object.put(Body=gzip.compress(bytes(data_str, 'utf-8')),
                           CacheControl='max-age=86400',
                           ContentType='application/json',
                           ContentEncoding='gzip',
                           ACL='public-read')
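The nested stats dictionaries above are seeded level by level (interval, stat, route, direction) before being filled in. One alternative, shown as a sketch rather than the project's actual approach, is a recursively defaulting dict that creates intermediate levels on first access; since defaultdict subclasses dict, json.dumps serializes it directly.

# Sketch of an alternative to pre-seeding nested dicts.
from collections import defaultdict

def nested_dict():
    return defaultdict(nested_dict)

all_wait_time_stats = nested_dict()
# illustrative keys only: interval index, stat id, route, direction, stop
all_wait_time_stats[0]['median']['12']['inbound']['3476'] = 4.5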
Example #10
def save_routes_for_agency(agency: config.Agency, save_to_s3=True):
    agency_id = agency.id
    gtfs_cache_dir = f'{util.get_data_dir()}/gtfs-{agency_id}'

    download_gtfs_data(agency, gtfs_cache_dir)

    feed = ptg.load_geo_feed(gtfs_cache_dir, {})

    print(f"Loading {agency_id} routes...")
    routes_df = feed.routes
    if agency.gtfs_agency_id is not None:
        routes_df = routes_df[routes_df.agency_id == agency.gtfs_agency_id]

    routes_data = []

    print(f"Loading {agency_id} trips...")
    trips_df = feed.trips
    trips_df['direction_id'] = trips_df['direction_id'].astype(str)

    print(f"Loading {agency_id} stop times...")
    stop_times_df = feed.stop_times
    print(f"Loading {agency_id} shapes...")
    shapes_df = feed.shapes

    print(f"Loading {agency_id} stops...")
    stops_df = feed.stops

    # gtfs_stop_ids_map allows looking up row from stops.txt via GTFS stop_id
    gtfs_stop_ids_map = {stop.stop_id: stop for stop in stops_df.itertuples()}

    stop_id_gtfs_field = agency.stop_id_gtfs_field

    # get OpenTransit stop ID for GTFS stop_id (may be the same)
    def normalize_gtfs_stop_id(gtfs_stop_id):
        if stop_id_gtfs_field != 'stop_id':
            return getattr(gtfs_stop_ids_map[gtfs_stop_id], stop_id_gtfs_field)
        else:
            return gtfs_stop_id

    # stops_map allows looking up row from stops.txt via OpenTransit stop ID
    if stop_id_gtfs_field != 'stop_id':
        stops_map = {getattr(stop, stop_id_gtfs_field): stop for stop in stops_df.itertuples()}
    else:
        stops_map = gtfs_stop_ids_map

    if agency.provider == 'nextbus':
        nextbus_route_order = [route.id for route in nextbus.get_route_list(agency.nextbus_id)]

    for route in routes_df.itertuples():

        gtfs_route_id = route.route_id

        short_name = route.route_short_name
        long_name = route.route_long_name

        if isinstance(short_name, str) and isinstance(long_name, str):
            title = f'{short_name} - {long_name}'
        elif isinstance(short_name, str):
            title = short_name
        else:
            title = long_name

        type = int(route.route_type) if hasattr(route, 'route_type') else None
        url = route.route_url if hasattr(route, 'route_url') and isinstance(route.route_url, str) else None
        #color = route.route_color
        #text_color = route.route_text_color

        route_id = getattr(route, agency.route_id_gtfs_field)

        if agency.provider == 'nextbus':
            route_id = route_id.replace('-', '_') # hack to handle muni route IDs where e.g. GTFS has "T-OWL" but nextbus has "T_OWL"
            try:
                nextbus_route_config = nextbus.get_route_config(agency.nextbus_id, route_id)
                title = nextbus_route_config.title
            except Exception as ex:
                print(ex)
                continue

            try:
                sort_order = nextbus_route_order.index(route_id)
            except ValueError as ex:
                print(ex)
                sort_order = None
        else:
            sort_order = int(route.route_sort_order) if hasattr(route, 'route_sort_order') else None

        print(f'route {route_id} {title}')

        route_data = {
            'id': route_id,
            'title': title,
            'url': url,
            'type': type,
            #'color': color,
            #'text_color': text_color,
            'gtfs_route_id': gtfs_route_id,
            'sort_order': sort_order,
            'stops': {},
            'directions': [],
        }

        directions = []

        route_directions_df = feed.get('route_directions.txt') # unofficial trimet gtfs extension
        if not route_directions_df.empty:
            route_directions_df = route_directions_df[route_directions_df['route_id'] == gtfs_route_id]
        else:
            route_directions_df = None

        routes_data.append(route_data)

        route_trips_df = trips_df[trips_df['route_id'] == gtfs_route_id]

        route_direction_id_values = route_trips_df['direction_id'].values

        def add_custom_direction(custom_direction_info):
            direction_id = custom_direction_info['id']
            print(f' custom direction = {direction_id}')

            gtfs_direction_id = custom_direction_info['gtfs_direction_id']

            direction_trips_df = route_trips_df[route_direction_id_values == gtfs_direction_id]

            included_stop_ids = custom_direction_info.get('included_stop_ids', [])
            excluded_stop_ids = custom_direction_info.get('excluded_stop_ids', [])

            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id
            )

            def contains_included_stops(shape_stop_ids):
                min_index = 0
                for stop_id in included_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id, min_index)
                    except ValueError:
                        return False
                    min_index = index + 1 # stops must appear in same order as in included_stop_ids
                return True

            def contains_excluded_stop(shape_stop_ids):
                for stop_id in excluded_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id)
                        return True
                    except ValueError:
                        pass
                return False

            matching_shapes = []
            for shape in shapes:
                shape_stop_ids = shape['stop_ids']
                if contains_included_stops(shape_stop_ids) and not contains_excluded_stop(shape_stop_ids):
                    matching_shapes.append(shape)

            if len(matching_shapes) != 1:
                matching_shape_ids = [shape['shape_id'] for shape in matching_shapes]
                error_message = f'{len(matching_shapes)} shapes found for route {route_id} with GTFS direction ID {gtfs_direction_id}'
                if len(included_stop_ids) > 0:
                    error_message += f" including {','.join(included_stop_ids)}"

                if len(excluded_stop_ids) > 0:
                    error_message += f" excluding {','.join(excluded_stop_ids)}"

                if len(matching_shape_ids) > 0:
                    error_message += f": {','.join(matching_shape_ids)}"

                raise Exception(error_message)

            matching_shape = matching_shapes[0]
            matching_shape_id = matching_shape['shape_id']
            matching_shape_count = matching_shape['count']

            print(f'  matching shape = {matching_shape_id} ({matching_shape_count} times)')

            add_direction(
                id=direction_id,
                gtfs_shape_id=matching_shape_id,
                gtfs_direction_id=gtfs_direction_id,
                stop_ids=matching_shape['stop_ids'],
                title=custom_direction_info.get('title', None)
            )

        def add_default_direction(direction_id):
            print(f' default direction = {direction_id}')

            direction_trips_df = route_trips_df[route_direction_id_values == direction_id]

            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id)

            best_shape = shapes[0]
            best_shape_id = best_shape['shape_id']
            best_shape_count = best_shape['count']

            print(f'  most common shape = {best_shape_id} ({best_shape_count} times)')

            add_direction(
                id=direction_id,
                gtfs_shape_id=best_shape_id,
                gtfs_direction_id=direction_id,
                stop_ids=best_shape['stop_ids']
            )

        def add_direction(id, gtfs_shape_id, gtfs_direction_id, stop_ids, title = None):

            if title is None:
                default_direction_info = agency.default_directions.get(gtfs_direction_id, {})
                title_prefix = default_direction_info.get('title_prefix', None)

                last_stop_id = stop_ids[-1]
                last_stop = stops_map[last_stop_id]

                if title_prefix is not None:
                    title = f"{title_prefix} to {last_stop.stop_name}"
                else:
                    title = f"To {last_stop.stop_name}"

            print(f'  title = {title}')

            dir_data = {
                'id': id,
                'title': title,
                'gtfs_shape_id': gtfs_shape_id,
                'gtfs_direction_id': gtfs_direction_id,
                'stops': stop_ids,
                'stop_geometry': {},
            }
            route_data['directions'].append(dir_data)

            for stop_id in stop_ids:
                stop = stops_map[stop_id]
                stop_data = {
                    'id': stop_id,
                    'lat': round(stop.geometry.y, 5), # stop_lat in gtfs
                    'lon': round(stop.geometry.x, 5), # stop_lon in gtfs
                    'title': stop.stop_name,
                    'url': stop.stop_url if hasattr(stop, 'stop_url') and isinstance(stop.stop_url, str) else None,
                }
                route_data['stops'][stop_id] = stop_data

            geometry = shapes_df[shapes_df['shape_id'] == gtfs_shape_id]['geometry'].values[0]

            # partridge returns GTFS geometries for each shape_id as a shapely LineString
            # (https://shapely.readthedocs.io/en/stable/manual.html#linestrings).
            # Each coordinate is an array in [lon,lat] format (note: longitude first, latitude second)
            dir_data['coords'] = [
                {
                    'lat': round(coord[1], 5),
                    'lon': round(coord[0], 5)
                } for coord in geometry.coords
            ]

            if agency.provider == 'nextbus':
                # match nextbus direction IDs with GTFS direction IDs
                best_nextbus_dir_info, best_terminal_dist = match_nextbus_direction(nextbus_route_config, geometry)
                print(f'  {id} = {best_nextbus_dir_info.id} (terminal_dist={int(best_terminal_dist)}) {" (questionable match)" if best_terminal_dist > 300 else ""}')
                # dir_data['title'] = best_nextbus_dir_info.title
                dir_data['nextbus_direction_id'] = best_nextbus_dir_info.id

            start_lat = geometry.coords[0][1]
            start_lon = geometry.coords[0][0]

            #print(f"  start_lat = {start_lat} start_lon = {start_lon}")

            deg_lat_dist = util.haver_distance(start_lat, start_lon, start_lat-0.1, start_lon)*10
            deg_lon_dist = util.haver_distance(start_lat, start_lon, start_lat, start_lon-0.1)*10

            # projection function from lon/lat coordinates in degrees (z ignored) to x/y coordinates in meters.
            # satisfying the interface of shapely.ops.transform (https://shapely.readthedocs.io/en/stable/manual.html#shapely.ops.transform).
            # This makes it possible to use shapely methods to calculate the distance in meters between geometries
            def project_xy(lon, lat, z=None):
                return (round((lon - start_lon) * deg_lon_dist, 1), round((lat - start_lat) * deg_lat_dist, 1))

            xy_geometry = shapely.ops.transform(project_xy, geometry)

            shape_lon_lat = np.array(geometry.coords).T
            shape_lon = shape_lon_lat[0]
            shape_lat = shape_lon_lat[1]

            shape_prev_lon = np.r_[shape_lon[0], shape_lon[:-1]]
            shape_prev_lat = np.r_[shape_lat[0], shape_lat[:-1]]

            # shape_cumulative_dist[i] is the cumulative distance in meters along the shape geometry from 0th to ith coordinate
            shape_cumulative_dist = np.cumsum(util.haver_distance(shape_lon, shape_lat, shape_prev_lon, shape_prev_lat))

            shape_lines_xy = [shapely.geometry.LineString(xy_geometry.coords[i:i+2]) for i in range(0, len(xy_geometry.coords) - 1)]

            # this is the total distance of the GTFS shape, which may not be exactly the same as the
            # distance along the route between the first and last Nextbus stop
            dir_data['distance'] = int(shape_cumulative_dist[-1])

            print(f"  distance = {dir_data['distance']}")

            # Find each stop along the route shape, so that the frontend can draw line segments between stops along the shape
            start_index = 0

            for stop_id in stop_ids:
                stop_info = route_data['stops'][stop_id]

                # Need to project lon/lat coords to x/y in order for shapely to determine the distance between
                # a point and a line (shapely doesn't support distance for lon/lat coords)

                stop_xy = shapely.geometry.Point(project_xy(stop_info['lon'], stop_info['lat']))

                stop_geometry = get_stop_geometry(stop_xy, shape_lines_xy, shape_cumulative_dist, start_index)

                if stop_geometry['offset'] > 100:
                    print(f"    !! bad geometry for stop {stop_id}: {stop_geometry['offset']} m from route line segment")
                    continue

                dir_data['stop_geometry'][stop_id] = stop_geometry

                start_index = stop_geometry['after_index']

        if route_id in agency.custom_directions:
            for custom_direction_info in agency.custom_directions[route_id]:
                add_custom_direction(custom_direction_info)
        else:
            for direction_id in np.unique(route_direction_id_values):
                add_default_direction(direction_id)

    if routes_data[0]['sort_order'] is not None:
        sort_key = lambda route_data: route_data['sort_order']
    else:
        sort_key = lambda route_data: route_data['id']

    routes_data = sorted(routes_data, key=sort_key)

    data_str = json.dumps({
        'version': routeconfig.DefaultVersion,
        'routes': routes_data
    }, separators=(',', ':'))

    cache_path = routeconfig.get_cache_path(agency_id)

    with open(cache_path, "w") as f:
        f.write(data_str)

    if save_to_s3:
        s3 = boto3.resource('s3')
        s3_path = routeconfig.get_s3_path(agency_id)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        )
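util.haver_distance is used above both with scalars (to estimate meters per 0.1 degree of latitude/longitude near the route's start) and with NumPy arrays (to accumulate distance along the shape). A generic vectorized haversine returning meters is sketched below; it is a stand-in, and the real helper's exact argument order should be checked against util.

# Generic haversine distance in meters (works on scalars or NumPy arrays).
# A stand-in for util.haver_distance; the argument order (lat1, lon1, lat2, lon2)
# may not match the project's helper exactly.
import numpy as np

def haver_distance(lat1, lon1, lat2, lon2):
    earth_radius_m = 6371000
    lat1, lon1, lat2, lon2 = map(np.radians, (lat1, lon1, lat2, lon2))
    a = (np.sin((lat2 - lat1) / 2) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2)
    return 2 * earth_radius_m * np.arcsin(np.sqrt(a))

# e.g. meters per 0.1 degree of latitude near San Francisco:
# haver_distance(37.77, -122.42, 37.67, -122.42) -> roughly 11100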
Example #11
def metrics_page():
    metrics_start = time.time()

    route_id = request.args.get('route_id')
    if route_id is None:
        route_id = '12'
    start_stop_id = request.args.get('start_stop_id')
    if start_stop_id is None:
        start_stop_id = '3476'
    end_stop_id = request.args.get('end_stop_id')

    direction_id = request.args.get('direction_id')

    start_date_str = request.args.get('start_date')
    end_date_str = request.args.get('end_date')
    date_str = request.args.get('date')
    if date_str is not None:
        start_date_str = end_date_str = date_str
    else:
        if start_date_str is None:
            start_date_str = '2019-02-01'
        if end_date_str is None:
            end_date_str = start_date_str

    start_time_str = request.args.get('start_time')  # e.g. "14:00" (24h time of day)
    end_time_str = request.args.get('end_time')  # e.g. "18:00" (24h time of day)

    params = {
        'start_stop_id': start_stop_id,
        'end_stop_id': end_stop_id,
        'route_id': route_id,
        'direction_id': direction_id,
        'start_date': start_date_str,
        'end_date': end_date_str,
        'start_time': start_time_str,
        'end_time': end_time_str,
    }

    try:
        dates = util.get_dates_in_range(start_date_str, end_date_str)
    except Exception as ex:
        return Response(
            json.dumps({
                'params': params,
                'error': str(ex),
            }, indent=2),
            status=400,
            mimetype='application/json')

    tz = pytz.timezone('US/Pacific')

    route_config = nextbus.get_route_config('sf-muni', route_id)
    start_stop_info = route_config.get_stop_info(start_stop_id)
    end_stop_info = route_config.get_stop_info(end_stop_id) if end_stop_id else None

    # 404 if the given stop isn't on the route
    # TODO: what should be done for the case where the start stop id is valid but the end stop id isn't?
    if start_stop_info is None:
        return Response(
            json.dumps({
                'params': params,
                'error': f"Stop {start_stop_id} is not on route {route_id}",
            }, indent=2),
            status=404,
            mimetype='application/json')

    if direction_id is not None:
        dir_info = route_config.get_direction_info(direction_id)
        if dir_info is not None:
            dir_infos = [dir_info]
        else:
            dir_infos = []
    else:
        # TODO: validation for end_stop_id directions if given (see trips.py)
        dirs = route_config.get_directions_for_stop(start_stop_id)
        dir_infos = [
            route_config.get_direction_info(direction) for direction in dirs
        ]

    if end_stop_id:
        end_stop_dirs = route_config.get_directions_for_stop(end_stop_id)
        both_stops_same_dir = direction_id in end_stop_dirs

    directions = [{
        'id': dir_info.id,
        'title': dir_info.title
    } for dir_info in dir_infos]

    headway_min_arr = []
    waits = []
    if end_stop_id:
        completed_trips = []

    for d in dates:
        try:
            history = arrival_history.get_by_date('sf-muni', route_id, d)

            df = history.get_data_frame(start_stop_id,
                                        tz=tz,
                                        direction_id=direction_id,
                                        start_time_str=start_time_str,
                                        end_time_str=end_time_str)

            # get all headways for the selected stop (arrival time minus previous arrival time), computed separately for each day
            df['headway_min'] = metrics.compute_headway_minutes(df)

            # temporarily skip calculation of wait times until data is shown in front end
            waits.append(
                wait_times.get_waits(df, start_stop_info, d, tz, route_id,
                                     start_time_str, end_time_str))

            if end_stop_id and both_stops_same_dir:
                trips = trip_times.get_trip_times(df, history, tz,
                                                  start_stop_id, end_stop_id)
                completed_trips.append(
                    trips.trip_min[trips.trip_min.notnull()])

            # remove NaN row (first bus of the day)
            headway_min = df.headway_min[df.headway_min.notnull()]
            headway_min_arr.append(headway_min)
        except FileNotFoundError as ex:
            return Response(
                json.dumps({
                    'params': params,
                    'error': f"Arrival history not found for route {route_id} on {d.isoformat()}",
                }, indent=2),
                status=404,
                mimetype='application/json')
        except IndexError as ex:
            return Response(
                json.dumps({
                    'params': params,
                    'error': f"No arrivals found for stop {start_stop_id} on route {route_id} in direction {direction_id} on {d.isoformat()}",
                }, indent=2),
                status=404,
                mimetype='application/json')

    headway_min = pd.concat(headway_min_arr)
    waits = pd.concat(waits)
    if end_stop_id and both_stops_same_dir:
        completed_trips = pd.concat(completed_trips)

    if headway_min.empty:
        return Response(
            json.dumps({
                'params': params,
                'error': f"No arrivals for stop {start_stop_id} on route {route_id}",
            }, indent=2),
            status=404,
            mimetype='application/json')

    data = {
        'params': params,
        'route_title': route_config.title,
        'start_stop_title': start_stop_info.title if start_stop_info else None,
        'end_stop_title': end_stop_info.title if end_stop_info else None,
        'directions': directions,
        'headway_min': metrics.get_headways_stats(headway_min),
        'wait_times': metrics.get_wait_times_stats(waits, tz),
        'trip_times': metrics.get_trip_times_stats(completed_trips, start_stop_id, end_stop_id)
            if end_stop_id and both_stops_same_dir else None,
    }

    metrics_end = time.time()
    data['processing_time'] = (metrics_end - metrics_start)

    return Response(json.dumps(data, indent=2), mimetype='application/json')
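metrics.compute_headway_minutes presumably derives each arrival's headway from the previous arrival at the same stop. A minimal pandas sketch of that idea, given the TIME column of Unix timestamps seen in the arrival-history data frames above (this is an illustration, not necessarily the project's implementation):

# Headway in minutes since the previous arrival, for a DataFrame whose TIME
# column holds Unix timestamps sorted ascending. Illustrative stand-in only.
import pandas as pd

def compute_headway_minutes(df: pd.DataFrame) -> pd.Series:
    return df['TIME'].diff() / 60.0   # first row is NaN (no previous arrival)

arrivals = pd.DataFrame({'TIME': [1554736800, 1554737160, 1554737700]})
print(compute_headway_minutes(arrivals).tolist())  # [nan, 6.0, 9.0]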