Beispiel #1
0
def reset_feed_dates(daterange):
    """Overwrite the feed_info start/end dates with the given date range.

    Does nothing when the feed has no feed_info table.

    Args:
        daterange: mapping with 'start' and 'end' entries, passed straight
            to GTFSDateRange.
    """
    if not gtfs.has_table('feed_info'):
        return

    new_range = GTFSDateRange(daterange['start'], daterange['end'])
    info = gtfs.get_table('feed_info')

    # write both bounds in the string form produced by datestring()
    for column, bound in (('feed_start_date', new_range.start),
                          ('feed_end_date', new_range.end)):
        info[column] = bound.datestring()

    gtfs.update_table('feed_info', info)
Beispiel #2
0
def remove_trips_with_nonexistent_calendars():
    """Drop trips whose service_id is not present in the calendar table.

    When a frequencies table exists, its rows are reduced to the surviving
    trips as well. The frequencies table is updated before the trips table.
    """
    calendar_table = gtfs.get_table('calendar', index=False)
    all_trips = gtfs.get_table('trips')

    has_known_service = all_trips['service_id'].isin(
        calendar_table['service_id'])
    surviving_trips = all_trips[has_known_service]

    if gtfs.has_table('frequencies'):
        freq = gtfs.get_table('frequencies')
        # the index of the trips table is what frequencies.trip_id is
        # matched against
        surviving_ids = surviving_trips.index.to_series()
        still_referenced = freq['trip_id'].isin(surviving_ids)
        gtfs.update_table('frequencies', freq[still_referenced])

    gtfs.update_table('trips', surviving_trips)
Beispiel #3
0
def filter_board_alight_by_daterange(daterange):
    """Keep only board_alight rows whose service_date lies in the date range.

    Does nothing when the board_alight table is missing or has no
    'service_date' column.

    Args:
        daterange: mapping with 'start' and 'end' entries, passed straight
            to GTFSDateRange.
    """
    if not gtfs.has_table('board_alight'):
        return

    ridership = gtfs.get_table('board_alight', index=False)
    if 'service_date' not in ridership.columns:
        return

    window = GTFSDateRange(daterange['start'], daterange['end'])

    def served_in_window(record):
        return window.includes(record['service_date'])

    # the helper column stays on the table that is handed to update_table
    ridership['_inrange'] = ridership.apply(served_in_window, axis=1)

    gtfs.update_table('board_alight', ridership[ridership['_inrange']])
Beispiel #4
0
def filter_trips_by_date(date):
    """Reduce the trips table to the trips that run on ``date``.

    A trip is kept when its calendar span contains the date and its
    day-of-week flag is set. If a calendar_dates table exists, services
    added on the date are kept too and services removed on the date are
    dropped; without a calendar table only the added services count.

    Args:
        date: the service date, compared directly against the start_date,
            end_date and calendar_dates 'date' columns.
    """
    # TODO consider replacing with filter_calendars_by_date, prune

    trips = get_trips_extended()
    weekday_column = GTFSDate(date).dow()

    within_span = (trips['start_date'] <= date) & (date <= trips['end_date'])
    weekday_active = trips[weekday_column] == GTFSBool.TRUE
    regular_service = within_span & weekday_active

    keep = regular_service

    # apply calendar exceptions that fall on the requested date
    if gtfs.has_table('calendar_dates'):
        exceptions = gtfs.get_table('calendar_dates')
        on_date = exceptions['date'] == date

        added_ids = exceptions[
            on_date
            & (exceptions['exception_type'] == GTFSExceptionType.ADDED)
        ]['service_id']
        removed_ids = exceptions[
            on_date
            & (exceptions['exception_type'] == GTFSExceptionType.REMOVED)
        ]['service_id']

        is_added = trips['service_id'].isin(added_ids)
        is_removed = trips['service_id'].isin(removed_ids)

        if gtfs.has_table('calendar'):
            keep = (regular_service & ~is_removed) | is_added
        else:
            keep = is_added

    gtfs.update_table('trips', trips[keep])
Beispiel #5
0
def filter_calendar_dates_by_daterange(daterange):
    """Keep only calendar_dates rows whose date lies in the date range.

    Does nothing when the feed has no calendar_dates table.

    Args:
        daterange: mapping with 'start' and 'end' entries, passed straight
            to GTFSDateRange.
    """
    if not gtfs.has_table('calendar_dates'):
        return

    exceptions = gtfs.get_table('calendar_dates')
    window = GTFSDateRange(daterange['start'], daterange['end'])

    def as_gtfs_date(record):
        return GTFSDate(record['date'])

    def falls_in_window(record):
        return window.includes(record['date'])

    # both helper columns stay on the table that is handed to update_table
    exceptions['_gtfs_date'] = exceptions.apply(as_gtfs_date, axis=1)
    exceptions['_inrange'] = exceptions.apply(falls_in_window, axis=1)

    gtfs.update_table('calendar_dates', exceptions[exceptions['_inrange']])
Beispiel #6
0
def filter_calendars_by_daterange(daterange):
    """Restrict the calendar table to services relevant to a date range.

    A calendar entry is kept when its own date range overlaps the filter
    range on at least one active day of week, or when its service_id is
    referenced by a calendar_dates exception inside the filter range.
    Entries that overlap the filter range get their start_date/end_date
    trimmed to the overlap; entries kept only because of an exception keep
    their original bounds.

    Does nothing when the feed has no calendar table.

    Args:
        daterange: mapping with 'start' and 'end' entries, passed straight
            to GTFSDateRange.
    """
    # same guard as the other table filters: feeds may come without calendar
    if not gtfs.has_table('calendar'):
        return

    calendar = gtfs.get_table('calendar')
    filter_daterange = GTFSDateRange(daterange['start'], daterange['end'])

    calendar['_gtfs_daterange'] = calendar.apply(
        lambda row: GTFSDateRange(row['start_date'], row['end_date']), axis=1)
    calendar['_overlap'] = calendar['_gtfs_daterange'].apply(
        lambda dr: filter_daterange.get_overlap(dr))

    # we want to remove calendar entries that don't overlap DOWs
    calendar['_dows_overlap'] = calendar.apply(
        lambda row: GTFSBool.TRUE in (
            row[dow] for dow in filter_daterange.days_of_week()),
        axis=1)

    # we want to keep calendar entries that are used in overlapping exceptions
    if gtfs.has_table('calendar_dates'):
        calendar_dates = gtfs.get_table('calendar_dates')
        calendar_dates['_date_overlap'] = calendar_dates.apply(
            lambda row: filter_daterange.includes(row['date']), axis=1)
        calendar_dates = calendar_dates[calendar_dates['_date_overlap']]
        # the calendar index is matched against calendar_dates.service_id
        calendar['_exception_overlap'] = calendar.index.to_series().isin(
            calendar_dates['service_id'])
    else:
        calendar['_exception_overlap'] = False

    keep = ((calendar['_overlap'].notnull() & calendar['_dows_overlap'])
            | calendar['_exception_overlap'])
    # explicit copy so the column assignments below work on an owned frame
    calendar = calendar[keep].copy()

    # trim bounds to fit within daterange; rows kept only because of an
    # exception have a null overlap (the filter above relies on that), so
    # their original bounds are left untouched instead of dereferencing it
    has_overlap = calendar['_overlap'].notnull()
    calendar['start_date'] = [
        overlap.start.datestring() if trimmed else original
        for overlap, trimmed, original in zip(
            calendar['_overlap'], has_overlap, calendar['start_date'])
    ]
    calendar['end_date'] = [
        overlap.end.datestring() if trimmed else original
        for overlap, trimmed, original in zip(
            calendar['_overlap'], has_overlap, calendar['end_date'])
    ]

    gtfs.update_table('calendar', calendar)
Beispiel #7
0
def get_feed_start_end_daterange():
    """Return the feed's validity period as a GTFSDateRange.

    The bounds are read from the feed_info row labelled 0. Returns None
    when the feed has no feed_info table.
    """
    if gtfs.has_table('feed_info'):
        info = gtfs.get_table('feed_info')
        first_day = info.loc[0, 'feed_start_date']
        last_day = info.loc[0, 'feed_end_date']
        return GTFSDateRange(first_day, last_day)

    return None
Beispiel #8
0
def interpolate_stop_times():
    """Fill missing stop_times times by interpolating along shape distance.

    Stops that carry an arrival or departure time act as timepoints. Each
    stop is assigned to the chunk that starts at a timepoint (inclusive) and
    ends at the next timepoint of the same trip. A missing time is estimated
    linearly between the chunk's start departure time and end arrival time,
    weighted by the stop's shape_dist_traveled within the chunk. Existing
    times are never overwritten.

    Returns:
        False if interpolation is not possible (no usable
        shape_dist_traveled values in stop_times, or no shapes table / an
        empty one); True after the stop_times table has been updated.
    """
    stop_times = gtfs.get_table('stop_times')
    shapes = gtfs.get_table('shapes')

    no_shape_dist_traveled = 'shape_dist_traveled' not in stop_times.columns \
        or stop_times['shape_dist_traveled'].isna().all()

    no_shapes_txt = not gtfs.has_table('shapes') or shapes.empty

    if no_shape_dist_traveled or no_shapes_txt:
        return False

    # build table with chunk information

    df = stop_times.copy()
    df['has_arrival'] = df['arrival_time'].notna()
    df['has_departure'] = df['departure_time'].notna()

    # keep the timepoints only; explicit copy so the column assignments
    # below operate on an owned frame rather than a filtered slice
    df = df[df['has_arrival'] | df['has_departure']].copy()

    # forward-looking window: reverse each trip, take the rolling max of two
    # consecutive stop_sequence values, then reverse back, which yields the
    # stop_sequence of the following timepoint (NaN for a trip's last one)
    # https://stackoverflow.com/questions/50411098/how-to-do-forward-rolling-sum-in-pandas
    df['next_stop_sequence'] = (
        df.sort_values(by=['trip_id', 'stop_sequence'])
        .iloc[::-1]
        .groupby('trip_id')['stop_sequence']
        .transform(lambda x: x.rolling(2).max())
        .iloc[::-1]
    )

    # cleanup: the last timepoint of a trip points at itself
    df['next_stop_sequence'] = df['next_stop_sequence'] \
        .fillna(df['stop_sequence']).astype('int64')

    # every stop_sequence value covered by the chunk, end excluded
    df['stop_sequence_list'] = df.apply(
        lambda row: list(
            range(row['stop_sequence'], row['next_stop_sequence'])
            if row['stop_sequence'] != row['next_stop_sequence']
            else [row['stop_sequence']]
        ),
        axis=1)

    df = df.explode('stop_sequence_list')
    df = df.rename(columns={
        'stop_sequence': 'start_seq',
        'next_stop_sequence': 'end_seq',
        'stop_sequence_list': 'stop_sequence',
    })

    chunks = df.set_index(['trip_id', 'stop_sequence'])[['start_seq', 'end_seq']]

    stop_times = stop_times.set_index(['trip_id', 'stop_sequence'])
    stop_times = stop_times.merge(
        chunks,
        how='left',
        right_index=True,
        left_index=True,
    )

    # values at the chunk boundaries, looked up by (trip_id, sequence)
    start_time = stop_times['departure_time'].rename('start_time')
    end_time = stop_times['arrival_time'].rename('end_time')
    start_sdt = stop_times['shape_dist_traveled'].rename('start_sdt')
    end_sdt = stop_times['shape_dist_traveled'].rename('end_sdt')

    # NOTE: these merges use the default join type, so a row whose chunk
    # keys find no match is not carried over
    stop_times = stop_times.merge(
        start_time,
        left_on=['trip_id', 'start_seq'],
        right_index=True
    )

    stop_times = stop_times.merge(
        end_time,
        left_on=['trip_id', 'end_seq'],
        right_index=True
    )

    stop_times = stop_times.merge(
        start_sdt,
        left_on=['trip_id', 'start_seq'],
        right_index=True
    )

    stop_times = stop_times.merge(
        end_sdt,
        left_on=['trip_id', 'end_seq'],
        right_index=True
    )

    def interpolate_row(row):
        # happens if last stop or on 1-stop chunks (consecutive timepoints)
        if row['start_time'] == row['end_time']:
            return row['start_time']

        chunk_distance = row['end_sdt'] - row['start_sdt']
        if chunk_distance == 0:
            # both timepoints sit at the same shape distance, so there is no
            # ratio to compute; use the chunk start time instead of dividing
            # by zero
            return row['start_time']

        start_seconds = seconds_since_zero(row['start_time'])
        end_seconds = seconds_since_zero(row['end_time'])
        progress = (row['shape_dist_traveled'] - row['start_sdt']) \
            / chunk_distance

        return seconds_to_military(
            start_seconds + int(round(progress * (end_seconds - start_seconds)))
        )

    stop_times['interp'] = stop_times.apply(interpolate_row, axis=1)
    # only gaps are filled; times present in the feed win
    stop_times['arrival_time'] = stop_times['arrival_time'] \
        .fillna(stop_times['interp'])
    stop_times['departure_time'] = stop_times['departure_time'] \
        .fillna(stop_times['interp'])

    gtfs.update_table('stop_times', stop_times.reset_index(), cascade=False)

    return True