コード例 #1
0
def load_range_schedule(start_date, end_date,
                        stop_filter, route_filter,
                        data_dir,
                        train_line='nqrw'):
    """
    Load GTFS data for a date range and attach a per-train schedule DataFrame.

    :param start_date: start of the range; passed to GTFSLoader.load_range and
                       used as the lower bound when slicing the schedule
    :param end_date: end of the range; passed to GTFSLoader.load_range and
                     used as the upper bound when slicing the schedule
    :param stop_filter: forwarded unchanged to GTFSLoader.load_range
    :param route_filter: forwarded unchanged to GTFSLoader.load_range
    :param data_dir: data directory handed to GTFSLoader
    :param train_line: train line handed to GTFSLoader (defaults to 'nqrw')
    :return: whatever loader.load_range returned, with two extra entries:
             'schedule_df' (the combined schedule) and 'loader' (the GTFSLoader used)
    """
    # Forward the caller's train_line; it used to be silently replaced by 'nqrw'.
    loader = gtfs_loader.GTFSLoader(data_dir=data_dir,
                                    train_line=train_line)
    data = loader.load_range(start_date, end_date, stop_filter=stop_filter,
                             route_filter=route_filter, verbose=True, schedule=True)

    # One single-column frame per stop_dict entry, indexed by the matching
    # train_dict entry. The two dicts are paired purely by iteration order.
    # NOTE(review): sibling helpers pair stop_dict with loader.time_dict instead
    # of loader.train_dict -- confirm which attribute holds the timestamps here.
    schedule_dfs = [
        pd.DataFrame(stops, index=times, columns=[key])
        for (key, stops), (_, times) in zip(loader.stop_dict.items(), loader.train_dict.items())
    ]

    # Change to numerical index for stops so plotting is a little easier
    line = SubwayLine(N_STOP_LIST)
    schedule_df = pd.concat(schedule_dfs)
    schedule_df.sort_index(inplace=True)
    schedule_df = schedule_df[start_date:end_date]
    # Float cells (NaN padding introduced by concat) are passed through untouched.
    schedule_df = schedule_df.applymap(lambda x: line.stop_idx(x) if not isinstance(x, float) else x)
    # Two resets on purpose: the first moves the time index into a column, the
    # second adds a plain 0..n-1 positional column next to it.
    schedule_df.reset_index(inplace=True)
    schedule_df.reset_index(inplace=True)

    data['schedule_df'] = schedule_df
    data['loader'] = loader
    return data
コード例 #2
0
def load_range_schedule(loader):
    """
    Build a "schedule" table of every train's path from a populated GTFSLoader.

    NOTE: only use this on daily grouped data, otherwise train names will collide.

    Each entry of loader.stop_dict becomes one column (named after its key) and is
    indexed by the values of the loader.time_dict entry at the same position.
    Stop ids are converted to their numeric position on the line.

    :param loader: a GTFSLoader on which load_range(...) has already been called
    :return: DataFrame with one row per observation time and one column per
             stop_dict key, preceded by the two columns added by the index resets
    :raises ValueError: if loader.stop_dict is empty
    """
    if not len(loader.stop_dict):
        raise ValueError("loader has no data, loaded.  Call GTFSLoader.load_range(...) first")

    per_train_frames = [
        pd.DataFrame(stops, index=times, columns=[train_id])
        for (train_id, stops), (_, times) in zip(loader.stop_dict.items(), loader.time_dict.items())
    ]

    # Numeric stop positions are easier to plot than stop id strings
    subway_line = SubwayLine(N_STOP_LIST)

    def _to_stop_idx(cell):
        # Floats are the NaN padding produced by the concat; leave them alone
        if isinstance(cell, float):
            return cell
        return subway_line.stop_idx(cell)

    combined = pd.concat(per_train_frames).sort_index()
    # Drop 0 UNIX time rows; guards against exploding memory if a 0 timestamp slipped in
    combined = combined['2000-01-01':]
    combined = combined.applymap(_to_stop_idx)

    # First reset turns the time index into a column, second adds a positional column
    return combined.reset_index().reset_index()
コード例 #3
0
def min_in_station(loader):
    """
    Calculate the cumulative minutes a train has been in a station

    For every one-minute bucket and stop, the result counts how many buckets
    (so far) the train currently seen at that stop has been seen there.
    Buckets in which no train is seen at a stop are NaN.

    :param loader: GTFSLoader to load data from (load_range must have been called)
    :return: DataFrame of [ time x stop_id ], data is in units of minutes
    :raises ValueError: if the loader holds no stop data
    """
    import re

    if len(loader.stop_dict) == 0:
        raise ValueError("loader has no data loaded.  Call GTFSLoader.load_range(...) first")

    # One single-column frame per stop_dict entry (column = its key), indexed by
    # the time_dict entry at the same position.
    schedule_dfs = [
        pd.DataFrame(stops, index=times, columns=[train_id])
        for (train_id, stops), (_, times) in zip(loader.stop_dict.items(), loader.time_dict.items())
    ]
    df = pd.concat(schedule_dfs)

    # Reduce string column labels to their digits so they can be aggregated numerically
    non_digits = re.compile(r'[^0-9]+')
    df.columns = df.columns.map(lambda x: float(non_digits.sub('', x)) if isinstance(x, str) else x)
    # Discard anything stamped before 2000 (e.g. zero UNIX timestamps)
    df = df.sort_index()['2000-01-01':]

    # Re-shape to [time x stop_id] holding the numeric train id seen at each stop
    df = df.stack().reset_index()
    df.columns = ['time', 'train_id', 'stop_id']
    df = df.pivot_table(index='time', columns='stop_id', values='train_id', aggfunc='max')
    df.sort_index(inplace=True)
    # '1min' is the non-deprecated spelling of the old '1T' alias
    df = df.resample('1min').last()

    # 1.0 wherever a train is present, NaN elsewhere
    min_in_station_df = df.copy()
    min_in_station_df[~df.isna()] = 1.0
    min_in_station_df = min_in_station_df.astype(float)

    # Running count per train id at each stop; rows with no train keep NaN
    for observing_stop in df.columns:
        min_in_station_df[observing_stop] = min_in_station_df[observing_stop].groupby(df[observing_stop]).cumsum()

    return min_in_station_df
コード例 #4
0
def test_stop_create():
    """Stops can be fetched by id or by index; bad lookups raise."""
    line = SubwayLine(['R1N', 'R2N', 'W3N'])

    # A stop id that is not on the line is rejected
    assert_raises(ValueError, line.stop, '1N')

    # Lookup by id
    by_id = line.stop('R1N')
    assert by_id.stop_id == 'R1N'
    assert by_id.stop_idx == 0

    # Out-of-range indices (too large, negative) are rejected
    for bad_idx in (5, -1):
        assert_raises(IndexError, line.stop_from_idx, bad_idx)

    # Lookup by index
    by_idx = line.stop_from_idx(2)
    assert by_idx.stop_id == 'W3N'
    assert by_idx.stop_idx == 2
コード例 #5
0
ファイル: alerts.py プロジェクト: b-koopman/MTADelayPredict
def plot_alert(alert_time, observing_stop, alert_stop,
               stop_filter, route_filter,
               title='Northbound N Trains',
               data_dir = '../data/raw/status',
               start_window = 15,
               end_window = 60,
               ):
    """
    Plot train traffic in a window around an alert time, using matplotlib

    :param alert_time: Time the alert was seen; marked with a vertical red line
    :param observing_stop: Stop where effects are being looked for; marked with a horizontal green line
    :param alert_stop: Stop the alert occurred at; marked with a horizontal red line
    :param stop_filter: forwarded to train_data.load_range_schedule
    :param route_filter: forwarded to train_data.load_range_schedule
    :param title: text placed before the alert time in the plot title
    :param data_dir: data directory forwarded to train_data.load_range_schedule
    :param start_window: minutes before alert_time at which the window opens
    :param end_window: minutes after alert_time at which the window closes
    :return: figure of plot (the current matplotlib figure)
    """
    import matplotlib.lines as mlines
    import matplotlib.pyplot as plt

    window_open = alert_time - pd.Timedelta(start_window, unit='m')
    window_close = alert_time + pd.Timedelta(end_window, unit='m')

    # Load the schedule for the window and draw the base traffic plot
    schedule_df = train_data.load_range_schedule(window_open, window_close, stop_filter, route_filter, data_dir)
    ax = traffic.plot_traffic(window_open, window_close, schedule_df)

    subway_line = SubwayLine(N_STOP_LIST)
    x_span = list(ax.get_xbound())

    def _mark_stop(stop_id, colour):
        # Horizontal line across the whole x range at the stop's numeric position
        level = subway_line.stop_idx(stop_id)
        marker = mlines.Line2D(x_span, [level, level], color=colour)
        ax.add_line(marker)
        return marker

    stop_line = _mark_stop(observing_stop, 'g')
    alert_line = _mark_stop(alert_stop, 'r')

    # Vertical line at the alert time, spanning the current y range
    y_low, y_high = ax.get_ybound()
    pd.DataFrame([[y_low], [y_high]], index=[alert_time, alert_time]).plot(color='r', ax=ax)

    ax.legend((stop_line, alert_line), ('observing stop', 'alert'))
    ax.set_xlabel('Time')
    ax.set_ylabel('Stop')
    ax.set_title("{} @ {}".format(title, alert_time))
    return plt.gcf()
コード例 #6
0
    def test_stop_comparison(self):
        """
        Ordering and equality between stops, on one line and across two lines
        """
        def _check_ordered(earlier, later):
            # Exercise <, > and != for a pair expected to be in line order
            assert earlier < later
            assert later > earlier
            assert earlier != later

        # Two stops taken from the shared fixture line
        first = self.test_line.stop('R1N')
        last = self.test_line.stop('W3N')
        _check_ordered(first, last)
        assert first == first

        # Same checks against a stop taken from a second, separately built line
        other_line = SubwayLine(self.stop_list)
        last_twin = other_line.stop('W3N')
        _check_ordered(first, last_twin)
        # Different lines, but same stop_id
        assert last == last_twin
コード例 #7
0
def min_until_train(loader):
    """
    Calculate the cumulative minutes until the next train arrives

    For every one-minute bucket in which no train is seen at a stop (after at
    least one train has been seen there), the result counts down the buckets
    remaining in that gap: the last bucket of a gap is 1. Buckets in which a
    train is present, and buckets before the first train, are NaN.

    :param loader: loaded GTFSLoader to extract data from
    :return: DataFrame of [time x stop_id] with value of minutes
    :raises ValueError: if the loader holds no stop data
    """
    import re

    if len(loader.stop_dict) == 0:
        raise ValueError("loader has no data loaded.  Call GTFSLoader.load_range(...) first")

    # One single-column frame per stop_dict entry (column = its key), indexed by
    # the time_dict entry at the same position.
    schedule_dfs = [
        pd.DataFrame(stops, index=times, columns=[train_id])
        for (train_id, stops), (_, times) in zip(loader.stop_dict.items(), loader.time_dict.items())
    ]
    df = pd.concat(schedule_dfs)

    # Reduce string column labels to their digits so they can be aggregated numerically
    non_digits = re.compile(r'[^0-9]+')
    df.columns = df.columns.map(lambda x: float(non_digits.sub('', x)) if isinstance(x, str) else x)
    # TODO: Turn this into a data check
    df = df.sort_index()['2000-01-01':]

    # Re-shape to [time x stop_id] holding the numeric train id seen at each stop
    df = df.stack().reset_index()
    df.columns = ['time', 'train_id', 'stop_id']
    df = df.pivot_table(index='time', columns='stop_id', values='train_id', aggfunc='max')
    df.sort_index(inplace=True)
    # '1min' is the non-deprecated spelling of the old '1T' alias
    df = df.resample('1min').last()

    # Label each gap bucket with the id of the train that preceded the gap:
    # shift by one bucket, zero out buckets where a train is present, carry the
    # previous train id forward through the gap, then blank the zeros again.
    train_gaps = df.copy().shift(1)
    train_gaps[~df.isna()] = 0.0
    train_gaps.ffill(inplace=True)
    train_gaps.replace(0.0, np.nan, inplace=True)
    min_since_train_df = train_gaps.copy()

    # 1.0 for every gap bucket, NaN elsewhere
    min_since_train_df[~train_gaps.isna()] = 1.0
    min_since_train_df = min_since_train_df.astype(float)

    # Work on time-reversed copies so a running sum counts back from the gap's end
    min_until_train_df = min_since_train_df.iloc[::-1].copy()
    reversed_train_gaps = train_gaps.iloc[::-1]
    reversed_train_gaps = reversed_train_gaps.reset_index(drop=True)
    min_until_train_df = min_until_train_df.reset_index(drop=True)

    for observing_stop in df.columns:
        min_until_train_df[observing_stop] = min_until_train_df[observing_stop].groupby(
            reversed_train_gaps[observing_stop]).cumsum()

    # Restore chronological order and the original time index
    min_until_train_df = min_until_train_df.iloc[::-1]
    min_until_train_df.index = min_since_train_df.index

    return min_until_train_df
コード例 #8
0
def test_wrong_direction_line():
    """Build a line from one northbound ('N') and one southbound ('S') stop id."""
    # NOTE(review): no assertion is visible here; presumably an expected-exception
    # decorator sits above this function -- confirm.
    mixed_direction_stops = ['R1N', 'R2S']
    SubwayLine(mixed_direction_stops)
コード例 #9
0
def test_stop_idx():
    """The first stop id given to a line has index 0."""
    stops = ['R16N', 'R15N']
    line = SubwayLine(stops)
    first_idx = line.stop_idx('R16N')
    assert first_idx == 0
コード例 #10
0
def test_underspecified_stops():
    """Build a line from stop ids that carry no direction suffix."""
    # NOTE(review): no assertion is visible here; presumably an expected-exception
    # decorator sits above this function -- confirm.
    suffixless_stops = ['R1', 'R2']
    SubwayLine(suffixless_stops)
コード例 #11
0
 def setup_class(cls):
     """Create the stop list and the SubwayLine fixture shared by the tests."""
     stops = ['R1N', 'R2N', 'W3N']
     cls.stop_list = stops
     cls.test_line = SubwayLine(stops)