Ejemplo n.º 1
0
def dfBystop(stop, graph, allStations):
    '''Return a dataframe of raw and smoothed timelines for stations near stop.

    The neighbourhood is the ego graph of ``stop`` in ``graph`` with
    ``radius=5``, measured along the ``'weight'`` edge attribute and with
    edges treated as undirected. Every node of that neighbourhood that has
    an entry in ``allStations`` contributes two columns:

    * ``<station>``          -- the timeline exactly as stored in allStations
    * ``<station>_filtered`` -- the same timeline after gap filling and
      forward-backward filtering with ``filtfilt``

    Args:
        stop: node of ``graph`` whose neighbourhood is collected.
        graph: networkx graph passed to ``networkx.ego_graph``.
        allStations: mapping from station id to its timeline. Each timeline
            must support ``interpolate()`` (presumably a pandas Series --
            confirm against ``processHistoricalData``). Station ids must
            support ``+ '_filtered'``, i.e. be strings.

    Returns:
        pandas.DataFrame with the per-station columns concatenated side by
        side (``axis=1``). An empty DataFrame is returned when no station
        of the neighbourhood is present in ``allStations``.
    '''
    subgraph = networkx.ego_graph(graph, stop, radius=5,
                                  undirected=True, distance='weight')

    # Collect the per-station frames and concatenate once at the end; this
    # avoids re-copying the accumulated frame on every iteration.
    frames = []
    for station in subgraph.nodes():
        if station not in allStations:
            continue
        series = allStations[station]
        # Fill gaps before filtering: interpolate interior holes, then
        # back-fill whatever is still missing at the start of the series.
        # `b` and `a` are the filter coefficients defined at module level.
        filtered = filtfilt(b, a, series.interpolate().bfill())
        frames.append(pd.DataFrame({station: series,
                                    (station + '_filtered'): filtered}))

    if not frames:
        # Nothing to report for this neighbourhood.
        return pd.DataFrame()
    return pd.concat(frames, axis=1)
Ejemplo n.º 2
0
    for station in subgraph.nodes():
        if station in allStations:
            filtered = filtfilt(b, a, allStations[station].interpolate().fillna(method='backfill'))
            stationDf = pd.DataFrame({station: allStations[station],
                                     (station + '_filtered'): filtered})
            try:
                tmpDf = pd.concat([tmpDf, stationDf], axis=1)
            except NameError:
                tmpDf = stationDf
    return tmpDf

if __name__ == "__main__":

    # Load the two pickled inputs: the historical data and the network graph.
    fulldata = util.pickle_load('data/fulldata_NOnearest_9-22.pkl')
    graph = util.pickle_load('subwaydata/NYCsubway_network_graph.pkl')
    # mapToId is indexed by station name below ('W 4 St' -> node id).
    mapFromId, mapToId = distances.getMappings(graph)

    # allStations is later indexed by graph node id (see the membership
    # test in the loop below and the lookups inside dfBystop).
    allStations = processHistoricalData(fulldata, graph)
    station = mapToId['W 4 St']

    # Single-station demo: fit both models on the neighbourhood of 'W 4 St'.
    stationDf = dfBystop(station, graph, allStations)
    result = fitAR(stationDf, station, makePlot=True, order=(1, 1, 0),
                   showTest=True)
    # The dataframe is rebuilt with identical arguments before the second fit.
    # NOTE(review): presumably this guards against fitAR modifying stationDf
    # in place -- confirm, otherwise the second call is redundant.
    stationDf = dfBystop(station, graph, allStations)
    result = fitGP(stationDf, station, showTest=True)

    # run GP for all stations and write to dB
    test_scores = {}
    for station in graph.nodes():
        # Only nodes that have an entry in allStations are processed.
        if station in allStations:
            stationDf = dfBystop(station, graph, allStations)