def predict_demand(regr, X, y):
    """
    Turn observed and predicted tM3 times into binned demand series.

    The model is t^M3 = tM1 + delta; the predicted tM3 are aggregated into
    time bins to obtain the predicted demand, and the observed tM3 are
    aggregated the same way to obtain the observed demand.

    Parameters
    ----------
    regr : regression model exposing ``get_params()`` and ``predict()``.
        ``get_params()`` must contain the keys ``'interval'`` and ``'tz'``,
        which are forwarded to ``atddm.binarrivals``.  The output of
        ``regr.predict(X)`` is binned as-is, so it is presumably the
        predicted tM3 -- TODO confirm against the model class.
    X : matrix of features, with ``X[:, 0]`` being tM1.
    y : vector with the observed tM3 (converted with ``pd.to_datetime``).

    Returns
    -------
    (y_true, y_pred) : observed and predicted demand, both indexed on the
        union of their bins, with empty bins set to 0.
    """
    settings = regr.get_params()
    bin_kwargs = {'interval': settings['interval'], 'tz': settings['tz']}

    # observed tM3 -> observed demand
    observed = atddm.binarrivals(pd.to_datetime(y), **bin_kwargs).fillna(0)
    # predicted tM3 -> predicted demand
    predicted = atddm.binarrivals(regr.predict(X), **bin_kwargs).fillna(0)

    # put both series on one common index so their lengths cannot mismatch
    shared_index = observed.index.union(predicted.index)
    return tuple(
        demand.reindex(index=shared_index).fillna(0)
        for demand in (observed, predicted)
    )
ENDDT = pd.Timestamp(ENDDT)
# ENDDT = BEGDT + pd.Timedelta(21, 'D')
INTERVAL = 10  # bin width, in minutes

dd = atddm.load(subset=CODES)
m3_bin = {}
nairp = len(CODES)
CODES.sort()

# Bin the M3_FL240 times of every airport on a regular grid from BEGDT to
# ENDDT, expressed in that airport's time zone.
for code in CODES:
    indx = pd.date_range(start=BEGDT, end=ENDDT,
                         freq=str(INTERVAL) + 'min', tz=TZONES[code])
    # reindex() rather than [indx]: selecting with a list of labels raises
    # KeyError on recent pandas as soon as one bin of the grid is absent,
    # whereas reindex() yields NaN for it, which fillna(0) turns into 0.
    m3_bin[code] = atddm.binarrivals(dd[code].M3_FL240,
                                     interval=INTERVAL,
                                     tz=TZONES[code]).reindex(indx).fillna(0)

# One subplot per airport; the grid orientation depends on the target.
if TRGT == 'talk':
    f, axes = plt.subplots(2, nairp // 2, sharey=False)
else:
    f, axes = plt.subplots(nairp // 2, 2, sharey=False)

for ax, code in zip(axes.flatten(), CODES):
    ts = m3_bin[code].fillna(0)
    # bin-to-bin change; the first value has no predecessor and is dropped
    ts = ts.diff().dropna()
    # bin width in minutes, from the hours and minutes of the index frequency
    # NOTE(review): `.delta` on a frequency offset is deprecated in recent
    # pandas releases -- confirm against the pandas version in use.
    freq = (60 * ts.index.freq.delta.components.hours
            + ts.index.freq.delta.components.minutes)
    day0 = ts.index[0]
    # 24 * 60 / freq bins make up one day
    days = int(len(ts) / (24 * 60 / freq))
    # timestamps spaced one day apart, starting at the first sample
    begdays = [day0 + pd.Timedelta(i, 'D') for i in range(0, days)]
for t in range(0, 24 * 60, INTERVAL) ] end_times = [ midnight + pd.Timedelta((INTERVAL + t) * 60 - 1, unit='s') for t in range(0, 24 * 60, INTERVAL) ] slices = [(a.time(), b.time()) for a, b in zip(sta_times, end_times)] for code in CODES: df = dd[code] tz = TZONES[code] indx = pd.date_range(start=BEGDT, end=ENDDT, freq=str(INTERVAL) + 'min', tz=tz) m3_bin[code] = atddm.binarrivals(df.M3_FL240, interval=INTERVAL, tz=tz)[indx].fillna(0) m3_bin[code].index = m3_bin[code].index.tz_localize(None) m1_bin[code] = atddm.binarrivals(df.M1_FL240, interval=INTERVAL, tz=tz)[indx].fillna(0) m1_bin[code].index = m1_bin[code].index.tz_localize(None) tmp = df.M1_FL240 + np.array(df.delay.sample(n=len(df), replace=True)) psra_bin[code] = atddm.binarrivals(tmp, interval=INTERVAL, tz=tz)[indx].fillna(0) psra_bin[code].index = psra_bin[code].index.tz_localize(None) daily = {} daily_psra = {} for code in CODES: tz = TZONES[code] daily[code] = atddm.daily_avg(m3_bin[code], tz=tz) daily[code].index = daily[code].index.tz_localize(None)