def test_oob_sarimax():
    """Check out-of-sample scoring for a seasonal model fit with exogenous data.

    One model is fit on all of ``wineind`` with the trailing 15 samples held
    out for scoring; a second is fit on everything except those 15 samples.
    The test compares their scores, parameters and forecasts.
    """
    holdout = 15
    shared_kwargs = dict(order=(1, 1, 1), seasonal_order=(0, 1, 1, 12))
    exog = rs.rand(wineind.shape[0], 2)

    with_oob = ARIMA(
        out_of_sample_size=holdout, **shared_kwargs
    ).fit(y=wineind, exogenous=exog)

    without_oob = ARIMA(
        out_of_sample_size=0, suppress_warnings=True, **shared_kwargs
    ).fit(y=wineind[:-holdout], exogenous=exog[:-holdout, :])

    # The OOB score should match the score of the truncated model's forecast
    # over the held-out samples.
    oob_score = with_oob.oob()
    score_func = get_callable(without_oob.scoring, VALID_SCORING)
    truncated_preds = without_oob.predict(
        n_periods=holdout, exogenous=exog[-holdout:, :])
    expected_score = score_func(wineind[-holdout:], truncated_preds)
    assert np.allclose(oob_score, expected_score, rtol=1e-2)

    # The two models must not share parameters.
    assert not np.allclose(with_oob.params(), without_oob.params(), rtol=1e-2)

    # Once the truncated model has been updated with the held-out samples,
    # both models should produce the same forecasts.
    future_exog = rs.rand(5, 2)
    without_oob.update(wineind[-holdout:], exog[-holdout:, :])
    assert np.allclose(
        with_oob.predict(5, future_exog),
        without_oob.predict(5, future_exog),
        rtol=1e-2,
    )

    # A confidence interval can be requested alongside the forecasts.
    forecasts, intervals = with_oob.predict(
        5, future_exog, return_conf_int=True)
    for arr in (forecasts, intervals):
        assert isinstance(arr, np.ndarray)
class ARIMAModel(BaseModel):
    """Wrapper around an ``ARIMA`` estimator with fit/evaluate/predict helpers.

    The model is built lazily on the first ``fit_eval`` call, where the
    differencing orders are estimated from the training data.
    """

    def __init__(self):
        """
        Initialize Model
        """
        self.seasonal = True
        self.metric = 'mse'
        self.model = None
        self.model_init = False

    def _build(self, **config):
        """
        build the models and initialize.

        :param config: hyperparameters for the model. Recognised keys are
            ``p``, ``d``, ``q``, ``seasonality_mode``, ``P``, ``D``, ``Q``,
            ``m`` and ``metric``; missing keys fall back to defaults.
        """
        p = config.get('p', 2)
        d = config.get('d', 0)
        q = config.get('q', 2)
        self.seasonal = config.get('seasonality_mode', True)
        P = config.get('P', 1)
        D = config.get('D', 0)
        Q = config.get('Q', 1)
        m = config.get('m', 7)
        self.metric = config.get('metric', self.metric)

        order = (p, d, q)
        if not self.seasonal:
            seasonal_order = (0, 0, 0, 0)
        else:
            seasonal_order = (P, D, Q, m)

        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)

    def fit_eval(self, data, validation_data, **config):
        """
        Fit on the training data from scratch.

        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :param config: hyperparameters forwarded to ``_build`` on the first
            call. Any ``d`` / ``D`` entries are replaced by the values
            estimated from ``data``.
        :return: a dict mapping the metric name to its value on
            ``validation_data``
        """
        if not self.model_init:
            # Read the seasonal settings from the config that is about to be
            # applied, so the estimate of D agrees with the model that
            # ``_build`` creates (same seasonality flag, same period m).
            seasonal = config.get('seasonality_mode', self.seasonal)
            m = config.get('m', 7)

            # Estimating differencing term (d) and seasonal differencing
            # term (D)
            kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
            adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
            d = max(adf_diffs, kpss_diffs)
            D = nsdiffs(data, m=m, max_D=12) if seasonal else 0
            config.update(d=d, D=D)

            self._build(**config)
            self.model_init = True

        self.model.fit(data)
        val_metric = self.evaluate(x=None, target=validation_data,
                                   metrics=[self.metric])[0].item()
        return {self.metric: val_metric}

    def predict(self, x=None, horizon=24, update=False, rolling=False):
        """
        Predict horizon time-points ahead the input x in fit_eval

        :param x: ARIMA predicts the horizon steps forward from the training
            data. So x should be None as it is not used.
        :param horizon: the number of steps forward to predict
        :param update: whether to update the original model
        :param rolling: whether to use rolling prediction
        :return: predicted result of length horizon (a list of floats when
            ``rolling`` is True, otherwise whatever the underlying model's
            ``predict`` returns)
        :raises ValueError: if ``x`` is not None
        :raises Exception: if ``update`` is requested without ``rolling``, or
            if the model has not been fitted or restored yet
        """
        if x is not None:
            raise ValueError("x should be None")
        if update and not rolling:
            raise Exception(
                "We don't support updating model without rolling prediction currently"
            )
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        if not rolling:
            return self.model.predict(n_periods=horizon)

        if update:
            return self._rolling_forecast(horizon)

        # Rolling prediction mutates the model. When the caller did not ask
        # for an update, snapshot the model to a unique temporary file and
        # restore it afterwards, even if forecasting fails part-way.
        import tempfile
        fd, snapshot_path = tempfile.mkstemp(suffix=".pkl")
        os.close(fd)
        try:
            self.save(snapshot_path)
            try:
                return self._rolling_forecast(horizon)
            finally:
                self.restore(snapshot_path)
        finally:
            os.remove(snapshot_path)

    def _rolling_forecast(self, horizon):
        """Forecast one step at a time, feeding each forecast back in."""
        forecasts = []
        for _ in range(horizon):
            fc = self.model.predict(n_periods=1).item()
            forecasts.append(fc)
            # Updates the existing model with a small number of MLE steps for
            # rolling prediction. The new observation is passed as a
            # one-element sequence rather than a bare scalar.
            self.model.update([fc])
        return forecasts

    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate on the prediction results and y. We predict horizon
        time-points ahead the input x in fit_eval before evaluation, where
        the horizon length equals the length of target.

        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training
            data. So x should be None as it is not used.
        :param metrics: a list of metrics in string format (never mutated
            here)
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        :raises ValueError: if ``x`` is not None or ``target`` is None
        :raises Exception: if the model has not been fitted or restored yet
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)
        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]

    def save(self, checkpoint_file):
        """
        Pickle the underlying model to ``checkpoint_file``.

        :param checkpoint_file: path of the file to write
        :raises Exception: if the model has not been fitted or restored yet
        """
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling save")
        with open(checkpoint_file, 'wb') as fout:
            pickle.dump(self.model, fout)

    def restore(self, checkpoint_file):
        """
        Load a pickled model from ``checkpoint_file`` and mark it initialized.

        :param checkpoint_file: path of the file to read
        """
        # NOTE(security): pickle.load executes arbitrary code embedded in the
        # file; only restore checkpoints that come from a trusted source.
        with open(checkpoint_file, 'rb') as fin:
            self.model = pickle.load(fin)
        self.model_init = True
def test_oob_for_issue_28():
    """Out-of-sample scoring with an exogenous array.

    Continuation of the previous test: fits on ``hr`` with 10 held-out
    samples, then compares against a model fit without those samples, both
    before and after that second model is updated with them.
    """
    holdout = 10
    exog = rs.rand(hr.shape[0], 4)

    model_oob = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=holdout,
    ).fit(y=hr, exogenous=exog)

    oob_score = model_oob.oob()
    assert not np.isnan(oob_score)

    # The endog held by the fitted result matches the original series, in
    # values and in length.
    fitted = model_oob.arima_res_
    assert np.allclose(fitted.data.endog, hr, rtol=1e-2)
    assert fitted.model.endog.shape[0] == hr.shape[0]

    # The same holds for the exog.
    assert np.allclose(fitted.data.exog, exog, rtol=1e-2)
    assert fitted.model.exog.shape[0] == exog.shape[0]

    # Fit an equivalent model on the data minus the held-out observations,
    # with no OOB scoring. Its forecast score over the held-out samples
    # should equal the OOB score, showing the OOB score was computed on
    # those samples only.
    model_plain = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=0,
    ).fit(y=hr[:-holdout], exogenous=exog[:-holdout, :])

    score_func = val.get_scoring_metric(model_plain.scoring)
    held_out_preds = model_plain.predict(
        n_periods=holdout, exogenous=exog[-holdout:, :])
    assert np.allclose(
        oob_score, score_func(hr[-holdout:], held_out_preds), rtol=1e-2)

    # The parameters differ because the OOB model was updated with the
    # held-out samples.
    future_exog = rs.rand(5, 4)
    assert not np.allclose(
        model_oob.params(), model_plain.params(), rtol=1e-2)

    # The forecasts differ as well.
    oob_forecasts = model_oob.predict(n_periods=5, exogenous=future_exog)
    plain_forecasts = model_plain.predict(n_periods=5, exogenous=future_exog)
    with pytest.raises(AssertionError):
        assert_array_almost_equal(oob_forecasts, plain_forecasts)

    # Updating must fail when: no exogenous array is given; the exogenous
    # array has a mismatched number of rows; the exogenous array has a
    # different number of columns.
    bad_exogs = (None, future_exog, future_exog[:, :2])
    for bad_exog in bad_exogs:
        with pytest.raises(ValueError):
            model_plain.update(hr[-holdout:], bad_exog)

    # After a valid update the plain model should reproduce the OOB model's
    # forecasts.
    model_plain.update(hr[-holdout:], exog[-holdout:, :])
    refreshed_forecasts = model_plain.predict(
        n_periods=5, exogenous=future_exog)
    assert np.allclose(oob_forecasts, refreshed_forecasts, rtol=1e-2)
def model_plot(days):
    """Build a Plotly figure of a rolling ARIMA forecast of NYC consumption.

    Daily consumption for 2016-2019 is modelled with weekend, minimum
    temperature and Fourier-term regressors. The model is fit on everything
    except the last 100 days, then forecasts five consecutive windows of
    ``days`` days, being updated with the actual values of each window
    before forecasting the next.

    :param days: length of each forecast window in days (anything accepted
        by ``int``)
    :return: a ``go.Figure`` with the last 200 training values, their
        in-sample predictions, and the out-of-sample forecasts against the
        true values
    """
    horizon = int(days)
    holdout = 100    # trailing observations excluded from the initial fit
    n_windows = 5    # number of consecutive forecast windows
    tail = 200       # number of training points shown in the plot

    pd.plotting.register_matplotlib_converters()

    # Load consumption and aggregate it to daily totals.
    df = pd.read_csv('data/new_york.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df.index = df.Date
    df = df.drop('Date', axis=1)
    df = df.resample('D').sum()
    df = df.tz_localize(None)

    # Load weather and restrict both frames to the modelling period.
    nyc_weather = pd.read_csv('data/weather/weatherNY.csv')
    nyc_weather['DATE'] = pd.to_datetime(nyc_weather['DATE'])
    nyc_weather = nyc_weather.set_index('DATE')
    nyc_weather.drop(['NAME', 'STATION'], axis=1, inplace=True)
    nyc_weather = nyc_weather['2015-07-01':'2020-08-10']
    df = df[:'2020-08-10']

    df1 = df["2016":"2019"]
    nyc_weather = nyc_weather["2016":"2019"]
    y = df1.Consumption

    # Exogenous regressors: weekend flag, minimum temperature and Fourier
    # terms with periods 638 and 516 (first and second harmonics of each).
    exog = pd.DataFrame({'date': y.index})
    exog = exog.set_index(pd.PeriodIndex(exog['date'], freq='D'))
    exog['is_weekend'] = np.where(exog.index.dayofweek < 5, 0, 1)
    # NOTE(review): assumes the weather frame has exactly one row per day of
    # y; a gap in either source makes this assignment fail on length.
    exog['TMIN'] = nyc_weather['TMIN'].values
    fourier_terms = ((2, 638), (4, 638), (2, 516), (4, 516))
    for idx, (multiplier, period) in enumerate(fourier_terms, start=1):
        angle = multiplier * np.pi * exog.index.dayofyear / period
        exog['sin%d' % idx] = np.sin(angle)
        exog['cos%d' % idx] = np.cos(angle)
    exog = exog.drop(columns=['date'])

    split = len(y) - holdout
    y_to_train = y.iloc[:split]
    exog_to_train = exog.iloc[:split]

    # The exogenous matrix belongs in fit(), not in the constructor, so that
    # the later predict/update calls that supply exogenous rows are
    # consistent with how the model was fit.
    # NOTE(review): error_action looks like an auto_arima option rather than
    # an ARIMA one; kept as in the original, confirm it is intended.
    arima_exog_model = ARIMA(order=(3, 0, 1),
                             seasonal_order=(2, 0, 0, 7),
                             error_action='ignore',
                             initialization='approximate_diffuse',
                             suppress_warnings=True).fit(
                                 y=y_to_train, exogenous=exog_to_train)
    preds = arima_exog_model.predict_in_sample(exog_to_train)

    predict = []
    true = []
    dates = []
    for window in range(n_windows):
        start = split + window * horizon
        stop = start + horizon
        if window > 0:
            # Feed the model the actual values of the window just forecast.
            previous = slice(start - horizon, start)
            arima_exog_model.update(y.iloc[previous],
                                    exogenous=exog.iloc[previous])

        y_to_test = y.iloc[start:stop]
        y_arima_exog_forecast = arima_exog_model.predict(
            n_periods=horizon, exogenous=exog.iloc[start:stop])

        true.extend(y_to_test.values)
        predict.extend(y_arima_exog_forecast.tolist())
        dates.extend(y_to_test.index)

    # For visualization only the tail of the training period is shown.
    y_to_train2 = y_to_train[-tail:].to_frame()
    preds = preds[-tail:]

    fig = go.Figure()
    # Create and style traces
    fig.add_trace(go.Scatter(x=y_to_train2.index, y=y_to_train2.Consumption,
                             name='True values',
                             line=dict(color='firebrick', width=4,
                                       dash='dot')))
    fig.add_trace(go.Scatter(x=y_to_train2.index, y=preds,
                             name='In-sample Prediction',
                             line=dict(color='royalblue', width=4)))
    fig.add_trace(go.Scatter(x=dates, y=predict, name='Prediction',
                             line=dict(color='green', width=4)))
    fig.add_trace(go.Scatter(x=dates, y=true, name='True',
                             line=dict(color='firebrick', width=4,
                                       dash='dot')))
    fig.update_layout(title='Electricity Consumption in New York',
                      xaxis_title='Date',
                      yaxis_title='Consumption',
                      xaxis_showgrid=True,
                      yaxis_showgrid=True,
                      paper_bgcolor=app_colors['background'],
                      plot_bgcolor=app_colors['background'])
    return fig