def test_issue_286():
    """Check that ``predict_in_sample(start=0)`` is rejected (issue 286).

    An ``ARIMA`` with ``order=(1, 1, 2)`` is fitted on ``wineind``; asking for
    in-sample predictions starting at index 0 must raise ``ValueError`` with
    a message containing "In-sample predictions undefined for".
    """
    model = ARIMA(order=(1, 1, 2))
    model.fit(wineind)

    with pytest.raises(ValueError) as exc_info:
        model.predict_in_sample(start=0)

    expected_fragment = "In-sample predictions undefined for"
    assert expected_fragment in pytest_error_str(exc_info)
def test_with_seasonality1():
    """Fit a seasonal ARIMA on ``wineind`` and sanity-check it against R.

    The model uses ``order=(1, 1, 1)`` and ``seasonal_order=(0, 1, 1, 12)``.
    Its AIC, AICc and BIC must each land within 100 of the values reported by
    the equivalent R fit, and both in-sample prediction and forecasting with
    confidence intervals must run without raising.
    """
    seasonal_model = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        suppress_warnings=True,
    )
    fitted = seasonal_model.fit(y=wineind)
    _try_get_attrs(fitted)

    # (criterion, value reported by the R code): AIC ~3004, AICc ~3005,
    # BIC ~3017. We only require agreement within 100 or so.
    tolerance = 100
    r_reference = (
        (fitted.aic, 3004),
        (fitted.aicc, 3005),
        (fitted.bic, 3017),
    )
    for criterion, r_value in r_reference:
        assert abs(criterion() - r_value) < tolerance

    # Show we can predict in-sample.
    fitted.predict_in_sample()

    # Exercise the SARIMAX confidence-interval path.
    fitted.predict(n_periods=10, return_conf_int=True, alpha=0.05)
# Disabled diagnostic plot of residuals against fitted values, kept for
# reference:
# plt.figure()
# fitted_values = arima.predict_in_sample()
# plt.plot(df.index[:train_len - 1], fitted_values,
#          color='C0', label="Fitted values")
# plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data")
# plt.plot(df.index[:train_len - 1], arima.resid(),
#          color='C2', label="Residuals")
# plt.gca().grid(which='both', axis='x', linestyle='--')
# plt.title("Residuals and fitted values")
# plt.legend()

# Report the sum of squared residuals of the fitted model.
sse = (arima.resid() ** 2).sum()
print("SSE: {}".format(sse))

# Forecast as many periods as the test set holds, then fetch the in-sample
# fitted values for the training portion.
predictions = arima.predict(n_periods=test.shape[0])
fitted_values = arima.predict_in_sample()

# The test set and the forecasts share the same slice of the index.
test_index = df.index[train_len:]

plt.figure()
plt.plot(test_index, test, '--', color='C0', label="test set")
plt.plot(test_index, predictions, '--', color='C1',
         label="forecasted values")
plt.plot(df.index[:train_len], train, color='C0', label="train set")
# NOTE(review): fitted values are drawn against one fewer index entry than
# the training set -- presumably predict_in_sample() yields train_len - 1
# values here; confirm against the model's differencing order.
plt.plot(df.index[:train_len - 1], fitted_values, color='C1',
         label="fitted values")
plt.legend()
plt.title("Fitted values and forecasts")
def model_plot(days):
    """Build the New York electricity-consumption forecast figure.

    Reads consumption data from ``data/new_york.csv`` (resampled to daily
    sums) and weather data from ``data/weather/weatherNY.csv``, restricts both
    to 2016-2019, and fits a seasonal ARIMA with exogenous regressors
    (weekend flag, ``TMIN`` and Fourier terms) on everything except the last
    100 observations. It then walks forward through that holdout in five
    rounds: each round forecasts the next ``days`` observations, and every
    round after the first starts by feeding the previously forecast window
    back into the model via ``update``.

    Parameters
    ----------
    days : int, or any value accepted by ``int()``
        Length of each forecast window. Five windows have to fit inside the
        100-observation holdout, so ``1 <= days <= 20`` is required.

    Returns
    -------
    The ``go.Figure`` holding four traces: the last 200 training values, the
    matching in-sample predictions, the walk-forward forecasts and the
    observed values for the forecast dates.

    Raises
    ------
    ValueError
        If ``days`` is outside ``1..20``. The check runs before any file is
        read or any model is fitted, instead of failing late on an empty or
        truncated forecast window.
    """
    days = int(days)

    holdout = 100  # trailing observations kept out of the initial fit
    n_rounds = 5   # number of forecast windows drawn on the figure
    max_days = holdout // n_rounds
    if not 1 <= days <= max_days:
        raise ValueError(
            "days must be between 1 and {}, got {}".format(max_days, days)
        )

    pd.plotting.register_matplotlib_converters()

    # --- consumption: index by date, aggregate to daily sums, drop the tz ---
    df = pd.read_csv('data/new_york.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    df = df.resample('D').sum()
    df = df.tz_localize(None)
    df = df[:'2020-08-10']

    # --- weather: index by date and drop the descriptive columns ---
    nyc_weather = pd.read_csv('data/weather/weatherNY.csv')
    nyc_weather['DATE'] = pd.to_datetime(nyc_weather['DATE'])
    nyc_weather = nyc_weather.set_index('DATE')
    nyc_weather = nyc_weather.drop(['NAME', 'STATION'], axis=1)
    nyc_weather = nyc_weather['2015-07-01':'2020-08-10']

    # Modelling window shared by the target and the weather regressor.
    y = df["2016":"2019"].Consumption
    nyc_weather = nyc_weather["2016":"2019"]

    # --- exogenous regressors, one row per observation of y ---
    exog = pd.DataFrame({'date': y.index})
    exog = exog.set_index(pd.PeriodIndex(exog['date'], freq='D'))
    exog['is_weekend'] = np.where(exog.index.dayofweek < 5, 0, 1)
    # Assigned positionally: the weather slice must have exactly len(y) rows.
    exog['TMIN'] = nyc_weather['TMIN'].values

    # Fourier terms sin1/cos1 .. sin4/cos4: first and second harmonic for
    # each of the two periods (June-to-June as 638, August-to-August as 516).
    day_of_year = exog.index.dayofyear
    term = 1
    for period in (638, 516):
        for harmonic in (1, 2):
            angle = 2 * harmonic * np.pi * day_of_year / period
            exog['sin{}'.format(term)] = np.sin(angle)
            exog['cos{}'.format(term)] = np.cos(angle)
            term += 1
    exog = exog.drop(columns=['date'])

    # --- initial fit on everything before the holdout ---
    split = len(y) - holdout
    y_to_train = y.iloc[:split]
    exog_to_train = exog.iloc[:split]

    arima_exog_model = ARIMA(
        order=(3, 0, 1),
        seasonal_order=(2, 0, 0, 7),
        exogenous=exog_to_train,
        error_action='ignore',
        initialization='approximate_diffuse',
        suppress_warnings=True,
    ).fit(y=y_to_train)
    preds = arima_exog_model.predict_in_sample(exog_to_train)

    # --- walk-forward forecasting through the holdout ---
    real_values = []
    predictions = []
    dates = []
    for round_idx in range(n_rounds):
        start = split + round_idx * days
        stop = start + days

        if round_idx > 0:
            # Feed the window forecast in the previous round back into the
            # model before forecasting the next one.
            arima_exog_model.update(
                y.iloc[start - days:start],
                exogenous=exog.iloc[start - days:start],
            )

        y_arima_exog_forecast = arima_exog_model.predict(
            n_periods=days, exogenous=exog.iloc[start:stop]
        )
        observed = y.iloc[start:stop]

        real_values.append(observed.values)
        predictions.append(y_arima_exog_forecast.tolist())
        dates.append(observed.index)

    # Flatten the per-round results into single sequences for plotting.
    predict = [item for sublist in predictions for item in sublist]
    true = [item for sublist in real_values for item in sublist]
    dates = [item for sublist in dates for item in sublist]

    # Only the tail of the training period is drawn, for readability.
    recent_train = y_to_train[-200:]
    recent_preds = preds[-200:]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=recent_train.index, y=recent_train, name='True values',
        line=dict(color='firebrick', width=4, dash='dot'),
    ))
    fig.add_trace(go.Scatter(
        x=recent_train.index, y=recent_preds, name='In-sample Prediction',
        line=dict(color='royalblue', width=4),
    ))
    fig.add_trace(go.Scatter(
        x=dates, y=predict, name='Prediction',
        line=dict(color='green', width=4),
    ))
    fig.add_trace(go.Scatter(
        x=dates, y=true, name='True',
        line=dict(color='firebrick', width=4, dash='dot'),
    ))
    fig.update_layout(
        title='Electricity Consumption in New York',
        xaxis_title='Date',
        yaxis_title='Consumption',
        xaxis_showgrid=True,
        yaxis_showgrid=True,
        paper_bgcolor=app_colors['background'],
        plot_bgcolor=app_colors['background'],
    )
    return fig