Exemple #1
0
def test_issue_286():
    """Check that ``predict_in_sample(start=0)`` raises a ``ValueError``.

    An ARIMA model with ``order=(1, 1, 2)`` is fitted on ``wineind`` and the
    raised error message must mention that in-sample predictions are
    undefined.
    """
    model = ARIMA(order=(1, 1, 2))
    model.fit(wineind)

    with pytest.raises(ValueError) as exc_info:
        model.predict_in_sample(start=0)

    message = pytest_error_str(exc_info)
    assert "In-sample predictions undefined for" in message
Exemple #2
0
def test_with_seasonality1():
    """Fit a seasonal ARIMA on ``wineind`` and sanity-check it against R.

    The information criteria are compared with the values reported by the
    equivalent R fit, using a loose tolerance of 100, and both in-sample and
    out-of-sample prediction are exercised.
    """
    model = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        suppress_warnings=True,
    )
    fitted = model.fit(y=wineind)
    _try_get_attrs(fitted)

    # (criterion, approximate value reported by the R code), checked in the
    # order AIC, AICc, BIC
    r_reference = (
        (fitted.aic, 3004),
        (fitted.aicc, 3005),
        (fitted.bic, 3017),
    )
    for criterion, r_value in r_reference:
        # show equal within 100 or so
        assert abs(criterion() - r_value) < 100

    # in-sample prediction must run without error
    fitted.predict_in_sample()

    # forecasting with SARIMAX confidence intervals must run as well
    fitted.predict(n_periods=10, return_conf_int=True, alpha=0.05)
# plt.figure()
# fitted_values = arima.predict_in_sample()
# plt.plot(df.index[:train_len - 1], fitted_values,
#          color='C0', label="Fitted values")
# plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data")
# plt.plot(df.index[:train_len - 1], arima.resid(),
#          color='C2', label="Residuals")
# plt.gca().grid(which='both', axis='x', linestyle='--')
# plt.title("Residuals and fitted values")
# plt.legend()

# Report the sum of squared residuals of the fitted model
print(f"SSE: {(arima.resid() ** 2).sum()}")

# Forecast as many periods as the test set holds, and recover the in-sample fit
predictions = arima.predict(n_periods=len(test))
fitted_values = arima.predict_in_sample()

# Draw test/forecast (dashed) and train/fitted (solid) series on one figure;
# the plotting order determines the legend order, so it is kept as-is.
plt.figure()
plt.plot(df.index[train_len:], test, '--', color='C0', label="test set")
plt.plot(df.index[train_len:], predictions, '--', color='C1',
         label="forecasted values")
plt.plot(df.index[:train_len], train, color='C0', label="train set")
# NOTE(review): the fitted values are drawn against one date fewer than the
# training set -- presumably one observation is lost to differencing; confirm.
plt.plot(df.index[:train_len - 1], fitted_values, color='C1',
         label="fitted values")
plt.legend()
plt.title("Fitted values and forecasts")
Exemple #4
0
def _build_exog_features(y_index, tmin):
    """Assemble the exogenous regressors used by ``model_plot``.

    Parameters
    ----------
    y_index : pandas.DatetimeIndex
        Daily index of the consumption series.
    tmin : array-like
        ``TMIN`` weather values, one per entry of ``y_index``.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by a daily ``PeriodIndex`` with the columns
        ``is_weekend``, ``TMIN`` and ``sin1``/``cos1`` ... ``sin4``/``cos4``.
    """
    periods = y_index.to_period('D')
    exog = pd.DataFrame(index=periods)

    # dayofweek 0-4 is Monday-Friday, 5-6 is the weekend
    exog['is_weekend'] = np.where(periods.dayofweek < 5, 0, 1)
    exog['TMIN'] = tmin

    # Two harmonics for each of two periods. The original author's note
    # describes 638 as "june to june" and 516 as "august to august".
    fourier_terms = ((638, 1), (638, 2), (516, 1), (516, 2))
    for k, (period, harmonic) in enumerate(fourier_terms, start=1):
        angle = 2 * harmonic * np.pi * periods.dayofyear / period
        exog['sin{}'.format(k)] = np.sin(angle)
        exog['cos{}'.format(k)] = np.cos(angle)

    return exog


def model_plot(days):
    """Fit a seasonal ARIMA with exogenous regressors and plot its forecasts.

    Daily New York consumption for 2016-2019 is read from
    ``data/new_york.csv`` and ``TMIN`` from ``data/weather/weatherNY.csv``.
    The model is trained on everything except the last 100 days, then
    forecasts five consecutive windows of ``days`` days each; before every
    window after the first, the model is updated with the observations of the
    previous window.

    Parameters
    ----------
    days : int or str
        Length of each forecast window; converted with ``int``. Must be
        between 1 and 20 so that five windows fit in the 100-day hold-out.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with the last 200 training observations, their in-sample
        predictions, and the rolling forecasts against the true values.

    Raises
    ------
    ValueError
        If ``days`` is outside the range 1..20.
    """
    horizon = int(days)
    holdout = 100   # trailing observations kept out of the initial fit
    n_windows = 5   # number of consecutive forecast windows

    if not 1 <= horizon <= holdout // n_windows:
        raise ValueError(
            "days must be between 1 and {} so that {} forecast windows fit "
            "in the {}-day hold-out, got {}".format(
                holdout // n_windows, n_windows, holdout, horizon))

    pd.plotting.register_matplotlib_converters()

    # Consumption: index by date, aggregate to daily totals, drop any timezone
    consumption = pd.read_csv('data/new_york.csv')
    consumption['Date'] = pd.to_datetime(consumption['Date'])
    consumption = consumption.set_index('Date')
    consumption = consumption.resample('D').sum()
    consumption = consumption.tz_localize(None)

    # Weather: index by date and keep only the measurement columns
    weather = pd.read_csv('data/weather/weatherNY.csv')
    weather['DATE'] = pd.to_datetime(weather['DATE'])
    weather = weather.set_index('DATE')
    weather = weather.drop(['NAME', 'STATION'], axis=1)

    # Both sources are restricted to the same 2016-2019 span; the weather
    # values are attached positionally, so the two must have equal length.
    y = consumption.loc["2016":"2019"].Consumption
    weather = weather.loc["2016":"2019"]
    exog = _build_exog_features(y.index, weather['TMIN'].values)

    split = len(y) - holdout
    y_train = y.iloc[:split]
    exog_train = exog.iloc[:split]

    # The regressors must be handed to fit(): the ARIMA constructor has no
    # `exogenous` (or `error_action`) parameter and would only forward them
    # as extra SARIMAX keyword arguments, leaving the model without regressors.
    model = ARIMA(order=(3, 0, 1),
                  seasonal_order=(2, 0, 0, 7),
                  initialization='approximate_diffuse',
                  suppress_warnings=True)
    model.fit(y=y_train, exogenous=exog_train)
    in_sample = model.predict_in_sample(exog_train)

    forecasts = []
    actuals = []
    forecast_dates = []

    for window in range(n_windows):
        start = split + window * horizon
        stop = start + horizon

        if window:
            # Reveal the previous window's observations before forecasting
            previous = slice(start - horizon, start)
            model.update(y.iloc[previous], exogenous=exog.iloc[previous])

        y_test = y.iloc[start:stop]
        forecast = model.predict(n_periods=horizon,
                                 exogenous=exog.iloc[start:stop])

        forecasts.extend(forecast.tolist())
        actuals.extend(y_test.values)
        forecast_dates.extend(y_test.index)

    # For readability only the tail of the training period is drawn
    recent_truth = y_train.iloc[-200:]
    recent_fit = in_sample[-200:]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=recent_truth.index, y=recent_truth,
                             name='True values',
                             line=dict(color='firebrick', width=4,
                                       dash='dot')))
    fig.add_trace(go.Scatter(x=recent_truth.index, y=recent_fit,
                             name='In-sample Prediction',
                             line=dict(color='royalblue', width=4)))
    fig.add_trace(go.Scatter(x=forecast_dates, y=forecasts,
                             name='Prediction',
                             line=dict(color='green', width=4)))
    fig.add_trace(go.Scatter(x=forecast_dates, y=actuals,
                             name='True',
                             line=dict(color='firebrick', width=4,
                                       dash='dot')))

    fig.update_layout(title='Electricity Consumption in New York',
                      xaxis_title='Date',
                      yaxis_title='Consumption',
                      xaxis_showgrid=True,
                      yaxis_showgrid=True,
                      paper_bgcolor=app_colors['background'],
                      plot_bgcolor=app_colors['background'])

    return fig