예제 #1
0
    def predict_product(self, product_id):
        """
        Forecast one product's time series with a seasonal ARIMA model.

        The series returned by ``self.__get_product_ts(product_id)`` is fitted
        with ``SARIMAX`` (order ``(0, 1, 2)``, seasonal order
        ``(1, 1, 1, 11)``, no trend term) and forecast
        ``PREDICTION_TIME * 4`` steps past the end of the sample.

        Returns a 3-tuple, every element passed through ``np.exp``:
        the observations whose index is > "2015" and < "2016" (missing
        values replaced by 0 beforehand), the forecast mean, and the
        forecast confidence intervals.
        """
        series = self.__get_product_ts(product_id)

        sarimax_spec = SARIMAX(
            series,
            order=(0, 1, 2),
            seasonal_order=(1, 1, 1, 11),
            trend='n',
            time_varying_regression=True,
            mle_regression=False,
        )
        fitted = sarimax_spec.fit()

        horizon = PREDICTION_TIME * 4
        forecast = fitted.get_forecast(steps=horizon, dynamic=True)

        # Slice of the observed series returned alongside the forecast.
        in_window = (series.index > "2015") & (series.index < "2016")
        history = series[in_window].fillna(0)

        # NOTE(review): everything is exponentiated on the way out, so the
        # series is presumably stored on a log scale -- confirm.
        return (
            np.exp(history),
            np.exp(forecast.predicted_mean),
            np.exp(forecast.conf_int()),
        )
예제 #2
0
def sarimax_forecast(df):
    """Forecast the price for the first row of ``df`` that has no price.

    Rows with a non-null ``price`` are the training history. The first row
    with a null ``price`` is the forecast target; its remaining columns are
    used as exogenous regressors.

    Args:
        df: DataFrame with a ``price`` column, a ``temp`` column and any
            further exogenous feature columns. The index is rebuilt as a
            ``DatetimeIndex`` before fitting, so it is presumably
            datetime-like -- confirm against the caller.

    Returns:
        A ``(forecast, past)`` tuple:

        * ``forecast`` -- one-row DataFrame with columns ``price``,
          ``lower_interval`` and ``upper_interval`` (95% interval), indexed
          by the forecast row's index value and with index name
          ``date_time``. All three values are NaN when ``temp`` is missing
          for the forecast row.
        * ``past`` -- the last row of the first column of the history
          (expected to be ``price``), as a one-element Series.
    """
    # Split the frame into known history and rows still to be forecast.
    has_price = df.price.notnull()
    past = df[has_price]
    # Only the next time point is forecast.
    future = df[~has_price].drop('price', axis=1).iloc[:1, :]

    # ``.iloc[0]`` is used throughout: a bare ``series[0]`` on a
    # non-integer index relies on the deprecated positional fallback.
    if future.temp.isnull().iloc[0]:
        # Weather forecast data is not available for that hour.
        forecast = np.nan
        lower = np.nan
        upper = np.nan
        print('weather data is not available')
    else:
        # Give the history an index with an explicit frequency.
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        fitted = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7)).fit(maxiter=300)
        # One forecast object supplies both the point forecast and the
        # interval; the confidence level belongs to ``conf_int``.
        results = fitted.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        interval = results.conf_int(alpha=0.05)
        lower = interval['lower price'].iloc[0]
        upper = interval['upper price'].iloc[0]

    # Assemble the forecast frame on the forecast row's index.
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'
    past = past.iloc[-1:, 0]
    return forecast, past
예제 #3
0
def sarimax_forecast(hour=11):
    """Forecast the next missing price for the given hour of the day.

    Data is loaded with ``get_data(hour=hour)``. Rows with a non-null
    ``price`` are the training history; the first row with a null ``price``
    is the forecast target and its remaining columns are used as exogenous
    regressors.

    Args:
        hour: hour of the day, forwarded to ``get_data``.

    Returns:
        A ``(forecast, past)`` tuple:

        * ``forecast`` -- one-row DataFrame with columns ``price``,
          ``lower`` and ``upper`` (95% interval), indexed by the forecast
          row's index value. All values are NaN when ``temp`` is missing
          for the forecast row.
        * ``past`` -- the last row of the first column of the history
          (expected to be ``price``), as a one-element Series.
    """
    df_all = get_data(hour=hour)

    # Split the frame into known history and rows still to be forecast.
    has_price = df_all.price.notnull()
    past = df_all[has_price]
    # Only the next time point is forecast.
    future = df_all[~has_price].drop('price', axis=1).iloc[:1, :]

    # ``.iloc[0]`` is used throughout: a bare ``series[0]`` on a
    # non-integer index relies on the deprecated positional fallback.
    if future.temp.isnull().iloc[0]:
        # No weather data for the target row: emit an all-NaN forecast.
        forecast = np.array([np.nan])
        lower = np.nan
        upper = np.nan
    else:
        # Give the history an index with an explicit frequency.
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        fitted = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7)).fit(maxiter=300)
        # One forecast object supplies both the point forecast and the
        # interval; the confidence level belongs to ``conf_int``.
        results = fitted.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        confidence_int = results.conf_int(alpha=0.05)
        lower = confidence_int['lower price'].iloc[0]
        upper = confidence_int['upper price'].iloc[0]

    # Assemble the forecast frame on the forecast row's index.
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)
    past = past.iloc[-1:, 0]
    return forecast, past
def train_sarima(data=False,
                 hour=11,
                 split_date='2019-10-22 11:00:00',
                 n=30,
                 exog=False):
    """Walk-forward, one-step-ahead SARIMA(X) evaluation over ``n`` days.

    For each of the ``n`` days after ``split_date`` the model is refitted
    on all rows strictly before that day and used to forecast that single
    day.

    Args:
        data: DataFrame to use. When left as a bool (the default), data is
            loaded with ``get_daily(hour=hour)`` (no exogenous features) or
            ``get_all(hour=hour)`` (with exogenous features).
        hour: hour of the day, forwarded to the loader.
        split_date: train/test boundary; rows up to and including it are
            the training set.
        n: number of days to forecast.
        exog: leave as a bool for a plain SARIMA with order ``(1, 1, 1)``
            and seasonal order ``(1, 0, 2, 7)``; otherwise a sequence
            ``(exog feature names, order, seasonal_order)``.

    Returns:
        ``(forecast, lower, upper, mape, mase, train, test)`` where
        ``forecast`` is a DataFrame with a ``price`` column on the test
        index, ``lower``/``upper`` are lists of 95% interval bounds, and
        ``mape``/``mase`` come from ``get_mape``/``get_mase``.
    """
    use_exog = not isinstance(exog, bool)
    if isinstance(data, bool):
        df = get_all(hour=hour) if use_exog else get_daily(hour=hour)
    else:
        df = data

    # Normalise split_date to a single Timestamp.
    split_date = pd.DatetimeIndex(np.array([split_date]))[0]
    # Train and test frames are used for the metrics and returned as-is.
    train = df[df.index <= split_date]
    test = df[(df.index > split_date)
              & (df.index <= split_date + pd.Timedelta(days=n))]

    # Collected per forecast day.
    forecasts = []
    upper = []
    lower = []
    for i in range(1, n + 1):
        # Walk one day forward; everything before that day is training data.
        predict_date = split_date + pd.Timedelta(days=i)
        train_set = df[df.index < predict_date]
        train_set.index = pd.DatetimeIndex(train_set.index.values,
                                           freq=train_set.index.inferred_freq)
        if use_exog:
            fitted = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2]).fit(maxiter=200)
            # Exogenous features for the day being forecast.
            exog_fore = test[test.index == predict_date][exog[0]]
            results = fitted.get_forecast(1, exog=exog_fore)
        else:
            fitted = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7)).fit(maxiter=200)
            results = fitted.get_forecast(1)

        # One forecast object supplies both the point forecast and the
        # interval. ``.iloc[0]`` avoids the deprecated positional
        # ``series[0]`` lookup on a datetime-indexed Series.
        confidence_int = results.conf_int(alpha=0.05)
        lower.append(confidence_int['lower price'].iloc[0])
        upper.append(confidence_int['upper price'].iloc[0])
        forecasts.append(results.predicted_mean.iloc[0])

    # Error metrics of the forecasts against the held-out prices.
    mape = get_mape(test.price, forecasts)
    mase = get_mase(test.price, forecasts, train.price)
    # Forecast frame on the test index.
    forecast = pd.DataFrame(forecasts, index=test.index, columns=['price'])

    return forecast, lower, upper, mape, mase, train, test
예제 #5
0
# Show the currently pending figure (it is built before this excerpt).
plt.show()


# %%
# Fit a seasonal ARIMA model -- order (3, 1, 1), seasonal order
# (0, 1, 0, 12) -- on the training series and display its summary.
sarimax = SARIMAX(airpassengers_train, order=(3,1,1), seasonal_order=(0,1,0,12)).fit()
sarimax.summary()


# %%
# Plot the fitted model's diagnostic charts.
sarimax.plot_diagnostics(figsize=(16, 8))
plt.show()


# %%
# Forecast 24 steps past the end of the training sample and keep the
# confidence intervals for plotting.
sarimax_forecast = sarimax.get_forecast(24)
sarimax_forecast_conf_int = sarimax_forecast.conf_int()


# %%
# Overlay the training data, the test data and the forecast mean.
plt.plot(airpassengers_train, label='train')
plt.plot(airpassengers_test, label='test')
plt.plot(sarimax_forecast.predicted_mean, label='forecast')


# Shade the band between the first and second confidence-interval columns.
plt.fill_between(sarimax_forecast_conf_int.index,
                 sarimax_forecast_conf_int.iloc[:, 0],
                 sarimax_forecast_conf_int.iloc[:, 1], color='k', alpha=.2)

plt.legend()
예제 #6
0
    # Refit SARIMA on the smoothed counter series using the selected orders
    # (p_best, 1, q_best) x (P_best, 1, Q_best, m).
    # NOTE(review): `dis=-1` looks like a typo for `disp=-1` (the statsmodels
    # fit verbosity option) -- confirm; as written it may not silence output.
    best_model = SARIMAX(dft_f['cnt_smooth'], order=(p_best, 1, q_best), seasonal_order=(P_best, 1, Q_best, m)).fit(dis=-1)
    print(best_model.summary())

    # Save the diagnostic charts for station `s`, then clear the figure.
    best_model.plot_diagnostics(figsize=(12,8))
    plt.suptitle(f'Diagnostic Best model')
    plt.savefig(f'{baseSave}/diagnostic_plot_station_{s}.png', dpi=150)
    plt.clf()
    '''
    END: Compute Sarima optimization
    '''
    # Last day to compare against: the day before the requested forecast date.
    end_endto = args.forecast_upto.date() + timedelta(days=-1)
    # Load the smoothed series for station `s` written earlier to a
    # ';'-separated CSV, and keep the part between the selection's 'stop'
    # date and `end_endto`.
    dft_all = pd.read_csv(f'{baseSave}/smoothed_to_compare_s_{s}.csv', header=[0], index_col=[0], sep=';', parse_dates=True)
    dft_all_upto = dft_all.loc[selection[s]['stop']:end_endto.strftime('%Y-%m-%d')]

    # Observed data from the selection's 'start' date onward.
    dft_f_from = dft_f.loc[selection[s]['start']:]
    # Forecast up to the requested date; `steps` is given a timestamp here
    # rather than an integer step count.
    pred_uc = best_model.get_forecast(steps=pd.to_datetime(args.forecast_upto.date().strftime('%Y-%m-%d')))
    # NOTE(review): pred_ci is not used in the visible part of this function.
    pred_ci = pred_uc.conf_int()
    # Plot the observed series, the held-back comparison data and the
    # forecast mean together.
    ax = dft_f_from['cnt_smooth'].plot(label='Observed Smoothed Data', c='r', figsize=(12, 8))
    dft_all_upto['cnt_smooth'].plot(label='Observed "Forecasted" Data', c='b', figsize=(12, 8))
    pred_uc.predicted_mean.plot(ax=ax, label='Forecasted Data', c='k', linestyle='--')

    ax.set_xlabel('Date')
    ax.set_ylabel('Counter')
    ax.grid(which='major', linestyle='-')
    ax.grid(which='minor', linestyle='--')
    plt.legend()
    plt.title(f'Forecasted Prediction\n{map_station[s]} {args.forecast_upto.date()}')
    plt.savefig(f'{baseSave}/forecasted_prediction_station_{s}_to_{args.forecast_upto.date()}.png', dpi=150)
    plt.clf()
    # The comparison CSV is no longer needed once the figure is saved.
    os.remove(f'{baseSave}/smoothed_to_compare_s_{s}.csv')
예제 #7
0
# Split the BTC frame at 2020-11-01: earlier rows train, later rows test.
train = btc[btc.index < pd.to_datetime("2020-11-01", format='%Y-%m-%d')]
test = btc[btc.index >= pd.to_datetime("2020-11-01", format='%Y-%m-%d')]
print(test)
# Plot both partitions on the current figure.
plt.plot(train, color="black", label='Training')
plt.plot(test, color="red", label='Testing')
plt.ylabel('BTC Price')
plt.xlabel('Date')
plt.xticks(rotation=45)
plt.title("Train/Test split for BTC Data")

# Training target series.
y = train['BTC-USD']

# SARIMAX with order (1, 0, 1) and no seasonal order given.
ARMAmodel = SARIMAX(y, order=(1, 0, 1))
ARMAmodel = ARMAmodel.fit()

# Forecast one step per test row and start the output frame from the
# alpha=0.05 confidence intervals.
y_pred = ARMAmodel.get_forecast(len(test.index))
y_pred_df = y_pred.conf_int(alpha=0.05)
# Add point predictions over the same index range as the intervals.
y_pred_df["Predictions"] = ARMAmodel.predict(start=y_pred_df.index[0],
                                             end=y_pred_df.index[-1])
# Re-label the forecast rows with the test index so they plot against
# the test data.
y_pred_df.index = test.index
y_pred_out = y_pred_df["Predictions"]
plt.plot(y_pred_out, color='green', label='ARMA Predictions')
plt.legend()

import numpy as np
from sklearn.metrics import mean_squared_error

# Root-mean-squared error of the predictions against the test prices.
arma_rmse = np.sqrt(
    mean_squared_error(test["BTC-USD"].values, y_pred_df["Predictions"]))
print("ARMA RMSE: ", arma_rmse)
예제 #8
0
# Dynamic prediction starting at 1998-01-01, plus its confidence intervals.
# NOTE(review): `full_results` may not be accepted by newer statsmodels
# releases -- confirm against the installed version.
pred_dynamic = sarima_model.get_prediction(start=pd.to_datetime('1998-01-01'), dynamic=True, full_results=True)
pred_dynamic_ci = pred_dynamic.conf_int()

# Plot observations from 1990 onward with the dynamic prediction on top.
ax = y['1990':].plot(label='observed', figsize=(15,12))
pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)
# Shade the band between the first and second confidence-interval columns.
ax.fill_between(pred_dynamic_ci.index,
                pred_dynamic_ci.iloc[:, 0],
                pred_dynamic_ci.iloc[:, 1], color='k', alpha=0.25)
# Lightly shade the span from the prediction start to the last observation.
ax.fill_betweenx(ax.get_ylim(), pd.to_datetime('1998-01-01'), y.index[-1], alpha=0.1, zorder=-1)
ax.set_xlabel('Date')
ax.set_ylabel('CO2 Levels')
plt.legend()
plt.show()

# Visualize forecasts 500 steps in the future
pred_uc = sarima_model.get_forecast(steps=500)
pred_ci = pred_uc.conf_int() 

# Plot the full observed series with the forecast mean and its interval band.
ax = y.plot(label='observed', figsize=(15,12))
pred_uc.predicted_mean.plot(ax=ax, label='forecast')
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:,0],
                pred_ci.iloc[:,1], color='k', alpha=0.25)
ax.set_xlabel('Date')
ax.set_ylabel('CO2 Levels')
plt.legend()
plt.show()


#### Seasonal ARIMA w/ Exogenous Regressors (SARIMAX) ####
"""
예제 #9
0
def fit_model_sarima(train, val, pdq, seasonal_pdq):
    """Fit a SARIMA model on ``train`` and score its forecast against ``val``.

    Args:
        train: training series passed to ``SARIMAX`` as the endogenous data.
        val: validation observations that immediately follow ``train``.
        pdq: non-seasonal ``(p, d, q)`` order.
        seasonal_pdq: seasonal ``(P, D, Q, s)`` order.

    Returns:
        Mean absolute error between ``val`` and the forecast mean.
    """
    sarima_model = SARIMAX(train, order=pdq,
                           seasonal_order=seasonal_pdq).fit(disp=0)
    # Forecast exactly as many steps as there are validation points rather
    # than a fixed 48, so validation sets of any length can be scored.
    y_pred = sarima_model.get_forecast(steps=len(val)).predicted_mean
    return mean_absolute_error(val, y_pred)
예제 #10
0
def train_sarimax(data=False,
                  exog=False,
                  hour=11,
                  split_date='2019-10-22 11:00:00',
                  n=30):
    """Walk-forward, one-step-ahead SARIMA(X) evaluation over ``n`` days.

    For each of the ``n`` days after ``split_date`` the model is refitted
    on all rows strictly before that day and used to forecast that single
    day; the results are written into the test frame.

    Args:
        data: DataFrame to use. When left as a bool (the default), data is
            loaded with ``get_daily_price(hour=hour)`` (no exogenous
            features) or ``get_daily_data(hour=hour)`` (with exogenous
            features).
        exog: leave as a bool for a plain SARIMA with order ``(1, 1, 1)``
            and seasonal order ``(1, 0, 2, 7)``; otherwise a sequence
            ``([exog feature names], order, seasonal_order)``.
        hour: hour of the day, forwarded to the loader.
        split_date: train/test boundary; rows up to and including it are
            the training set.
        n: number of days to forecast.

    Returns:
        ``(train, test, mape, mase)`` where ``train`` holds the ``price``
        column, ``test`` holds ``price``, ``forecast_price``,
        ``upper_interval`` and ``lower_interval`` (95% interval), and
        ``mape``/``mase`` come from ``get_mape``/``get_mase``.
    """
    use_exog = not isinstance(exog, bool)
    if isinstance(data, bool):
        if use_exog:
            df = get_daily_data(hour=hour)
        else:
            df = get_daily_price(hour=hour)
    else:
        df = data

    # Normalise split_date to a single Timestamp.
    split_date = pd.DatetimeIndex(np.array([split_date]))[0]
    train = df[df.index <= split_date]
    # Copy so the forecast columns can be added without touching df.
    test = df[(df.index > split_date)
              & (df.index <= split_date + timedelta(days=n))].copy()

    for i in range(1, n + 1):
        print(
            colored(
                f'################# forecasting for day {i} ##################',
                'blue'))
        # Walk one day forward; everything before that day is training data.
        predict_date = split_date + timedelta(days=i)
        train_set = df[df.index < predict_date]
        train_set.index = pd.DatetimeIndex(train_set.index.values,
                                           freq=train_set.index.inferred_freq)
        if use_exog:
            fitted = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2]).fit(maxiter=200)
            # Exogenous features for the day being forecast.
            exog_fore = test[test.index == predict_date][exog[0]]
            results = fitted.get_forecast(1, exog=exog_fore)
        else:
            fitted = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7)).fit(maxiter=200)
            results = fitted.get_forecast(1)

        # One forecast object supplies both the point forecast and the
        # interval. ``.iloc[0]`` avoids the deprecated positional
        # ``series[0]`` lookup on a datetime-indexed Series.
        confidence_int = results.conf_int(alpha=0.05)
        test.loc[predict_date, 'forecast_price'] = results.predicted_mean.iloc[0]
        test.loc[predict_date,
                 'lower_interval'] = confidence_int['lower price'].iloc[0]
        test.loc[predict_date,
                 'upper_interval'] = confidence_int['upper price'].iloc[0]

    train = train[['price']]
    test = test[[
        'price', 'forecast_price', 'upper_interval', 'lower_interval'
    ]]
    # Error metrics of the forecasts against the held-out prices.
    mape = get_mape(test.price, test.forecast_price)
    mase = get_mase(test.price, test.forecast_price, train.price)

    return train, test, mape, mase
# ARIMA

# %%
# ARIMA parameter search on the differenced series, with d fixed at 0 and
# p and q searched from 1 up to 3.
# `start_q` was previously misspelled `start_1`, so the intended starting
# value for q never reached the search.
results_arima = pm.auto_arima(
    train['diff'], d=0, start_p=1, start_q=1, max_p=3, max_q=3)
print(results_arima.summary())

#%%
# Fit the ARIMA model.
# NOTE(review): the order (3, 0, 2) is hard-coded rather than read from
# the search result above -- confirm it matches the selected model.
model_arima = SARIMAX(train['diff'], order=(3, 0, 2)).fit()
# Dynamic in-sample prediction over the last 50 observations.
prediction_arima = model_arima.get_prediction(
    start=-50, dynamic=True).predicted_mean
# Out-of-sample forecast, 20 steps ahead.
forecast_arima = model_arima.get_forecast(steps=20).predicted_mean

#%%
# Model diagnostics: mean absolute error of the residuals.
arima_residual = model_arima.resid
arima_mae = np.mean(np.abs(arima_residual))
print(arima_mae)

#%%
# pmdarima results
results_arima.plot_diagnostics()
plt.show()

# %%
# plot ARIMAX
plt.plot(df['diff'], label='True values')