def model(data_train_grouped_sales):
    # seasonal (lag-12) differencing kept for reference; the fit below uses the raw series
    grouped_sales_deseason = data_train_grouped_sales.diff(periods=12)
    # (2, 1, 1) ARIMA with a (1, 0, 0, 12) seasonal component
    model = smt.SARIMAX(data_train_grouped_sales.values,
                        order=(2, 1, 1),
                        seasonal_order=(1, 0, 0, 12)).fit()
    print(model.summary())
    return model
Example #2
def iterative_SARIMA_fit(ts,
                         max_ar=2,
                         max_dff=1,
                         max_ma=2,
                         s_max_ar=2,
                         s_max_diff=1,
                         s_max_ma=2,
                         s=7):
    """ Iterates within the allowed values of the p and q parameters
    Returns a dictionary with the successful fits.
    Keys correspond to models.
    """
    ts = ts.astype('float')
    SARIMA_fit_results = {}

    min_aic = np.inf
    min_aic_fit_order = None
    min_aic_fit_res = None

    for AR in range(max_ar + 1):
        for Diff in range(max_dff + 1):
            for MA in range(max_ma + 1):
                for sAR in range(s_max_ar + 1):
                    for sDiff in range(s_max_diff + 1):
                        for sMA in range(s_max_ma + 1):
                            model = smt.SARIMAX(ts,
                                                order=(AR, Diff, MA),
                                                seasonal_order=(sAR, sDiff,
                                                                sMA, s))
                            try:
                                results_SARIMA = model.fit(disp=False,
                                                           method='lbfgs')
                                fit_is_available = True
                            except Exception:
                                # print("\tDidn't find a fit")
                                continue

                            if fit_is_available:
                                # print("\tFound a fit (%d,%d,%d)" % (AR, Diff, MA))
                                # print("\tAIC score =", results_ARIMA.aic)
                                SARIMA_fit_results[
                                    '%d-%d-%d--%d-%d-%d-%d' % (
                                        AR, Diff, MA, sAR, sDiff, sMA, s)] = \
                                    results_SARIMA
                                if results_SARIMA.aic < min_aic:
                                    min_aic = results_SARIMA.aic
                                    min_aic_fit_order = (AR, Diff, MA, sAR,
                                                         sDiff, sMA, s)
                                    # min_aic_fit_res = ARIMA_fit_results
                                    min_aic_fit_res = results_SARIMA

    return SARIMA_fit_results, min_aic, min_aic_fit_order, min_aic_fit_res
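
A minimal usage sketch; the daily_sales series below is synthetic and only exercises the call signature (search bounds are reduced to keep it quick), and smt is assumed to be statsmodels.tsa.api as in the function above:

import numpy as np
import pandas as pd

# hypothetical daily series with weekly seasonality, just to exercise the grid search
rng = np.random.default_rng(0)
idx = pd.date_range("2021-01-01", periods=200, freq="D")
daily_sales = pd.Series(10 + 2 * np.sin(2 * np.pi * idx.dayofweek / 7) + rng.normal(size=200),
                        index=idx)

fits, best_aic, best_order, best_res = iterative_SARIMA_fit(daily_sales,
                                                            max_ar=1, max_ma=1,
                                                            s_max_ar=1, s_max_ma=1,
                                                            s=7)
print("Best (p, d, q, P, D, Q, s):", best_order, "AIC:", round(best_aic, 2))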
Example #3
def arima_predictor_ver2(df_ts):
    series = df_ts['orders']
    sarimax_mod = tsa.SARIMAX(endog=series, order=(2, 1, 0), seasonal_order=(1, 1, 0, 12))
    sarimax_res = sarimax_mod.fit()
    print(sarimax_res.summary())

    # SARIMAXResults.predict returns only the predicted mean, so use
    # get_prediction to obtain the confidence interval as well
    pred = sarimax_res.get_prediction(start=0, end=len(series))
    predict = pred.predicted_mean
    ci = pred.conf_int(alpha=0.05)

    # show forecast
    print(predict)

    # show problematic value in forecast
    print(predict.iloc[12])
Example #4
def fit_sarimax(ts_train,
                ts_test,
                order=(1, 0, 1),
                seasonal_order=(0, 0, 0, 0),
                exog_train=None,
                exog_test=None,
                figsize=(15, 10)):
    ## checks
    if order[1] == 0:
        print("Trend parameters: No differencing")
    else:
        print("Trend parameters: d=" + str(order[1]))
    if (seasonal_order[3] == 0) and (np.sum(seasonal_order[0:3]) == 0):
        print("Seasonal parameters: No Seasonality")
    else:
        print("Seasonal parameters: Seasonality every " +
              str(seasonal_order[3]) + " observations")
    if (exog_train is None) and (exog_test is None):
        print("Exog parameters: Not given")
    else:
        print("Exog parameters: number of regressors=" +
              str(exog_train.shape[1]))

    ## train
    model = smt.SARIMAX(ts_train,
                        order=order,
                        seasonal_order=seasonal_order,
                        exog=exog_train,
                        enforce_stationarity=False,
                        enforce_invertibility=False).fit()
    dtf_train = ts_train.to_frame(name="ts")
    dtf_train["model"] = model.fittedvalues

    ## test
    dtf_test = ts_test.to_frame(name="ts")
    dtf_test["forecast"] = model.predict(start=len(ts_train),
                                         end=len(ts_train) + len(ts_test) - 1,
                                         exog=exog_test)

    ## evaluate
    dtf = pd.concat([dtf_train, dtf_test])
    title = "ARIMA " + str(order) if exog_train is None else "ARIMAX " + str(
        order)
    title = "S" + title + " x " + str(seasonal_order) if np.sum(
        seasonal_order) > 0 else title
    dtf = utils_evaluate_forecast(dtf, figsize=figsize, title=title)
    return dtf, model
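
A possible usage sketch for fit_sarimax, assuming `y` is a monthly pandas Series and that utils_evaluate_forecast (called inside the function) is defined elsewhere in this module:

# hypothetical 80/20 train/test split of a monthly series `y`
split = int(len(y) * 0.8)
dtf, sarimax_res = fit_sarimax(y[:split], y[split:],
                               order=(1, 1, 1),
                               seasonal_order=(1, 1, 0, 12))
print(sarimax_res.summary())
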
def fit_garch(ts_train, ts_test, order=(1, 0, 1), seasonal_order=(0, 0, 0, 0),
              exog_train=None, exog_test=None, figsize=(15, 10)):
    ## train
    arima = smt.SARIMAX(ts_train, order=order, seasonal_order=seasonal_order,
                        exog=exog_train, enforce_stationarity=False,
                        enforce_invertibility=False).fit()
    garch = arch.arch_model(arima.resid, p=order[0], o=order[1], q=order[2],
                            x=exog_train, dist='StudentsT', power=2.0,
                            mean='Constant', vol='GARCH')
    model = garch.fit(update_freq=seasonal_order[3])
    dtf_train = ts_train.to_frame(name="ts")
    dtf_train["model"] = model.conditional_volatility

    ## test: forecast volatility (standard deviation) over the test horizon
    dtf_test = ts_test.to_frame(name="ts")
    forecast_var = model.forecast(horizon=len(ts_test)).variance.values[-1, :]
    dtf_test["forecast"] = np.sqrt(forecast_var)

    ## evaluate
    dtf = pd.concat([dtf_train, dtf_test])
    title = "GARCH ("+str(order[0])+","+str(order[2])+")" if order[0] != 0 else "ARCH ("+str(order[2])+")"
    dtf = utils_evaluate_forecast(dtf, figsize=figsize, title=title)
    return dtf, model
Example #6
def rolling_window(s, w, n, df):
    """
    s : start index
    w : window size
    n : number of one-step-ahead forecasts to produce
    """

    pred_df = pd.DataFrame(columns=["passengers"])
    for i in range(s,s+n) :
        print(i, " / " , s+n)
        train_df = df[i:i+w]
        if len(train_df) < w:
            df = pd.concat([df, pred_df.iloc[[-1]]])
            train_df = df[i:i + w]

        m = tsa.SARIMAX(train_df,
                        order=(1,1,1),seasonal_order=(1,1,1,12),
                        enforce_stationarity=False, enforce_invertibility=False).fit()
        forecast_1 = pd.DataFrame({"passengers" : m.forecast(steps=1)})
        pred_df = pd.concat([pred_df,forecast_1], axis=0)

    return pred_df
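
A usage sketch, assuming `df` is a monthly DataFrame with a single 'passengers' column (e.g. the classic AirPassengers data) indexed by date:

# 60-month training window starting at row 0, producing 12 one-step-ahead forecasts
pred_df = rolling_window(s=0, w=60, n=12, df=df)
print(pred_df.tail())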
Example #7
def sarimax_statsmodels(timeseries, train_length, o, so):
    """
    Previsioni con il modello SARIMAX

    Parameters
    ----------
    timeseries : Series
        la serie temporale.
    train_length : int
        la lunghezza del set di train (in rapporto alla serie completa).
    o : iterable
        order del modello SARIMAX (per statsmodels).
    so : iterable
        seasonal_order del modello SARIMAX (per statsmodels).

    Returns
    -------
    None.

    """

    # check whether the data are weekly or daily
    if so[3] == 52:
        f = 'W-MON'
    else:
        f = 'D'

    # build the training set
    train = timeseries[pd.date_range(
        start=timeseries.index[0],
        end=timeseries.index[int(len(timeseries) * train_length) - 1],
        freq=f)]

    # fit the model to the data
    model = smt.SARIMAX(train, order=o, seasonal_order=so, trend='c').fit()
    #model = pm.auto_arima(train, seasonal=True, m=m, suppress_warnings=True, trace=True,
    #start_p=1, start_q=1, max_p=1, max_q=1, start_P=1, start_Q=1, max_P=1, max_Q=1)

    # print the model parameters and check goodness of fit
    print(model.summary())
    plt.figure(figsize=(40, 20), dpi=80)
    model.plot_diagnostics(figsize=(40, 20))
    plt.show()

    # in-sample predictions
    # https://www.statsmodels.org/stable/generated/statsmodels.tsa.statespace.sarimax.SARIMAXResults.get_prediction.html
    sarimax_mod = model.get_prediction(end=len(train) - 1, dynamic=False)
    sarimax_dates = pd.date_range(start=timeseries.index[0],
                                  end=timeseries.index[len(train) - 1],
                                  freq=f)
    sarimax_ts = pd.Series(sarimax_mod.predicted_mean, index=sarimax_dates)

    # out-of-sample predictions
    # https://www.statsmodels.org/stable/generated/statsmodels.tsa.statespace.sarimax.SARIMAXResults.get_forecast.html
    fcast = model.get_forecast(steps=len(timeseries) - len(train))
    fcast_ci = fcast.conf_int()
    fcast_dates = pd.date_range(start=timeseries.index[len(train)],
                                periods=len(timeseries) - len(train),
                                freq=f)
    ts_fcast = pd.Series(fcast.predicted_mean, index=fcast_dates)

    # model plot
    plt.figure(figsize=(40, 20), dpi=80)
    plt.title('SARIMAX{}x{} model for {}'.format(o, so, timeseries.name))
    ax = train.plot(label='Train set', color='black')
    sarimax_ts.plot(ax=ax, label='In-sample predictions', color='green')
    plt.legend()
    plt.show()

    # forecast plot
    plt.figure(figsize=(40, 20), dpi=80)
    plt.title('Forecasting with SARIMAX{}x{} for {}'.format(
        o, so, timeseries.name))
    ax = timeseries.plot(label='Observed', color='black')
    ts_fcast.plot(ax=ax,
                  label='Out-of-sample forecasts',
                  alpha=.7,
                  color='red')
    ax.fill_between(fcast_dates,
                    fcast_ci['lower ' + timeseries.name],
                    fcast_ci['upper ' + timeseries.name],
                    color='k',
                    alpha=.2)
    plt.legend()
    plt.show()

    # error metrics
    errore = ts_fcast - timeseries
    errore.dropna(inplace=True)
    print('MSE=%.4f' % (errore**2).mean())
    print('MAE=%.4f' % (abs(errore)).mean())
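
A usage sketch, assuming `weekly_sales` is a named pandas Series at W-MON frequency (the name is needed because the function looks up the 'lower'/'upper' confidence-interval columns by series name):

sarimax_statsmodels(weekly_sales, train_length=0.8, o=(1, 1, 1), so=(1, 1, 0, 52))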
Example #8
aic_df = pd.DataFrame.from_dict(myDict, orient="index")
aic_df.columns = ["aic", "bic", "order", "s_order"]
aic_df["aic"].plot()
aic_df2 = aic_df[aic_df["aic"] < 335]
aic_df2["aic"].plot()

dir(best["model"])
best["model"].summary()
best["model"].plot_diagnostics()
# best (p, d, q, P, D, Q) from the search above: (2,1,0,1,1,0)

m = tsa.SARIMAX(train_df_log,order=(1,1,1),seasonal_order=(1,1,1,12)).fit()
m.summary()
m.plot_diagnostics()

train_df_log
model_df = train_df_log.copy()
model_df["yhat"] = m.fittedvalues
model_df = model_df.iloc[1:,:]
model_df.iloc[:,:].plot()

m.fittedvalues
m_test_df = test_df_log.copy()

m_test_df["yhat"] = m.predict(start=test_df_log.index[0], end=test_df_log.index[-1])
m_test_df["resid"] = m_test_df["passengers"]-m_test_df["yhat"]
m_test_df["resid"].plot()
Example #9
def model_gridsearch(
    ts,
    p_min,
    d_min,
    q_min,
    p_max,
    d_max,
    q_max,
    sP_min,
    sD_min,
    sQ_min,
    sP_max,
    sD_max,
    sQ_max,
    trends,
    exog=None,
    s=None,
    enforce_stationarity=True,
    enforce_invertibility=True,
    simple_differencing=False,
    plot_diagnostics=False,
    verbose=False,
    filter_warnings=True,
):
    '''Run grid search of SARIMAX models and save results.
    '''

    cols = [
        'p', 'd', 'q', 'sP', 'sD', 'sQ', 's', 'trend', 'enforce_stationarity',
        'enforce_invertibility', 'simple_differencing', 'aic', 'bic', 'het_p',
        'norm_p', 'sercor_p', 'dw_stat', 'arroots_gt_1', 'maroots_gt_1',
        'datetime_run'
    ]

    # Initialize a DataFrame to store the results
    df_results = pd.DataFrame(columns=cols)

    # # Initialize a DataFrame to store the results
    # results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min,p_max+1)],
    #                            columns=['MA{}'.format(i) for i in range(q_min,q_max+1)])

    mod_num = 0
    for trend, p, d, q, sP, sD, sQ in itertools.product(
            trends,
            range(p_min, p_max + 1),
            range(d_min, d_max + 1),
            range(q_min, q_max + 1),
            range(sP_min, sP_max + 1),
            range(sD_min, sD_max + 1),
            range(sQ_min, sQ_max + 1),
    ):
        print(p, d, q, sP, sD, sQ, end='\r')
        # initialize to store results for this parameter set
        this_model = pd.DataFrame(index=[mod_num], columns=cols)

        if p == 0 and d == 0 and q == 0:
            continue

        try:
            model = smt.SARIMAX(ts,
                                trend=trend,
                                order=(p, d, q),
                                seasonal_order=(sP, sD, sQ, s),
                                enforce_stationarity=enforce_stationarity,
                                enforce_invertibility=enforce_invertibility,
                                simple_differencing=simple_differencing,
                                exog=exog)

            if filter_warnings is True:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore")
                    model_results = model.fit(disp=0)
            else:
                model_results = model.fit()

            if verbose:
                print(model_results.summary())

            if plot_diagnostics:
                model_results.plot_diagnostics()

            stat = model_resid_stats(model_results, verbose=verbose)

            this_model.loc[mod_num, 'p'] = p
            this_model.loc[mod_num, 'd'] = d
            this_model.loc[mod_num, 'q'] = q
            this_model.loc[mod_num, 'sP'] = sP
            this_model.loc[mod_num, 'sD'] = sD
            this_model.loc[mod_num, 'sQ'] = sQ
            this_model.loc[mod_num, 's'] = s
            this_model.loc[mod_num, 'trend'] = trend
            this_model.loc[mod_num,
                           'enforce_stationarity'] = enforce_stationarity
            this_model.loc[mod_num,
                           'enforce_invertibility'] = enforce_invertibility
            this_model.loc[mod_num,
                           'simple_differencing'] = simple_differencing

            this_model.loc[mod_num, 'aic'] = model_results.aic
            this_model.loc[mod_num, 'bic'] = model_results.bic

            # this_model.loc[mod_num, 'het_method'] = stat['het_method']
            # this_model.loc[mod_num, 'het_stat'] = stat['het_stat']
            this_model.loc[mod_num, 'het_p'] = stat['het_p']
            # this_model.loc[mod_num, 'norm_method'] = stat['norm_method']
            # this_model.loc[mod_num, 'norm_stat'] = stat['norm_stat']
            this_model.loc[mod_num, 'norm_p'] = stat['norm_p']
            # this_model.loc[mod_num, 'skew'] = stat['skew']
            # this_model.loc[mod_num, 'kurtosis'] = stat['kurtosis']
            # this_model.loc[mod_num, 'sercor_method'] = stat['sercor_method']
            # this_model.loc[mod_num, 'sercor_stat'] = stat['sercor_stat']
            this_model.loc[mod_num, 'sercor_p'] = stat['sercor_p']
            this_model.loc[mod_num, 'dw_stat'] = stat['dw_stat']
            this_model.loc[
                mod_num, 'arroots_gt_1'] = stat['arroots_outside_unit_circle']
            this_model.loc[
                mod_num, 'maroots_gt_1'] = stat['maroots_outside_unit_circle']

            this_model.loc[mod_num, 'datetime_run'] = pd.to_datetime(
                'today').strftime('%Y-%m-%d %H:%M:%S')

            df_results = pd.concat([df_results, this_model])
            mod_num += 1

        except Exception:
            continue
    return df_results
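
A usage sketch, assuming `ts` is a monthly Series and that model_resid_stats (called inside the loop) is defined elsewhere in this example:

results = model_gridsearch(ts,
                           p_min=0, d_min=0, q_min=0,
                           p_max=2, d_max=1, q_max=2,
                           sP_min=0, sD_min=0, sQ_min=0,
                           sP_max=1, sD_max=1, sQ_max=1,
                           trends=['n', 'c'],
                           s=12)
print(results.sort_values('aic').head())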
Example #10
def process(path):
    df_w = pd.read_csv(path, index_col='Date', parse_dates=True)
    df_w = df_w[df_w.index > '2017-01-01']
    df_w.head()

    df_w = df_w[['Close']]
    plt.plot(df_w['Close'], label='Close')
    plt.title('Stock ' + str(df_w.index[0]).split(' ')[0] + ' to ' +
              str(df_w.index[-1]).split(' ')[0],
              fontsize=20)
    plt.xlabel('Days', fontsize=15)
    plt.ylabel('Closing Stock', fontsize=15)
    plt.legend(loc='upper left')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    fig.savefig("static/results/livechart.png")

    decomposition = seasonal_decompose(df_w,
                                       model='additive',
                                       two_sided=False,
                                       period=52)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    resid = decomposition.resid
    plt.plot(trend, label='Trend')
    plt.xlabel('Days', fontsize=15)
    plt.title('Stock Trend', fontsize=20)
    plt.ylabel('Stock Values', fontsize=15)
    plt.xticks(rotation=90)
    plt.legend(loc='upper left')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    fig.savefig("static/results/Trend.png")

    plt.plot(seasonal, label='Seasonality')
    plt.xlabel('Days', fontsize=15)
    plt.ylabel('Seasonal Component', fontsize=15)
    plt.title('Stock Seasonality', fontsize=20)
    plt.xticks(rotation=90)
    plt.legend(loc='upper left')
    fig = plt.gcf()
    fig.set_size_inches(15, 4.5)
    fig.savefig("static/results/Seasonality.png")

    ###### Adjusting Outliers #######
    wnd = 20

    df_w['RollingStd'] = df_w['Close'].rolling(window=wnd).std()
    df_w['Rollingmean'] = df_w['Close'].rolling(window=wnd).mean()

    std_col = df_w.columns.get_loc('RollingStd')
    mean_col = df_w.columns.get_loc('Rollingmean')
    close_col = df_w.columns.get_loc('Close')

    st = df_w['RollingStd'].iloc[wnd]
    mn = df_w['Rollingmean'].iloc[wnd]

    for i in range(wnd + 1, len(df_w)):
        if df_w['RollingStd'].iloc[i] - st > st:
            # cap the spike at roughly 1.96 rolling standard deviations
            df_w.iloc[i, std_col] = st * 1.96
            df_w.iloc[i, close_col] = mn + st * 1.96
            if mn > df_w['Rollingmean'].iloc[i]:
                df_w.iloc[i, mean_col] = mn - st
            else:
                df_w.iloc[i, mean_col] = mn + st
        st = df_w['RollingStd'].iloc[i]
        mn = df_w['Rollingmean'].iloc[i]

    plt.plot(df_w['Rollingmean'], label='Rolling Mean')
    plt.plot(df_w['Close'][wnd:], label='Close')
    plt.xlabel('Days', fontsize=15)
    plt.ylabel('Stock values', fontsize=15)
    plt.title('Rolling Stats', fontsize=20)
    plt.xticks(rotation=90)
    plt.legend(loc='upper left')
    fig = plt.gcf()
    fig.set_size_inches(15, 4.5)
    fig.savefig("static/results/Rolling Stats.png")

    plt.plot(df_w['RollingStd'], label='Rolling STD')
    plt.legend(loc='upper left')
    fig = plt.gcf()
    fig.set_size_inches(15, 4.5)
    fig.savefig("static/results/Rolling STD.png")

    ############## Revenue Time series ACF and PACF Charts ####################

    df_w = df_w[['Close']]
    lag_acf = acf(df_w, nlags=20)
    lag_pacf = pacf(df_w, nlags=20, method='ols')

    #################### Looking at charts above we can create a differenced AR model of order 1  ###################
    ############### Run SARIMA Model ###################
    train = df_w['Close'][0:-10]
    test = df_w['Close'][len(train):]

    p = 1
    d = 0
    q = 0
    pp = 0
    dd = 1
    qq = 0
    z = 52
    aic = 'null'

    amape = 99
    af = []

    try:
        model = smt.SARIMAX(train.asfreq(freq='1d'),
                            exog=None,
                            order=(p, d, q),
                            seasonal_order=(pp, dd, qq, z),
                            trend='n').fit()
        aic = model.aic
        aic = round(aic, 2)
        pred = model.get_forecast(len(test))
        fcst = pred.predicted_mean
        fcst.index = test.index
        mapelist = []
        for i in range(len(fcst)):
            mapelist.insert(i, (np.absolute(test.iloc[i] - fcst.iloc[i])) / test.iloc[i])
        mape = np.mean(mapelist) * 100
        mape = round(mape, 2)
    except Exception:
        mape = 9999
        # fall back to a naive flat forecast so the plotting and metrics below still run
        fcst = pd.Series(train.iloc[-1], index=test.index)

    amape = mape
    sap = p
    sad = d
    saq = q
    app = pp
    add = dd
    aqq = qq
    az = z
    af = fcst
    mse = mean_squared_error(test, af)
    rmse = np.sqrt(mse)
    rmse = round(rmse, 1)

    plt.plot(train)
    plt.plot(test, label='Actual')
    plt.plot(af, label='Predicted')
    fig = plt.gcf()
    fig.set_size_inches(15, 5.5)
    plt.title("Existing Prediction", fontsize=20)
    plt.legend(loc='upper left')
    plt.xlabel('Weeks', fontsize=15)
    fig.savefig("static/results/Previous.png")

    model = smt.SARIMAX(df_w.asfreq(freq='1d'),
                        exog=None,
                        order=(sap, sad, saq),
                        seasonal_order=(app, add, aqq, az)).fit()
    pred = model.get_forecast(10)
    cf = pred.conf_int(alpha=0.05)
    ax = df_w.plot(label='observed', figsize=(16.5, 5.5))
    pred.predicted_mean.plot(ax=ax, label='Forecast')
    ax.fill_between(cf.index,
                    cf.iloc[:, 0],
                    cf.iloc[:, 1],
                    color='k',
                    alpha=.25)
    ax.set_xlabel('Days', fontsize=15)
    ax.set_ylabel('Stock Price', fontsize=15)
    plt.legend(loc='upper left')
    plt.title("Forecasts from " + str(cf.index[0]).split(' ')[0] + " to " +
              str(cf.index[-1]).split(' ')[0],
              fontsize=20)
    fig = plt.gcf()
    fig.set_size_inches(15, 5.5)
    fig.savefig("static/results/Forecast.png")

    print(pred.predicted_mean)
    print(type(pred.predicted_mean))
    fcst = pred.conf_int(alpha=0.05)
    fcst['Forecast'] = pred.predicted_mean
    fcst = fcst.round(1)
    forecast = pd.DataFrame()
    forecast['Lower Price'] = fcst.apply(
        lambda x: "{:,}".format(x['lower Close']), axis=1)
    forecast['Upper Price'] = fcst.apply(
        lambda x: "{:,}".format(x['upper Close']), axis=1)
    forecast['Forecast'] = fcst.apply(lambda x: "{:,}".format(x['Forecast']),
                                      axis=1)
    return pred.predicted_mean
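
A usage sketch; the CSV path below is hypothetical, the file needs 'Date' and 'Close' columns, and the static/results/ output folder must already exist:

forecast_mean = process("static/data/prices.csv")
print(forecast_mean)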
Example #11
# $$y_t = c + e_t + \theta_1 e_{t-1} + \theta_2 e_{t-2} + \ldots + \theta_q e_{t-q}$$
#
# Here the $e_{t-i}$ terms are the residuals (forecast errors) from previous predictions, weighted by the $\theta_i$ coefficients.

# ##### Combine

# $$\Delta y_t = c + \phi_1 \Delta y_{t-1} + \theta_1 e_{t-1} + e_t$$
#
# Using lag notation, where $L y_t = y_{t-1}$, i.e. y.shift() in pandas, we can rewrite that as
#
# $$(1 - \phi_1 L) (1 - L)y_t = c + (1 + \theta L)e_t$$
#
# for our specific `ARIMA(1, 1, 1)` model

mod = smt.SARIMAX(y, trend='c', order=(1, 1, 1))
res = mod.fit()
tsplot(res.resid[2:], lags=24);

res.summary()

# Looks better,
# but still needs seasonality adjustment.
#
# The seasonal ARIMA model is written as
# $\mathrm{ARIMA}(p,d,q)\times(P,D,Q)_s$.
# Lowercase letters are the non-seasonal components.
# Upper-case letters are the analogous specification for the seasonal component,
# where $s$ is the periodicity
# (4 for quarterly data, 12 for monthly).
#
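# As a concrete sketch of that notation, a seasonal fit of the same monthly
# series `y` might look like this (the order mirrors the one used later in
# Example #18):

mod_seasonal = smt.SARIMAX(y, trend='c', order=(1, 1, 2),
                           seasonal_order=(0, 1, 2, 12))
res_seasonal = mod_seasonal.fit()
res_seasonal.summary()
tsplot(res_seasonal.resid[12:], lags=24);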
Example #12
test_results.info()
test_results.dropna()
sns.heatmap(test_results.RMSE.unstack().mul(10),
            fmt='.2',
            annot=True,
            cmap='Blues_r')
plt.savefig(f'{p}3.png')
plt.show()

sns.heatmap(test_results.BIC.unstack(), fmt='.2f', annot=True, cmap='Blues_r')
plt.show()

model = tsa.ARIMA(endog=industrial_production_log_diff, order=(0, 0, 4)).fit()  # MA(4); tsa.ARMA was removed from recent statsmodels
print(model.summary())
plot_correlogram(model.resid)
plt.savefig(f'{p}4.png')
plt.show()

print(df[['RMSE', 'AIC', 'BIC']].sort_values('RMSE').head())
df[['RMSE', 'AIC', 'BIC']].corr('spearman')
sns.jointplot(y='RMSE', x='BIC', data=df[['RMSE', 'BIC']].rank())
df[(df.RMSE < df.RMSE.quantile(.05)) & (df.BIC < df.BIC.quantile(.1))]

best_model = tsa.SARIMAX(endog=industrial_production_log_diff,
                         order=(2, 0, 3),
                         seasonal_order=(1, 0, 0, 12)).fit()
print(best_model.summary())
plot_correlogram(best_model.resid, lags=20, title='Residuals')
plt.savefig(f'{p}5.png')
plt.show()
Example #13
phi, Phi = 0, 0
theta, Theta = 0.5, 0.8
ar_params = np.array([])
ma_params = np.array(
    [theta, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Theta, theta * Theta])
ar, ma = np.r_[1, -ar_params], np.r_[1, ma_params]
y = sm.tsa.ArmaProcess(ar, ma).generate_sample(500, burnin=50)

time_series = pd.Series(y)
train = time_series[:400]
test = time_series[400:]

p, d, q = 0, 0, 1
P, D, Q = 0, 0, 1
# trend is a SARIMAX constructor argument, not a fit() argument
model = smt.SARIMAX(train, order=(p, d, q),
                    seasonal_order=(P, D, Q, 12), trend='c').fit()
prediction_train = model.predict()
forecast = model.get_forecast(len(test))
prediction_test = forecast.predicted_mean
prediction_test_bound = forecast.conf_int()

_, axes = plt.subplots(1, 1, figsize=(12, 5))
axes.plot(test.index, prediction_test, c='r', label='predict')
axes.fill_between(prediction_test_bound.index,
                  prediction_test_bound.iloc[:, 0],
                  prediction_test_bound.iloc[:, 1],
                  color='k',
                  alpha=0.15)

train.plot(label='train', ax=axes)
Example #14
plot_model(y1[y_test.index[0]:], ar_yhat)
# %%
#TODO need to define the p,q vals
# arma_model = sm.ARMA(y1,?,exog=exog).fit()
# print('ARMA Summary')
# arma_model.summary()
# plot_model(y1,arma_model.fittedvalues)
# %%
#TODO is our data stationary?
#TODO need to define the p,q vals
# arima_model = sm.ARIMA(y1,exog=exog).fit()
# print('ARIMA Summary')
# arima_model.summary()
# plot_model(y1,arima_model.fittedvalues)
#%%
sarimax_model = sm.SARIMAX(y1, exog=exog).fit()
print('SARIMAX Summary:')
sarimax_model.summary()
plot_model(y1, sarimax_model.fittedvalues)

# %%
#Basic Attempt at Markov Chain
markov_model = sm.MarkovRegression(y1,
                                   k_regimes=3,
                                   trend='nc',
                                   switching_variance=True).fit()
markov_model.summary()
#%%
#plot markov
fig, axes = plt.subplots(2, figsize=(20, 7))
axes[0].plot(markov_model.filtered_marginal_probabilities[0])
Example #15
# remove trend, hetero
# df["log"] = np.log(df["passengers"])
# df["log_diff"] = df["log"].diff()
# df["log_diff"].plot()
# df.dropna(inplace=True)

# split the data into train and test
num = int(df.shape[0] * 2 / 3)
train_df = df.iloc[:num, :]
test_df = df.drop(index=train_df.index)
#

train_df_log = np.log(train_df)
test_df_log = np.log(test_df)

m1 = tsa.SARIMAX(train_df_log, order=(1, 1, 0)).fit()
m1.summary()

m1_df = train_df_log.copy()
m1_df["dx"] = m1_df["passengers"].diff()
m1_df["dxhat"] = m1.params[0] * m1_df["dx"].shift(1)
m1_df["xhat"] = m1_df["dxhat"] + m1_df["passengers"].shift(1)
m1_df["m_fit"] = m1.fittedvalues
m1_df["err"] = m1_df["xhat"] - m1_df["m_fit"]
m1_df["err"].iloc[3:].plot()

m1 = tsa.SARIMAX(train_df_log, order=(1, 1, 1)).fit()

m1_df = train_df_log.copy()
m1_df["dx"] = m1_df["passengers"].diff()
m1_df["res"] = m1.resid
Example #16
    # time_series = web.DataReader('IPGMFN', 'fred', '1988', '2017-12').squeeze().dropna()
    # time_series_log = np.log(time_series)
    # time_series_log_diff = time_series_log.diff(12).dropna()

    return (time_series, time_series_log, time_series_log_diff)


time_series, time_series_log, time_series_log_diff = get_data()
''' SARIMAX '''

model1 = tsa.statespace.SARIMAX(time_series_log,
                                order=(2, 0, 2),
                                seasonal_order=(0, 1, 0, 12)).fit()
print(model1.summary())
plot_model_summary(model1.summary(), title='ARMA_model_summary_1')

model2 = tsa.statespace.SARIMAX(time_series_log_diff,
                                order=(2, 0, 2),
                                seasonal_order=(0, 0, 0, 12)).fit()
print(model2.summary())
plot_model_summary(model2.summary(), title='SARIMAX_model_summary_1')

print(model1.params.to_frame('SARIMAX').join(model2.params.to_frame('diff')))

best_model = tsa.SARIMAX(endog=time_series_log_diff,
                         order=(2, 0, 3),
                         seasonal_order=(1, 0, 0, 12)).fit()
print(best_model.summary())
plot_model_summary(best_model.summary(), title='best_SARIMAX_model_summary')
plot_correlogram(best_model.resid, lags=20, title='Residuals_SARIMAX')
Example #17
#---- ch04/import-tsa
import statsmodels.tsa.api as tsa


#---- ch04/acf/plot
tsa.graphics.plot_acf(y)
plt.show()


#---- ch04/pacf/plot
tsa.graphics.plot_pacf(y)
plt.show()


#---- ch04/ar-estimate
mod = tsa.SARIMAX(y, order=(2, 0, 0))
result = mod.fit() 


#---- ch04/ar-params
result.params


#---- ch04/ar-summary/dnr
result.summary()




Example #18
def example_3():
	import pandas_datareader as pdr

	gs = pdr.data.DataReader("GS", data_source='yahoo', start='2006-01-01', end='2010-01-01')
	print(gs.head().round(2))
	print(gs.loc[pd.Timestamp('2006-01-01'):pd.Timestamp('2006-12-31')].head())
	print(gs.loc['2006'].head())

	#--------------------
	# Resampling.
	if True:
		print(gs.resample("5d").mean().head())
		print(gs.resample("W").agg(['mean', 'sum']).head())

		# You can up-sample to convert to a higher frequency. The new points are filled with NaNs.
		print(gs.resample("6h").mean().head())

	#--------------------
	# Rolling, expanding, exponential weighted (EW).
	if False:
		gs.Close.plot(label='Raw')
		gs.Close.rolling(28).mean().plot(label='28D MA')
		gs.Close.expanding().mean().plot(label='Expanding Average')
		gs.Close.ewm(alpha=0.03).mean().plot(label='EWMA($\\alpha=.03$)')

		plt.legend(bbox_to_anchor=(1.25, .5))
		plt.tight_layout()
		plt.ylabel("Close ($)")
		sns.despine()

		# Each of .rolling, .expanding, and .ewm return a deferred object, similar to a GroupBy.
		roll = gs.Close.rolling(30, center=True)

		m = roll.agg(['mean', 'std'])
		plt.figure()
		ax = m['mean'].plot()
		ax.fill_between(m.index, m['mean'] - m['std'], m['mean'] + m['std'], alpha=.25)
		plt.tight_layout()
		plt.ylabel("Close ($)")
		sns.despine()

	#--------------------
	# Grab bag.
	if False:
		# Offsets.
		#	These are similar to dateutil.relativedelta, but works with arrays.
		print(gs.index + pd.DateOffset(months=3, days=-2))

		# Holiday calendars.
		from pandas.tseries.holiday import USColumbusDay
		print(USColumbusDay.dates('2015-01-01', '2020-01-01'))

		# Timezones.
		# tz naive -> tz aware -> convert to the desired timezone (UTC)
		print(gs.tz_localize('US/Eastern').tz_convert('UTC').head())

	#--------------------
	# Modeling time series.
	if True:
		from collections import namedtuple
		import statsmodels.formula.api as smf
		import statsmodels.tsa.api as smt
		import statsmodels.api as sm
		from modern_pandas_utils import download_timeseries

		def download_many(start, end):
			months = pd.period_range(start, end=end, freq='M')
			# We could easily parallelize this loop.
			for i, month in enumerate(months):
				download_timeseries(month)

		def time_to_datetime(df, columns):
			'''
			Combine all time items into datetimes.
			2014-01-01,1149.0 -> 2014-01-01T11:49:00
			'''
			def converter(col):
				timepart = (col.astype(str)
					.str.replace(r'\.0$', '', regex=True)  # NaNs force float dtype
					.str.pad(4, fillchar='0'))
				return  pd.to_datetime(df['fl_date'] + ' ' + timepart.str.slice(0, 2) + ':' + timepart.str.slice(2, 4), errors='coerce')
			df[columns] = df[columns].apply(converter)
			return df

		def unzip_one(fp):
			try:
				zf = zipfile.ZipFile(fp)
				csv = zf.extract(zf.filelist[0])
				return csv
			except zipfile.BadZipFile as ex:
				print('zipfile.BadZipFile raised in {}: {}.'.format(fp, ex))
				raise

		def read_one(fp):
			df = (pd.read_csv(fp, encoding='latin1')
				.rename(columns=str.lower)
				.drop('unnamed: 6', axis=1)
				.pipe(time_to_datetime, ['dep_time', 'arr_time', 'crs_arr_time', 'crs_dep_time'])
				.assign(fl_date=lambda x: pd.to_datetime(x['fl_date'])))
			return df

		store = './modern_pandas_data/ts.hdf5'

		if not os.path.exists(store):
			download_many('2000-01-01', '2016-01-01')

			zips = glob.glob(os.path.join('modern_pandas_data', 'timeseries', '*.zip'))
			csvs = [unzip_one(fp) for fp in zips]
			dfs = [read_one(fp) for fp in csvs]
			df = pd.concat(dfs, ignore_index=True)

			df['origin'] = df['origin'].astype('category')
			df.to_hdf(store, 'ts', format='table')
		else:
			df = pd.read_hdf(store, 'ts')

		with pd.option_context('display.max_rows', 100):
			print(df.dtypes)

		daily = df.fl_date.value_counts().sort_index()
		y = daily.resample('MS').mean()
		print(y.head())

		ax = y.plot()
		ax.set(ylabel='Average Monthly Flights')
		sns.despine()

		X = (pd.concat([y.shift(i) for i in range(6)], axis=1, keys=['y'] + ['L%s' % i for i in range(1, 6)]).dropna())
		print(X.head())

		mod_lagged = smf.ols('y ~ trend + L1 + L2 + L3 + L4 + L5', data=X.assign(trend=np.arange(len(X))))
		res_lagged = mod_lagged.fit()
		res_lagged.summary()

		sns.heatmap(X.corr())

		ax = res_lagged.params.drop(['Intercept', 'trend']).plot.bar(rot=0)
		plt.ylabel('Coefficient')
		sns.despine()

		# Autocorrelation.
		# 'Results.resid' is a series of residuals: y - ŷ.
		mod_trend = sm.OLS.from_formula('y ~ trend', data=y.to_frame(name='y').assign(trend=np.arange(len(y))))
		res_trend = mod_trend.fit()

		def tsplot(y, lags=None, figsize=(10, 8)):
			fig = plt.figure(figsize=figsize)
			layout = (2, 2)
			ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
			acf_ax = plt.subplot2grid(layout, (1, 0))
			pacf_ax = plt.subplot2grid(layout, (1, 1))
			
			y.plot(ax=ts_ax)
			smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
			smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
			[ax.set_xlim(1.5) for ax in [acf_ax, pacf_ax]]
			sns.despine()
			plt.tight_layout()
			return ts_ax, acf_ax, pacf_ax

		tsplot(res_trend.resid, lags=36)

		y.to_frame(name='y').assign(Δy=lambda x: x.y.diff()).plot(subplots=True)
		sns.despine()

		ADF = namedtuple("ADF", "adf pvalue usedlag nobs critical icbest")

		#ADF(*smt.adfuller(y))._asdict()
		ADF(*smt.adfuller(y.dropna()))._asdict()
		ADF(*smt.adfuller(y.diff().dropna()))._asdict()

		data = (y.to_frame(name='y').assign(Δy=lambda df: df.y.diff()).assign(LΔy=lambda df: df.Δy.shift()))
		mod_stationary = smf.ols('Δy ~ LΔy', data=data.dropna())
		res_stationary = mod_stationary.fit()

		tsplot(res_stationary.resid, lags=24)

		# Seasonality.
		#smt.seasonal_decompose(y).plot()
		smt.seasonal_decompose(y.fillna(method='ffill')).plot()

		# ARIMA.
		mod = smt.SARIMAX(y, trend='c', order=(1, 1, 1))
		res = mod.fit()
		tsplot(res.resid[2:], lags=24)

		res.summary()

		mod_seasonal = smt.SARIMAX(y, trend='c', order=(1, 1, 2), seasonal_order=(0, 1, 2, 12), simple_differencing=False)
		res_seasonal = mod_seasonal.fit()

		res_seasonal.summary()

		tsplot(res_seasonal.resid[12:], lags=24)

		# Forecasting.
		pred = res_seasonal.get_prediction(start='2001-03-01')
		pred_ci = pred.conf_int()

		plt.figure()
		ax = y.plot(label='observed')
		pred.predicted_mean.plot(ax=ax, label='Forecast', alpha=.7)
		ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color='k', alpha=.2)
		ax.set_ylabel("Monthly Flights")
		plt.legend()
		sns.despine()

		pred_dy = res_seasonal.get_prediction(start='2002-03-01', dynamic='2013-01-01')
		pred_dy_ci = pred_dy.conf_int()

		plt.figure()
		ax = y.plot(label='observed')
		pred_dy.predicted_mean.plot(ax=ax, label='Forecast')
		ax.fill_between(pred_dy_ci.index, pred_dy_ci.iloc[:, 0], pred_dy_ci.iloc[:, 1], color='k', alpha=.25)
		ax.set_ylabel("Monthly Flights")

		# Highlight the forecast area.
		ax.fill_betweenx(ax.get_ylim(), pd.Timestamp('2013-01-01'), y.index[-1], alpha=.1, zorder=-1)
		ax.annotate('Dynamic $\\longrightarrow$', (pd.Timestamp('2013-02-01'), 550))

		plt.legend()
		sns.despine()

	plt.show()