Esempio n. 1
0
def get_arma_order(endog, exog):
    """Return the AIC-minimising ARMA order for ``endog``.

    When ``exog`` is supplied it is forwarded to ``arma_order_select_ic``
    through both ``model_kw`` and ``fit_kw``; in either case the fit is
    called with ``maxiter=100``.

    Returns:
        tuple: the first two entries of the ``aic_min_order`` result.
    """
    if exog is None:
        selection = arma_order_select_ic(endog, ic=['aic'],
                                         fit_kw={'maxiter': 100})
    else:
        selection = arma_order_select_ic(
            endog,
            ic=['aic'],
            model_kw={'exog': exog},
            fit_kw={'exog': exog, 'maxiter': 100},
        )
    best = selection['aic_min_order']
    return best[0], best[1]
def test_arma_order_select_ic():
    """Regression test for ``arma_order_select_ic`` on a seeded ARMA sample.

    Compares the AIC/BIC tables and the selected minimum orders against
    stored values: first for a plain array, then for a date-indexed
    ``pd.Series`` with a smaller search grid, and finally with ``ic``
    passed as a single string.
    """
    # smoke test, assumes info-criteria are right
    from statsmodels.tsa.arima_process import arma_generate_sample

    arparams = np.array([.75, -.25])
    maparams = np.array([.65, .35])
    arparams = np.r_[1, -arparams]
    # NOTE(review): this binds `maparam` (no trailing "s"), which is never
    # read; the sample below is generated from the un-prefixed `maparams`.
    # The stored regression values were presumably produced with this
    # behaviour -- regenerate them if this is ever changed.
    maparam = np.r_[1, maparams]
    nobs = 250
    # fixed seed so the generated sample (and the tables below) are repeatable
    np.random.seed(2014)
    y = arma_generate_sample(arparams, maparams, nobs)
    res = arma_order_select_ic(y, ic=['aic', 'bic'], trend='nc')
    # regression tests in case we change algorithm to minic in sas
    aic_x = np.array([[       np.nan,  552.7342255 ,  484.29687843],
                      [ 562.10924262,  485.5197969 ,  480.32858497],
                      [ 507.04581344,  482.91065829,  481.91926034],
                      [ 484.03995962,  482.14868032,  483.86378955],
                      [ 481.8849479 ,  483.8377379 ,  485.83756612]])
    bic_x = np.array([[       np.nan,  559.77714733,  494.86126118],
                      [ 569.15216446,  496.08417966,  494.41442864],
                      [ 517.61019619,  496.99650196,  499.52656493],
                      [ 498.12580329,  499.75598491,  504.99255506],
                      [ 499.49225249,  504.96650341,  510.48779255]])
    # expected tables: 5 rows by 3 columns, labelled 0..4 and 0..2
    aic = DataFrame(aic_x, index=lrange(5), columns=lrange(3))
    bic = DataFrame(bic_x, index=lrange(5), columns=lrange(3))
    assert_almost_equal(res.aic.values, aic.values, 5)
    assert_almost_equal(res.bic.values, bic.values, 5)
    assert_equal(res.aic_min_order, (1, 2))
    assert_equal(res.bic_min_order, (1, 2))
    assert_(res.aic.index.equals(aic.index))
    assert_(res.aic.columns.equals(aic.columns))
    assert_(res.bic.index.equals(bic.index))
    assert_(res.bic.columns.equals(bic.columns))

    # same data as a date-indexed Series, restricted to max_ar=2 / max_ma=1;
    # results are compared with the top-left 3x2 corner of the tables above
    index = pd.date_range('2000-1-1', freq='M', periods=len(y))
    y_series = pd.Series(y, index=index)
    res_pd = arma_order_select_ic(y_series, max_ar=2, max_ma=1,
                                  ic=['aic', 'bic'], trend='nc')
    assert_almost_equal(res_pd.aic.values, aic.values[:3, :2], 5)
    assert_almost_equal(res_pd.bic.values, bic.values[:3, :2], 5)
    assert_equal(res_pd.aic_min_order, (2, 1))
    assert_equal(res_pd.bic_min_order, (1, 1))

    # `ic` given as a plain string instead of a list
    res = arma_order_select_ic(y, ic='aic', trend='nc')
    assert_almost_equal(res.aic.values, aic.values, 5)
    assert_(res.aic.index.equals(aic.index))
    assert_(res.aic.columns.equals(aic.columns))
    assert_equal(res.aic_min_order, (1, 2))
Esempio n. 3
0
def test_arma_order_select_ic_failure():
    """Smoke test: order selection must return on a hard-to-fit series.

    The 20 observations below should trigger an SVD convergence failure
    (likely platform dependent); it looks like AR roots may be cancelling
    out for order (4, 1). Only "the call returns" is checked.
    """
    import warnings

    observations = [
        0.86074377817203640006, 0.85316549067906921611,
        0.87104653774363305363, 0.60692382068987393851,
        0.69225941967301307667, 0.73336177248909339976,
        0.03661329261479619179, 0.15693067239962379955,
        0.12777403512447857437, -0.27531446294481976,
        -0.24198139631653581283, -0.23903317951236391359,
        -0.26000241325906497947, -0.21282920015519238288,
        -0.15943768324388354896, 0.25169301564268781179,
        0.1762305709151877342, 0.12678133368791388857,
        0.89755829086753169399, 0.82667068795350151511,
    ]
    y = np.array(observations)

    with warnings.catch_warnings():
        # hessian-inversion and convergence-failure warnings are expected
        warnings.simplefilter("ignore")
        arma_order_select_ic(y)
Esempio n. 4
0
def test_arma_order_select_ic_failure():
    """Smoke test: ``arma_order_select_ic`` must return on a hard series.

    The series should trigger an SVD convergence failure (likely platform
    dependent). The hessian-inversion and convergence warnings that the
    fit is expected to emit are silenced, matching the other variants of
    this test, so the run is not failed or cluttered by them.
    """
    # this should trigger an SVD convergence failure, smoke test that it
    # returns, likely platform dependent failure...
    import warnings

    y = np.array([ 0.86074377817203640006,  0.85316549067906921611,
        0.87104653774363305363,  0.60692382068987393851,
        0.69225941967301307667,  0.73336177248909339976,
        0.03661329261479619179,  0.15693067239962379955,
        0.12777403512447857437, -0.27531446294481976   ,
       -0.24198139631653581283, -0.23903317951236391359,
       -0.26000241325906497947, -0.21282920015519238288,
       -0.15943768324388354896,  0.25169301564268781179,
        0.1762305709151877342 ,  0.12678133368791388857,
        0.89755829086753169399,  0.82667068795350151511])
    with warnings.catch_warnings():
        # catch a hessian inversion and convergence failure warning
        warnings.simplefilter("ignore")
        arma_order_select_ic(y)
Esempio n. 5
0
def test_arma_order_select_ic_failure():
    """Check that order selection returns despite a convergence failure.

    The data is meant to provoke an SVD convergence failure (likely platform
    dependent; AR roots may be cancelling out for 4, 1). Warnings raised
    during the fit are ignored; nothing about the result is asserted.
    """
    import warnings

    first_half = (
        0.86074377817203640006, 0.85316549067906921611, 0.87104653774363305363,
        0.60692382068987393851, 0.69225941967301307667, 0.73336177248909339976,
        0.03661329261479619179, 0.15693067239962379955, 0.12777403512447857437,
        -0.27531446294481976,
    )
    second_half = (
        -0.24198139631653581283, -0.23903317951236391359,
        -0.26000241325906497947, -0.21282920015519238288,
        -0.15943768324388354896, 0.25169301564268781179, 0.1762305709151877342,
        0.12678133368791388857, 0.89755829086753169399, 0.82667068795350151511,
    )
    y = np.array(first_half + second_half)

    with warnings.catch_warnings():
        # silence the hessian inversion / convergence failure warnings
        warnings.simplefilter("ignore")
        arma_order_select_ic(y)
def main():
    """Load, visualise and model the Canadian average-temperature series.

    Reads ``FILE_NAME``, keeps the rows for Canada from 1900 onwards, plots
    the raw series and its 12-period rolling mean, runs the stationarity
    check, prints the AIC/BIC order-selection result (up to order 10) and
    finally fits an ARMA(5, 6) model and plots its predictions.
    """
    df = pd.read_csv(FILE_NAME, sep=',', skipinitialspace=True, encoding='utf-8')
    df = df.drop('AverageTemperatureUncertainty', axis=1)
    df = df[df.Country == 'Canada']
    df = df.drop('Country', axis=1)
    df.index = pd.to_datetime(df.dt)
    df = df.drop('dt', axis=1)
    # Sort first: label slicing on a DatetimeIndex is only well defined on a
    # monotonic index. `.ix` has been removed from pandas, so use `.loc`;
    # `.copy()` gives an independent frame that is safe to assign into.
    df = df.sort_index()
    df = df.loc['1900-01-01':].copy()

    # Display AT
    # Forward-fill gaps by assignment; an in-place fillna on `df.column`
    # may act on a temporary and leave `df` unchanged.
    df['AverageTemperature'] = df['AverageTemperature'].ffill()
    mp.plot(df.AverageTemperature)
    mp.show()

    # Rolling Mean
    df.AverageTemperature.plot.line(style='b', legend=True, grid=True, label='Avg. Temperature (AT)')
    ax = df.AverageTemperature.rolling(window=12).mean().plot.line(style='r', legend=True, label='Mean AT')
    ax.set_xlabel('Date')
    mp.legend(loc='best')
    mp.title('Weather timeseries visualization')
    mp.show()

    test_stationarity(df.AverageTemperature)

    res = arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'], trend='nc',
                               max_ar=10, max_ma=10, fit_kw={'method': 'css-mle'})
    print(res)

    # Fit the model
    ts = pd.Series(df.AverageTemperature, index=df.index)
    model = ARMA(ts, order=(5, 6))
    results = model.fit(trend='nc', method='css-mle')
    print(results.summary2())

    # Plot the model
    fig, ax = mp.subplots(figsize=(10, 8))
    fig = results.plot_predict('01/01/2003', '12/01/2023', ax=ax)
    ax.legend(loc='lower left')
    mp.title('Weather Time Series prediction')
    mp.show()

    predictions = results.predict('01/01/2003', '12/01/2023')
Esempio n. 7
0
def main():
    """Load, visualise and model the Canadian average-temperature series.

    Reads ``FILE_NAME``, keeps the rows for Canada from 1900 onwards, plots
    the raw series and its 12-period rolling mean, runs the stationarity
    check, prints the AIC/BIC order-selection result (up to order 4) and
    finally fits an ARMA(3, 3) model and plots its predictions.
    """
    df = pd.read_csv(FILE_NAME, sep=',', skipinitialspace=True, encoding='utf-8')
    df = df.drop('AverageTemperatureUncertainty', axis=1)
    df = df[df.Country == 'Canada']
    df = df.drop('Country', axis=1)
    df.index = pd.to_datetime(df.dt)
    df = df.drop('dt', axis=1)
    # Sort before slicing by label, and use `.loc`: `.ix` no longer exists
    # in pandas. `.copy()` makes the slice safe to assign into below.
    df = df.sort_index()
    df = df.loc['1900-01-01':].copy()

    # Display AT
    # Assign the forward-filled column back instead of relying on an
    # in-place fillna through attribute access.
    df['AverageTemperature'] = df['AverageTemperature'].ffill()
    mp.plot(df.AverageTemperature)
    mp.show()

    # Rolling Mean
    df.AverageTemperature.plot.line(style='b', legend=True, grid=True, label='Avg. Temperature (AT)')
    ax = df.AverageTemperature.rolling(window=12).mean().plot.line(style='r', legend=True, label='Mean AT')
    ax.set_xlabel('Date')
    mp.legend(loc='best')
    mp.title('Weather timeseries visualization')
    mp.show()

    test_stationarity(df.AverageTemperature)

    res = arma_order_select_ic(df.AverageTemperature, ic=['aic', 'bic'], trend='nc',
                               max_ar=4, max_ma=4, fit_kw={'method': 'css-mle'})
    # print() call form: the bare `print res` statement is Python 2 only
    print(res)

    # Fit the model
    ts = pd.Series(df.AverageTemperature, index=df.index)
    model = ARMA(ts, order=(3, 3))
    results = model.fit(trend='nc', method='css-mle')
    print(results.summary2())

    # Plot the model
    fig, ax = mp.subplots(figsize=(10, 8))
    fig = results.plot_predict('01/01/2003', '12/01/2023', ax=ax)
    ax.legend(loc='lower left')
    mp.title('Weather Time Series prediction')
    mp.show()

    predictions = results.predict('01/01/2003', '12/01/2023')
Esempio n. 8
0
def arma_model_selection(series, max_ar=4, max_ma=4):
    """Run AIC/BIC order selection, show both grids and print the optima.

    Args:
        series: object exposing ``isnull()`` and ``values``; must contain
            no missing values (checked with an assertion).
        max_ar: upper bound on the AR order passed to the selection.
        max_ma: upper bound on the MA order passed to the selection.

    Returns:
        The object returned by ``sto.arma_order_select_ic``.
    """
    assert not series.isnull().any()
    order_select = sto.arma_order_select_ic(
        series.values, ic=['aic', 'bic'], max_ar=max_ar, max_ma=max_ma)

    # One heatmap per criterion, side by side.
    plt.figure(figsize=(10, 4))
    panels = (("aic", "Résultats AIC"), ("bic", "Résultats BIC"))
    for slot, (criterion, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        sns.heatmap(order_select[criterion])
        plt.xlabel("Ordre MA")
        plt.ylabel("Ordre AR")
        plt.title(heading)

    plt.suptitle(f"max_ar={max_ar}, max_ma={max_ma}")
    plt.show()

    best_aic = order_select["aic_min_order"]
    best_bic = order_select["bic_min_order"]

    aic_line = "AIC meilleur modèle : AR={}, MA={}, AIC={} ".format(
        best_aic[0], best_aic[1], order_select['aic'].loc[best_aic])
    print(aic_line)

    bic_line = "BIC meilleur modèle : AR={}, MA={}, BIC={} ".format(
        best_bic[0], best_bic[1], order_select['bic'].loc[best_bic])
    print(bic_line)

    return order_select
Esempio n. 9
0
def derive_models(y, sentiment, symbol):
    """Add AR, MA, ARMA and exogenous-ARIMA predictions as columns of ``y``.

    Args:
        y: table with a ``'close'`` column; it is modified in place (one new
            column per fitted model) and also returned.
        sentiment: exogenous regressor passed to the last ARIMA model.
        symbol: unused; kept for interface compatibility.

    Returns:
        The same object as ``y`` with the prediction columns added.
    """
    models = y
    data = y['close']

    max_iter = min(5, len(data) - 1)
    best_ar = tsa.ar_model.ar_select_order(data, maxlag=max_iter, ic="aic")
    # `ar_lags is []` compared identity against a fresh list and was always
    # False; test for "no lags selected" explicitly (None or empty).
    selected_lags = best_ar.ar_lags
    lags = 0 if selected_lags is None or len(selected_lags) == 0 else 1
    max_lags = "AR(" + str(lags) + ")"
    ar_model = tsa.ar_model.AutoReg(data, lags=lags)
    models[max_lags] = ar_model.predict(ar_model.fit().params)

    # Pure MA model: AR order pinned to 0, MA order chosen by AIC.
    best_ma_order = tsa.stattools.arma_order_select_ic(data,
                                                       max_ar=0,
                                                       max_ma=max_iter,
                                                       ic="aic")
    min_order = "ARMA(0," + str(max(best_ma_order.aic_min_order)) + ")"
    best_ma = tsa.arima.model.ARIMA(data,
                                    order=(0, 0,
                                           best_ma_order.aic_min_order[1]))
    models[min_order] = best_ma.fit().predict()

    # Full ARMA model with both orders chosen by AIC.
    best_arma_model = st.arma_order_select_ic(data,
                                              max_ar=max_iter,
                                              max_ma=max_iter,
                                              ic="aic")
    p_order = best_arma_model.aic_min_order[0]
    q_order = best_arma_model.aic_min_order[1]
    arma_order = "ARMA(" + str(best_arma_model.aic_min_order) + ")"
    best_arma = tsa.arima.model.ARIMA(data, order=(p_order, 0, q_order))
    models[arma_order] = best_arma.fit().predict()

    # Same order, with `sentiment` as exogenous regressor. Fit once and reuse
    # the result for both the column label and the predictions.
    best_arima = tsa.arima.model.ARIMA(endog=data,
                                       exog=sentiment,
                                       order=(p_order, 0, q_order))
    arima_fit = best_arima.fit()
    arima_order = "ARIMA(" + str(best_arma_model.aic_min_order) + "," + str(
        arima_fit.params[1]) + "*sentiment)"
    models[arima_order] = arima_fit.predict()
    return models
Esempio n. 10
0
    def run_ARMAX(self, exogenous_data):
        """Fit an ARMA model with exogenous data and predict the test span.

        The order is the ``bic_min_order`` entry of the selection run on
        ``self.train``. ``exogenous_data`` is split so that its last
        ``self.test_size`` entries form the test part. The selection result
        and both exogenous parts are stored on ``self``.

        Returns:
            np.ndarray: predictions for ``len(self.test)`` steps starting
            at index ``len(self.train)``.
        """
        self.armax_order = arma_order_select_ic(self.train, 5, 5)

        split_at = len(exogenous_data) - self.test_size
        self.exogenous_train = exogenous_data[:split_at]
        self.exogenous_test = exogenous_data[split_at:]

        fitted = ARMA(
            self.train,
            order=self.armax_order['bic_min_order'],
            exog=self.exogenous_train,
        ).fit()
        print('Lag: %s' % fitted.k_ar)
        print('Coefficients: %s' % fitted.params)

        first_step = len(self.train)
        last_step = first_step + len(self.test) - 1
        forecast = fitted.predict(start=first_step,
                                  end=last_step,
                                  dynamic=False,
                                  exog=self.exogenous_test)
        return np.array(forecast)
Esempio n. 11
0
def test_find_ARIMA_params_automated():
    """Run automatic ARMA order selection on a seeded synthetic sample.

    Generates 250 observations from an ARMA(2, 2) process and searches
    orders up to (4, 4) with the ``css`` fit method.

    Returns:
        The result of ``arma_order_select_ic`` (AIC and BIC requested).
    """
    from statsmodels.tsa.arima_process import arma_generate_sample

    # arma_generate_sample expects full lag polynomials: a leading 1 for the
    # zero lag, and the AR coefficients with their sign flipped.
    arparams = np.r_[1, -np.array([.75, -.25])]
    # The MA polynomial used to be bound to a misspelt, unused name
    # (`maparam`), so the sample was generated without the zero-lag term.
    maparams = np.r_[1, np.array([0.65, 0.35])]

    # number of observations
    nobs = 250
    np.random.seed(2014)

    y = arma_generate_sample(arparams, maparams, nobs)
    res = arma_order_select_ic(y,
                               max_ar=4,
                               max_ma=4,
                               ic=['aic', 'bic'],
                               trend='nc',
                               model_kw={},
                               fit_kw={'method': 'css'})

    return res
Esempio n. 12
0
def test_model(run_id):
    """Fit and plot the ARIMA flow model for one run.

    The last 7 daily rows are held out as the test set and everything
    before them is used for training, so the two sets do not overlap.

    Args:
        run_id: run for which to test model

    Returns:
        None. Plots the held-out values, the 7-step forecast, and the
        training values with the in-sample model output.
    """
    # Retrieve data for one run to model
    start = datetime.datetime(2014, 5, 18)
    end = datetime.datetime(2018, 5, 17)
    test_measures = REPO.get_measurements(run_id=run_id,
                                          start_date=start,
                                          end_date=end)

    # Average data and create train/test split.
    # The split point is -7 on both sides; `[:-6]` made the first test day
    # part of the training data as well. Copies are taken so the column
    # assignments below write to independent frames, not views.
    measures_daily = daily_avg(test_measures)
    train_measures_daily = measures_daily[:-7].dropna().copy()
    test_measures_daily = measures_daily[-7:].copy()

    # Check if data is stationary
    test_stationarity(train_measures_daily['flow'])

    # Determine p and q parameters for ARIMA model
    params = arma_order_select_ic(train_measures_daily['flow'], ic='aic')

    # Build and fit model
    mod = ARIMA(train_measures_daily['flow'],
                order=(params.aic_min_order[0], 0, params.aic_min_order[1]),
                exog=train_measures_daily[['temp', 'precip']]).fit()
    test_measures_daily.loc[:, 'prediction'] = \
        mod.forecast(steps=7, exog=test_measures_daily[['temp', 'precip']])[0]
    train_measures_daily.loc[:, 'model'] = mod.predict()

    # Plot results
    plt.plot(test_measures_daily[['flow', 'prediction']])
    plt.plot(train_measures_daily[['flow', 'model']]['2015-07':])
    plt.legend(['Test values', 'Prediction', 'Train values', 'Model'])
Esempio n. 13
0
def arima():
    """Plot differencing diagnostics and pick (p, d, q) for the series.

    Downloads the agriculture CSV, plots it, and for four differenced
    variants (first difference, lag-2 difference, second and third order
    differences) draws the series, its autocorrelation plot and the second
    element returned by ``acorr_ljungbox``. ``d`` is fixed to 1 and
    ``(p, q)`` comes from AIC order selection on the first difference.

    Returns:
        tuple: ``(p, d, q)``.
    """

    def _diagnostic_row(differenced):
        # Three panels per variant: series, autocorrelation, Ljung-Box output.
        _, panels = plt.subplots(ncols=3, nrows=1, figsize=(15, 3))
        panels[0].plot(differenced)
        autocorrelation_plot(differenced, ax=panels[1])
        panels[2].plot(acorr_ljungbox(differenced)[1])

    series_ch = pd.read_csv(
        "http://labfile.oss.aliyuncs.com/courses/1176/agriculture.csv",
        index_col=0)
    series_ch.plot(figsize=(9, 6))

    first_difference = series_ch.diff().dropna()
    _diagnostic_row(first_difference)
    _diagnostic_row(series_ch.diff(periods=2).dropna())
    _diagnostic_row(series_ch.diff().diff().dropna())
    _diagnostic_row(series_ch.diff().diff().diff().dropna())

    d = 1
    selection = arma_order_select_ic(first_difference, ic='aic')
    p, q = selection['aic_min_order']
    print('p,d,q', p, d, q)
    return p, d, q
Esempio n. 14
0
def arma_select_order(gold):
    """Run AIC/BIC ARMA order selection on ``gold`` for orders up to (3, 3).

    The selection is run without a trend term (``trend='nc'``).

    Returns:
        The result of ``sttool.arma_order_select_ic``. Previously the result
        was computed and then dropped, so callers always received ``None``.
    """
    return sttool.arma_order_select_ic(gold,
                                       max_ar=3,
                                       max_ma=3,
                                       ic=['aic', 'bic'],
                                       trend='nc')
Esempio n. 15
0
# Notebook-style script: fetch XRP/ETH "Last" prices from Quandl, run ARMA
# order selection on each and fit fixed-order ARMA models.
# NOTE: `%run` is an IPython magic, so this only runs in IPython/Jupyter.
%run setup.ipy

import quandl
import my_secrets
quandl.ApiConfig.api_key = my_secrets.QUANDL_API_KEY

xrp = quandl.get("BITFINEX/XRPUSD")["Last"]
eth = quandl.get("BITFINEX/ETHUSD")["Last"]

# Should we replace the index? Some dates are missing, so we can't set a frequency of 'D' without actually
# replacing the entire index. This generates a lot of warnings...


# When we've played with values a bit, we can then use the below.
from statsmodels.tsa import stattools
xrp_stats = stattools.arma_order_select_ic(xrp) 
eth_stats = stattools.arma_order_select_ic(eth) 
print("XRP: ", xrp_stats)
print("ETH:", eth_stats)

# Orders below are hard-coded rather than read from the selection results.
from statsmodels import api as sms
xrp_model = sms.tsa.ARMA(xrp, order=(4, 2))
eth_model = sms.tsa.ARMA(eth, order=(3, 2))

xrp_results = xrp_model.fit()
eth_results = eth_model.fit()

# NOTE(review): the summaries are not printed or stored; presumably they
# rely on notebook cell display -- confirm.
xrp_results.summary()
eth_results.summary()
Esempio n. 16
0
# First-order differencing of the "production" column.
df["production"] = df["production"].diff()
# NOTE(review): dropna(inplace=True) on a selected column may not change
# `df` itself -- confirm this line has the intended effect.
df["production"].dropna(inplace=True)  # drop NaN values
# Tested again and still not stationary, so difference once more
df["production"] = df["production"].diff()
df.dropna(inplace=True)
# result = test_stationary(df)
# print(result)
# plt.show()
# According to the test the series can now be considered stationary

# ------------------------ Step 3: order selection and fitting
# Common approaches: choose the order by inspecting the ACF and PACF, or by brute force
# Orders are usually small, so with a modest amount of data brute force is a good choice.
# Brute force iterates over the candidate orders and takes the one with the smallest aic, bic, hqic as the best order
order = stattools.arma_order_select_ic(df["production"].values,
                                       max_ar=3,
                                       max_ma=3,
                                       ic=["aic", "bic", "hqic"])
bic_min = order.bic_min_order
print(order.bic_min_order)
# Fit
# NOTE(review): the whole `df` is passed here while the order was selected
# on df["production"] -- presumably df has only that column; verify.
model = ARMA(df, bic_min).fit()

# ---------------------- Step 4: white-noise check
# QQ-plot check, Durbin-Watson (DW) test
resid = model.resid
# fig = plt.figure(figsize=(6, 6))
# ax = fig.add_subplot(111)
# fig = qqplot(resid, line="q", ax=ax, fit=True)
# plt.show()

# DW test: a value close to 2 suggests the series has no first-order autocorrelation
Esempio n. 17
0
# 3. Now the modelling process can begin.
# (1) Inspect the ACF and PACF plots to choose the order

# Compute the autocorrelation and partial autocorrelation coefficients
lag_acf = acf(data, nlags=20, fft=False)
lag_pacf = pacf(data, nlags=20, method='ols')

# Plot the autocorrelation and partial autocorrelation
fig, axes = plt.subplots(1, 2, figsize=(20, 5))
plot_acf(data, lags=20, ax=axes[0])
plot_pacf(data, lags=20, ax=axes[1], method='ols')
plt.show(block=True)

# There is already a Python function that helps with order selection
order_trend = arma_order_select_ic(data)
print(order_trend['bic_min_order'])
# The result is (2, 0), i.e. use an AR(2) model

# The order is hard-coded to the value noted above, not read from order_trend.
result_trend = ARMA(data, (2, 0)).fit()
print(result_trend.params)
# The script stops here; the lines below are reference notes only.
exit()
#result_trend.arparams   AR parameters
#result_trend.bic        BIC information criterion value
#result_trend.bse        standard errors of the parameters
#result_trend.hqic       HQ information criterion
#result_trend.k_ar       number of AR coefficients
#result_trend.k_ma       number of MA coefficients
#result_trend.k_trend    1 when there is a constant, 0 when there is not
#result_trend.llf        log-likelihood value
#result_trend.maparams   MA parameter values
Esempio n. 18
0
# Standardised residuals of `model6` (defined outside this excerpt), followed
# by Ljung-Box checks at two lag ranges.
# NOTE(review): this reads `model6.sigma`, while the same computation further
# down uses `model.sigma2` -- presumably `sigma2` is intended here; confirm.
stdresid = model6.resid / math.sqrt(model6.sigma)
plt.plot(stdresid)
plot_acf(stdresid, lags=20)
LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
# print the last value of the second array returned by q_stat
print(LjungBox[1][-1])
LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:20], len(stdresid))
print(LjungBox[1][-1])
plot_acf(stdresid, lags=40)

print(model6.forecast(3)[0])

print(CPI.head(3))

# Second data set: load Datang.csv and keep the 2014-01-01..2016-01-01 window.
Datang = pd.read_csv('Datang.csv', index_col='time')
Datang.index = pd.to_datetime(Datang.index)
returns = Datang['2014-01-01':'2016-01-01']
print(returns.head(n=3))
print(returns.tail(n=3))
print(ADF(returns).summary())
print(stattools.q_stat(stattools.acf(returns)[1:12], len(returns))[1])

# Order selection is only printed; the model below uses a fixed (1, 0, 1).
print(stattools.arma_order_select_ic(returns, max_ma=4))
model = arima_model.ARIMA(returns, order=(1, 0, 1)).fit()
print(model.summary())
print(model.conf_int())
stdresid = model.resid / math.sqrt(model.sigma2)
plt.plot(stdresid)
plot_acf(stdresid, lags=12)
LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:12], len(stdresid))
print(LjungBox[1])
Esempio n. 19
0
def find_order(data):
    """Return an ARIMA order ``(p, 1, q)`` for ``data["close"]``.

    ``p`` and ``q`` are the first two entries of the ``bic_min_order``
    attribute of the selection result; the middle entry is always 1.
    """
    bic_best = arma_order_select_ic(data["close"]).bic_min_order
    ar_order = bic_best[0]
    ma_order = bic_best[1]
    return (ar_order, 1, ma_order)
Esempio n. 20
0
# PACF plot with a zero line and +/- 1.96 / sqrt(n) reference lines, where n
# is the number of non-missing values in df["df_log_shift"].
plt.plot(lag_pacf, marker="o")
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.title('Partial Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()

from statsmodels.tsa.stattools import arma_order_select_ic

# NOTE(review): the selection result is neither stored nor printed; in a
# plain script this call has no visible effect -- confirm intent.
arma_order_select_ic(df["df_log_shift"].dropna())

### AR

from statsmodels.tsa.arima_model import ARIMA
# NOTE(review): order=(2, 2, 0) is hard-coded and not taken from the
# selection above -- verify it is the intended order.
model = ARIMA(df["df_log_shift"].dropna(), order=(2, 2, 0))

results_AR = model.fit(disp=-1)
# Overlay the fitted values (red) on the input series.
plt.plot(df["df_log_shift"].dropna())
plt.plot(results_AR.fittedvalues, color="Red")
plt.show()

print(df["df_log_shift"].dropna())
print(results_AR.fittedvalues)

#MA
Esempio n. 21
0
# Silence every warning category for the rest of the script.
warnings.filterwarnings("ignore", category=Warning)

data = datapreprocess()

# Flat list of forecasts, appended in (ampm, route, weekday) loop order.
output_lst = []
# generate submit version
for ampm in ampms.keys():
    for route in routes:
        for weekday in weekdays:
            # log-smooth
            sub = np.log(tolist(data[route][weekday][ampm]))
            # sub = np.log(tolist(data['C-3'][6]['am']))
            # fit best model
            order = st.arma_order_select_ic(sub,
                                            max_ar=5,
                                            max_ma=5,
                                            ic=['aic', 'bic', 'hqic'])

            # the BIC-selected order is the one actually used
            model = ARMA(sub, order=order.bic_min_order)
            result_arma = model.fit(disp=-1, method='css')
            predict = result_arma.predict()

            # `start` is the number of leading observations that have no
            # in-sample prediction; `end` is 6 past start + len(predict).
            start = len(sub) - len(predict)
            end = start + len(predict) + 6
            # fig = result_arma.plot_predict(start, end)
            # fig.suptitle("%s %s %s" %(route,weekday,ampm))

            # keep only the final 6 predicted values
            forecast = result_arma.predict(start, end)[-6:]
            # print (np.exp(sub))
            # undo the log transform before storing
            for x in np.exp(forecast):
                output_lst.append(x)
Esempio n. 22
0
plt.show()

#6.
# Exercise 6: draw 100 standard-normal values, plot them with their ACF and
# run ARMA order selection on them.
import statsmodels.tsa.arima_process as sm
from statsmodels.graphics.tsaplots import *
import numpy as np
import pandas as pd
numbers=np.random.normal(size=100)
numbers=pd.Series(numbers)

numbers.plot()
plt.show()
plot_acf(numbers,lags=20)

from statsmodels.tsa import stattools
# NOTE(review): the selection result is not stored or printed -- presumably
# meant for interactive display; confirm.
stattools.arma_order_select_ic(numbers.values,max_ma=4)

#7.
# Exercise 7: ADF test on the column at position 4 of the CSV and on its
# log-differenced series.
zgsy=pd.read_csv('Data/Part4/003/zgsy.csv')
clprice=zgsy.iloc[:,4]
clprice.plot()
plot_acf(clprice,lags=20)
from arch.unitroot import ADF
adf=ADF(clprice,lags=6)
print(adf.summary().as_text())

# first difference of the log series, with the leading NaN removed
logReturn=pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()

adf=ADF(logReturn,lags=6)
print(adf.summary().as_text())
Esempio n. 23
0
# ACF/PACF display, order selection and a fixed-order ARMA fit for
# `sleep_cost_all` (defined outside this excerpt).
show_acf_pacf(sleep_cost_all)

import matplotlib.pyplot as plt

ts_log = np.log(sleep_cost_all)


def show_figure(ts):
    """Plot ``ts`` against its positional index and show the figure."""
    plt.plot(range(len(ts)), ts)
    plt.show()


import statsmodels.tsa.stattools as st

order = st.arma_order_select_ic(sleep_cost_all,
                                max_ar=5,
                                max_ma=5,
                                ic=['aic', 'bic', 'hqic'])
order.bic_min_order
# the result is (0, 0)

from statsmodels.tsa.arima_model import ARMA

# NOTE(review): the order is hard-coded to (2, 1) although the comment above
# records a selected order of (0, 0) -- confirm which is intended.
model = ARMA(sleep_cost_all, order=(2, 1))  # sliding (moving) stationary model
result_arma = model.fit(disp=-1, method='css')  # fit
train_predict = result_arma.predict(7)  # predict

# root mean squared error
# compared against the observations from position 7 onwards, matching the
# start index passed to predict() above
np_arr = np.array(sleep_cost_all)
RMSE = np.sqrt(((train_predict - np_arr[7:])**2).sum() / np_arr[7:].size)
print(RMSE)
# NOTE(review): the data is loaded into `df_read`, but the split below reads
# `df_nile`, which is not defined in this excerpt -- verify the name.
df_read = sm.datasets.nile.load_pandas().data
#print (df_read['volume'])



# Data Split (70: 30)
# NOTE(review): the hard-coded positions 4000/6001 do not obviously match a
# 70:30 split -- confirm against the actual length of the data.


df_test = df_nile['volume'].iloc[4000:6001]
df_train = df_nile['volume'].iloc[:4000]


# from statsmodels.tsa import stattools as st
# Determine the order of the ARMA model

print(st.arma_order_select_ic(df_train, ic='bic', trend = 'nc'))

# The order (3, 0) is hard-coded, despite the variable name `arma_11`.
arma_11 = sm.tsa.ARMA(df_train, (3, 0)).fit()

#arma_11 = sm.tsa.SARIMAX(df_train, order=(3,1,2), seasonal_order=(0,0,0,213), enforce_stationarity = False, enforce_invertibility = False).fit()



# in-sample predict
arma_11_inpred = arma_11.predict(start=2, end=4000,typ='levels')
# out-of-sample predict
arma_11_outpred = arma_11.predict(start=3999, end=6000,typ='levels')
# plot data and predicted values
# plot data and predicted values



def plot_ARMA_results(origdata, pred11in, pred11out):
Esempio n. 25
0

stationarity_test(df.AverageTemperature)

# The model is usually referred to as the ARMA(p,q) model where
# p is the order of the autoregressive part and
# q is the order of the moving average part.

# Determining the p and q values can be a challenge.
# The arma_order_select_ic function used below helps with this.
# To get the p and q values:

print(
    arma_order_select_ic(df.AverageTemperature,
                         ic=['aic', 'bic'],
                         trend='nc',
                         max_ar=4,
                         max_ma=4,
                         fit_kw={'method': 'css-mle'}))

# Let's fit the model and make predictions using ARMA.
# The order (3, 3) is hard-coded, not read from the selection printed above.
# Fit the model
ts = pd.Series(df.AverageTemperature, index=df.index)
model = ARMA(ts, order=(3, 3))
results = model.fit(trend='nc', method='css-mle', disp=-1)
print(results.summary2())

# Now, plot the prediction
# Plot the model
fig, ax = plt.subplots(figsize=(10, 8))
fig = results.plot_predict('01/01/2010', '12/01/2023', ax=ax)
ax.legend(loc='lower left')
Esempio n. 26
0
# Plot the raw temperature data together with the AR model prediction
# (`ar_predict` is computed outside this excerpt) and write it to a file.
fig = go.Figure()
fig.add_trace(go.Scatter(name="Raw Data", x=df.index, y=df.temp))
fig.add_trace(
    go.Scatter(name="AR model Prediction", x=ar_predict.index, y=ar_predict))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(title_text="AR MODEL",
                  xaxis_title="Date",
                  yaxis_title="Temperature, C")
plotly.offline.plot(fig, filename=r'../Images/4_AR.png')

# 2. ARMA Model
# with statsmodels, AIC check of params
from statsmodels.tsa import stattools as st
from statsmodels.tsa.arima_model import ARMA, ARIMA, ARMAResults

# NOTE(review): the selection result is discarded and the order below is
# hard-coded to [3, 2] -- confirm that is intended.
st.arma_order_select_ic(train_df, ic='aic')
# NOTE(review): `maxlag`, `ic` and `dates` are passed to fit(); verify that
# ARMA.fit actually uses these keywords.
arma = ARMA(train_df, order=[3, 2]).fit(maxlag=4, ic='aic', dates=date)
arma_predict = arma.predict('2019-10-22', '2020-10-21')

# Visualization
fig = go.Figure()
fig.add_trace(go.Scatter(name="Raw Data", x=df.index, y=df.temp))
fig.add_trace(
    go.Scatter(name="ARMA model Prediction",
               x=arma_predict.index,
               y=arma_predict))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(title_text="ARMA MODEL",
                  xaxis_title="Date",
                  yaxis_title="Temperature, C")
plotly.offline.plot(fig, filename=r'../Images/5_ARMA.png')
Esempio n. 27
0
    for ind, item in enumerate(items):
        try:
            print("================================================")
            print("item[{0:d}]========================".format(item))
            temp = [float(x) for x in result[item]]
            training = temp[:-12]

            to_be_add = temp[-12:]
            try:
                testing = [float(x) for x in sales[ind]]
            except ValueError:
                continue
            gw_fcsting = gw_fcst[ind][:]
            predictions = []
            print(testing)
            res = arma_order_select_ic(training, ic=['aic', 'bic'], trend='nc')
            for t in range(len(testing)):
                # res.aic_min_order
                # res.bic_min_order
                model = ARIMA(training,
                              order=(res.aic_min_order[0], 1,
                                     res.aic_min_order[1]))
                model_fit = model.fit(disp=0)
                output = model_fit.forecast(steps=13)
                # print(len(output))
                yhat = output[0][0]
                obs = testing[t]
                predictions.append(yhat)
                training.append(to_be_add[t])
            yhmape = mape(testing, predictions)
            gwmape = mape(testing, gw_fcsting)
Esempio n. 28
0
# Per-site ARMA fit and plot of predicted values, six subplots per figure.
plt.suptitle(u'分站点预测/实际值对比')
MSE = []
MAE = []
MAPE = []
layout_num = 0  # used for the plot layout
for i in range(0, SITE_SIZE):
    # start a new figure once the current one is full
    if (layout_num == 6):
        layout_num = 0
        plt.figure(figsize=(16, 9))
        plt.suptitle(u'分站点预测/实际值对比')

    subplot = plt.subplot(3, 2, layout_num + 1)

    # NOTE(review): `site` is used as the series to model and is reshaped
    # below, so presumably `site_names[i]` holds an array of values rather
    # than a name -- verify.
    site = site_names[i]
    order = stattools.arma_order_select_ic(site,
                                           max_ar=3,
                                           max_ma=3,
                                           ic=['aic', 'bic', 'hqic'])
    print("(p,q):")
    pq = order.bic_min_order
    print(order.bic_min_order)  # (p,q)
    # Fit (build the trained model) and start predicting
    model = ARMA(site, pq).fit()
    predict_data = model.predict(start=0, end=DATA_SIZE - 1)

    # Undo the normalisation here
    predict_data = scaler.inverse_transform(predict_data.reshape(-1, 1))
    site = scaler.inverse_transform(site.reshape(-1, 1))
    # exponentiate both series (presumably to undo an earlier log transform)
    site = np.exp(site)  #–––––––––––––
    predict_data = np.exp(predict_data)  #–––––––––––––––

    plt.plot(predict_data)
Esempio n. 29
0
    # TODO: Ljung-Box test
    # Check the source code of acorr_ljungbox(ts, lags=1)


    # If AR model is needed and df_data is changed
    # NOTE(review): `TS_new` is created here but the calls below use `TS` --
    # confirm which object is intended.
    TS_new = TS_Analysis(df_data=df_data_new)

    # Autoregressive Model AR(p)
    AR_1_model = TS.AR_p(x='i1701', p=1)
    df_sp = TS.add_sp_lag(x='i1701', sp_lag=4)
    print(AR_1_model.summary())
    TS.acf(AR_1_model)
    TS.acf_table(AR_1_model, maxlag=12)

    # ARMA model
    # order search up to (5, 5); the BIC-selected order is used for the fit
    best_order = st.arma_order_select_ic(df_data, max_ar=5, max_ma=5, ic=['aic', 'bic', 'hqic'])
    arma_model = ARMA(df_data, order=best_order.bic_min_order).fit(disp=-1, method='css')
    print(arma_model.summary())
    ####################################################################
    #              PART III  Model Selection and Prediction            #
    ####################################################################

    # # this is the nsteps ahead predictor function
    # from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
    #
    # res = sm.tsa.ARMA(y, (3, 2)).fit(trend="nc")
    # res = arma_model
    # # get what you need for predicting one-step ahead
    # params = res.params
    # residuals = res.resid
    # p = res.k_ar
Esempio n. 30
0
def choose_order(ts, maxar, maxma):
    """Pick the ARMA order ``(p, q)`` that minimises BIC for ``ts``.

    Runs ``st.arma_order_select_ic`` with AR orders up to ``maxar`` and
    MA orders up to ``maxma``. AIC, BIC and HQIC are all computed, but only
    the BIC-optimal order is returned.

    Args:
        ts: the time series to analyse.
        maxar: maximum AR order to try.
        maxma: maximum MA order to try.

    Returns:
        The ``bic_min_order`` attribute of the selection result.
    """
    criteria = ['aic', 'bic', 'hqic']
    selection = st.arma_order_select_ic(
        ts, max_ar=maxar, max_ma=maxma, ic=criteria)
    return selection.bic_min_order
Esempio n. 31
0
    def arima_model(self, run_id):
        """Forecast seven days of flow rate for a run with an ARIMA model.

        Data comes from ``self.daily_avg(run_id)``. The (p, q) order is
        chosen with ``arma_order_select_ic`` (AIC) and an ``ARIMA(p, 0, q)``
        model is fitted on the ``flow`` column with ``temp`` and ``precip``
        as exogenous regressors. Future exogenous values are the mean of
        the last seven rows, repeated for each forecast day.

        If order selection raises ``ValueError``, or fitting/forecasting
        raises any ``Exception``, the forecast falls back to repeating the
        most recent ``flow`` value seven times.

        Args:
            run_id (int): id of run for which model will be created

        Returns:
            An empty DataFrame when ``daily_avg`` returns ``None``;
            otherwise recent historical flow values followed by the
            seven forecast values, concatenated along the index.
        """
        measures = self.daily_avg(run_id)

        # Nothing to model without data.
        if measures is None:
            return pd.DataFrame()

        def repeat_last_day():
            # Naive forecast: the latest observation copied seven times.
            latest_row = measures.iloc[-1, :].to_frame().T
            return pd.concat([latest_row] * 7, ignore_index=True)['flow']

        # Mean of the last seven rows stands in for the unknown future
        # values of the exogenous predictors.
        weekly_mean_row = measures.iloc[-7:, :].mean(axis=0).to_frame().T
        exog_future_predictors = pd.concat([weekly_mean_row] * 7,
                                           ignore_index=True)

        try:
            # Grid-search the ARMA order by AIC.
            params = arma_order_select_ic(measures['flow'], ic='aic')
            try:
                ar_order = params.aic_min_order[0]
                ma_order = params.aic_min_order[1]
                fitted = ARIMA(
                    measures['flow'],
                    order=(ar_order, 0, ma_order),
                    exog=measures[['temp', 'precip']],
                ).fit()

                forecast_result = fitted.forecast(
                    steps=7,
                    exog=exog_future_predictors[['temp', 'precip']],
                    alpha=0.05)
                # Element 0 of the forecast result holds the point forecasts.
                prediction = pd.DataFrame([forecast_result[0]]).T
            except Exception:
                # Fit or forecast failed: fall back to the naive forecast.
                prediction = repeat_last_day()
        except ValueError:
            # Order selection failed: fall back to the naive forecast.
            prediction = repeat_last_day()

        # Forecast dates start at the second-to-last observed index value.
        first_day = measures.index[-2]
        prediction.index = [
            first_day + datetime.timedelta(days=offset)
            for offset in range(7)
        ]

        # History window for plotting; its final row is dropped so that it
        # does not share a date with the first forecast row.
        past = measures['flow'][-22:-1]
        return pd.concat([past[:-1], prediction], axis=0)
Esempio n. 32
0
def _arma_order_selector(ts, ic='bic'):
    """Return the ARMA order that minimises the criterion named by ``ic``.

    Calls ``arma_order_select_ic`` on ``ts`` with the fit method set to
    ``'css'`` and reads the ``<ic>_min_order`` attribute off the result.
    """
    fit_options = {'method': 'css'}
    selection = arma_order_select_ic(ts, ic=ic, fit_kw=fit_options)
    attribute_name = '{}_min_order'.format(ic)
    return getattr(selection, attribute_name)
Esempio n. 33
0
# Autocorrelation of the series, plotted twice: limited to x <= 100, then in a
# new figure limited to x <= 10.
autocorrelation_plot(ts);
plt.xlim(xmax=100);
plt.figure(figsize=(12,6))
autocorrelation_plot(ts);
plt.xlim(xmax=10);


# The highest autocorrelation peaks occur every ~365 days, and the autocorrelation oscillates with a period of about 7 days. This matches our intuition about shopping - that it is a weekly chore. Even though Ecuadorians are most likely paid monthly, there is no significant periodicity visible on this scale.
# 
# From the last diagram it seems that the autocorrelation is significant (above the dashed line) for a period of at most 2 days. All the references then suggest using 2 for the p parameter. But the statsmodels library contains a nifty tool for order selection: arma_order_select_ic(). It performs a grid search over the p and q parameters. Let's see what it says for maximum orders of 10, using the [Bayesian Information Criterion](https://en.wikipedia.org/wiki/Bayesian_information_criterion) as the estimator:

# In[ ]:



# Grid search over p, q in 0..10 with a constant trend; each candidate is
# fitted with method 'css' and at most 500 iterations. Both AIC and BIC
# tables are computed.
result = arma_order_select_ic(ts,max_ar=10, max_ma=10, ic=['aic','bic'], trend='c', fit_kw=dict(method='css',maxiter=500))
print('The bic prescribes these (p,q) parameters : {}'.format(result.bic_min_order))
print('The aic prescribes these (p,q) parameters : {}'.format(result.aic_min_order))
# Side-by-side heatmaps of the two criterion tables.
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.title('bic results')
seaborn.heatmap(result.bic);
plt.subplot(1,2,2)
plt.title('aic results')
seaborn.heatmap(result.aic);


# With the initial parameters, we get a great many convergence warnings, which may indicate that the model does not fit the time series well. It may be important to factor out seasonality first, which we shall do in a later part of the analysis. To reduce the number of warnings, we raised the maximum number of iterations to 500, in case the convergence rate is very slow. We also plotted heatmaps of the results for both the AIC and the BIC, for comparison.
# 
#   It is clear from the heatmap that we can afford to run the model with parameters (5,0,5)
# Plot the series held in red_35.
red_35.plot(figsize=(12, 8))
plt.show()

# In[132]:

# Null hypothesis: the series is white noise
from statsmodels.stats.diagnostic import acorr_ljungbox
acorr_ljungbox(red_35, lags=1)
acorr_ljungbox(sub_pur, lags=1)

# In[138]:

# Automatically select the orders p and q
from statsmodels.tsa.stattools import arma_order_select_ic
arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='aic')['aic_min_order']

# In[139]:

arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='bic')['bic_min_order']

# In[140]:

arma_order_select_ic(diff, max_ar=7, max_ma=7, ic='hqic')['hqic_min_order']

# In[142]:

# Reject the null hypothesis: the residuals are not white noise
model_23 = ARIMA(diff, order=(2, 0, 3)).fit()
red_23 = model_23.resid
acorr_ljungbox(red_23, lags=1)
Esempio n. 35
0
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import statsmodels.tsa.stattools as st
from statsmodels.tsa.arima_model import ARMA
from statsmodels.sandbox.stats.diagnostic import acorr_ljungbox
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Index at which the series is split: rows before it are used for
    # fitting, rows from it onward are kept for comparison with the forecast.
    split_at = 234

    table = pd.read_csv('MonthlyWeather.txt', header=None, sep=',')
    series = table.iloc[:, 0]
    held_out = series[split_at:]
    training = series[0:split_at]

    # Information-criterion order search on the training part. Its result
    # is not consulted below; the model order is fixed at (4, 3).
    order = st.arma_order_select_ic(training, ic=['aic', 'bic'])

    fitted = ARMA(training, order=(4, 3)).fit(disp=-1, method='css')
    print(fitted.summary())

    # Ljung-Box test on the in-sample residuals at lags 6, 12, 18 and 24.
    in_sample = fitted.predict()
    residuals = (training - in_sample).dropna()
    ljung_box = acorr_ljungbox(residuals, [6, 12, 18, 24])
    print(ljung_box)

    # Predict positions 234 through 271 and plot against the held-out data.
    forecast = fitted.predict(split_at, 271)
    axes = forecast.plot(label='forecast')
    held_out.plot(label='observed')
    axes.set_xlabel('Month')
    axes.set_ylabel('Precipitation')
    plt.legend()
    plt.show()