Example #1
    lbx, px = acorr_ljungbox(pred_error, lags=lagnum)
    h = (px < 0.05).sum()  # a p-value below 0.05 means the residuals are not white noise
    if h > 0:
        print('Model ARMA(%s,%s) fails the white-noise test' % (p, q))
        print('Dropping the [%s,%s] combination from the AIC matrix and recomputing' % (p, q))
        matrix.iloc[p, q] = np.nan
        arimafail = arma
        continue
    else:
        print(p, q)
        print('Model ARMA(%s,%s) passes the white-noise test' % (p, q))
        break

# In[6]:

arma.summary()  # summary() raises an error when p and q are both 0

# In[7]:

forecast_values, forecasts_standard_error, forecast_confidence_interval = arma.forecast(
    5)
forecast_values

# In[8]:

predictdata = pd.DataFrame(xtest_value)
predictdata.insert(1, 'CWXT_DB:184:C:\\_predict', forecast_values)
predictdata.rename(columns={
    'CWXT_DB:184:C:\\': u'实际值',
    'CWXT_DB:184:C:\\_predict': u'预测值'
}, inplace=True)
Example #2
corr, _ = stats.pearsonr(forward_DF['UnderlyingTick3'],
                         forward_DF['UnderlyingTick1'])
plt.close()
sm.graphics.tsa.plot_acf(forward_DF['ForwardTick1'], lags=5)
plt.show()
plt.close()
sm.graphics.tsa.plot_acf(forward_DF['ForwardTick1'], None,
                         forward_DF['Underlyinglag1'])
plt.show()

mod01 = ARMA(forward_DF['ForwardTick1'], (0, 20),
             exog=forward_DF['UnderlyingTick1'],
             missing='drop').fit()
mod01.summary()

forward_DF['RFor1'] = forward_DF['Forward'] / forward_DF['Forwardlag1'] - 1
forward_DF[
    'RUnd1'] = forward_DF['Underlying'] / forward_DF['Underlyinglag1'] - 1
#forward_DF['ForRatio'] = np.log(np.abs(forward_DF['RFor1'] / forward_DF['RUnd1']))

forward_DF['ForRatio'] = np.abs(forward_DF['RFor1']) / (
    np.abs(forward_DF['RFor1']) + np.abs(forward_DF['RUnd1']))
forward_DF['FutRatio'] = np.abs(forward_DF['RUnd1']) / (
    np.abs(forward_DF['RFor1']) + np.abs(forward_DF['RUnd1']))

forward_DF['ForRatio'] = forward_DF['ForwardTick1'] * np.abs(
    forward_DF['RFor1']) / (np.abs(forward_DF['RFor1']) +
                            np.abs(forward_DF['RUnd1']))
forward_DF['FutRatio'] = forward_DF['ForwardTick1'] * np.abs(
    forward_DF['RUnd1']) / (np.abs(forward_DF['RFor1']) +
                            np.abs(forward_DF['RUnd1']))
Example #3

        arimafail = arma
        continue
    else:
        # print(p, q)
        print('Model ARMA(%s,%s) passes the white-noise test' % (p, q))
        break

'''
p-values for pred_error: [0.85213186 0.87622837 0.5091596  0.17549072 0.23165608 0.32636535
 0.23061123 0.21730751 0.26456821 0.34228534 0.32163025 0.23001753]
h = 0
Model ARMA(1,0) passes the white-noise test
'''

# Step 5 (drive C): model forecasting
print('Model report, summary():\n', arma.summary())
forecast_values, forecasts_standard_error, forecast_confidence_interval = arma.forecast(5)

pre_data = pd.DataFrame(xtest_value)
pre_data.insert(1, 'CWXT_DB:184:C:\\_predict', forecast_values)
pre_data.rename(columns={'CWXT_DB:184:C:\\' : '实际值', 'CWXT_DB:184:C:\\_predict': '预测值'}, inplace=True)
result_d = pre_data.applymap(lambda x: '%.2f' % x)
result_d.to_excel('../my_data/pedictdata_C_AIC_ARMA.xlsx')

# Step 5 (drive C): model evaluation
# To evaluate the forecasting model, this chapter uses three statistics that measure prediction accuracy: mean absolute error, root mean squared error, and mean absolute percentage error
result = pd.read_excel('../my_data/pedictdata_C_AIC_ARMA.xlsx', index_col='COLLECTTIME')
result = result.applymap(lambda x: x/10**6)
print('Model results:\n', result)

abs_ = (result['预测值'] - result['实际值']).abs()
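
# The three metrics mentioned above are not shown in this excerpt; a minimal
# sketch of how they could be computed from the result frame (the names
# mae_/rmse_/mape_ are illustrative):
mae_ = abs_.mean()                                                   # mean absolute error
rmse_ = ((result['预测值'] - result['实际值']) ** 2).mean() ** 0.5    # root mean squared error
mape_ = (abs_ / result['实际值']).mean()                              # mean absolute percentage error
print('MAE: %0.4f, RMSE: %0.4f, MAPE: %0.6f' % (mae_, rmse_, mape_))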
Example #4
sgt.plot_acf(df.returns, lags=40, zero = False)
plt.title("ACF FTSE Returns", size=24)

sgt.plot_pacf(df.returns, lags=40, zero=False, method='ols')
plt.title("PACF FTSE Returns", size=24)
plt.show()
# => we know data is non-stationary from a previous exercise


# select an AR model (by looking at the PACF here) and iterate through higher-order models until the LLR test no longer favors the larger model
# ----------
model_ret_ar_1 = ARMA(df.returns, order=(1, 0)).fit()
print(model_ret_ar_1.summary())
print('----------')
model_ar_4 = ARMA(df.returns, order=(4,0)).fit()
print(model_ar_4.summary())
print('----------')
model_ar_6 = ARMA(df.returns, order=(6,0)).fit()
print(model_ar_6.summary())
print('----------')
# => by comparing the LLR stat and AIC/BIC from the models' summaries we can see which order is best ... (here we would end up with AR(6))
# => remember that auto_arima is much easier...
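
# The comments above refer to auto_arima, which lives in the separate pmdarima
# package rather than statsmodels. A rough sketch of that shortcut, assuming
# pmdarima is installed (the parameter choices here are illustrative):
from pmdarima import auto_arima

auto_model = auto_arima(df.returns, start_p=0, max_p=6, start_q=0, max_q=6,
                        d=0, seasonal=False, information_criterion='aic')
print(auto_model.summary())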



# compare LLR results across models
# ----------
from scipy.stats import chi2

def LLR_test(mod_1, mod_2, DF=1):
    L1 = mod_1.fit().llf
    L2 = mod_2.fit().llf
    LR = (2*(L2-L1))
    p = chi2.sf(LR, DF).round(3)
    return p
Example #5
for p in range(pmax + 1):
    temp = []
    for q in range(qmax + 1):
        try:
            temp.append(ARMA(Y, (p, q)).fit().bic)
        except:
            temp.append(None)
    bic_matrix.append(temp)

bic_matrix = pd.DataFrame(bic_matrix)  # convert to a DataFrame
p, q = bic_matrix.astype(
    'float64').stack().idxmin()  # stack to flatten, then use idxmin to locate the minimum BIC
print('p and q with the smallest BIC: %s,%s' % (p, q))  # p and q with the smallest BIC: 0,0
# so the model can be built as ARMA(0,0)
AR = ARMA(Y, (0, 0)).fit()
print(AR.summary())

AR = ARMA(data.dropna(), (1, 1))


#sm.tsa.stattools.arma_order_select_ic(data,max_ma=3)
def ret_plot(ts, title=''):

    ts1 = ts**2

    ts2 = np.abs(ts)

    with plt.style.context('ggplot'):

        fig = plt.figure(figsize=(12, 6))
Example #6
# split dataset (on straight data = prices)
# ----------
size = int(len(df_comp) * 0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]

# review ACF (in reality it is more practical to run auto_arima than to check ACF/PACF manually, but this is for the sake of example)
# ----------
sgt.plot_acf(df.returns[1:], zero=False, lags=40)
plt.title("ACF for Returns", size=24)

# select an MA model (by looking at the ACF here) and iterate through higher-order models until the LLR test no longer favors the larger model
# ----------
model_ret_ma_8 = ARMA(df.returns[1:], order=[0, 8]).fit()
print(model_ret_ma_8.summary())
print("\nLLR test p-value = " + str(LLR_test(model_ret_ma_7, model_ret_ma_8)))

# => by comparing the LLR stat and AIC/BIC from the models' summaries we can see which order is best ... (here we would end up with MA(8))
# => remember that auto_arima is much easier...


# compare LLR results across models
# ----------
from scipy.stats import chi2

def LLR_test(mod_1, mod_2, DF=1):
    L1 = mod_1.fit().llf
    L2 = mod_2.fit().llf
    LR = (2 * (L2 - L1))
    p = chi2.sf(LR, DF).round(3)
    return p
Example #7
def ts_arma(ts, p, q, start,end):
    arma = ARMA(ts, order=(p, q)).fit(disp = -1)
    print("未来五年:", arma.forecast(5)[0])
    ts_predict_arma = arma.predict(start,end)
    print(arma.summary())
    return ts_predict_arma
Example #8
from statsmodels.graphics.tsaplots import plot_pacf

# partial autocorrelation; use this to find the AR parameter
# see the last lag at which the PACF exceeds the significance threshold
fig, ax = plt.subplots(figsize=(12, 5))
plot_pacf(udiff.values, lags=10, ax=ax)
plt.show()

from statsmodels.tsa.arima_model import ARMA

# Notice that you have to use udiff - the differenced data rather than the original data.
# udiff is an np.array. They wrap it in a tuple(); I don't
# think that is necessary.
ar1 = ARMA(udiff.values,
           (3, 1)).fit()  # TODO: Fit an ARMA model to the differenced data
ar1.summary()

# the above creates an ARMA model with p=3, q=1

# next we want to plot the fitted values from the ARMA
# model against the actual values in udiff
# the udiff values are the log return values.
plt.figure(figsize=(12, 8))
plt.plot(udiff.values, color='blue')
preds = ar1.fittedvalues
plt.plot(preds, color='red')
plt.show()

# next, let's make a 2 step ahead forecast, and plot it
steps = 2
forecast = ar1.forecast(steps=steps)[0]
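
# The plotting step promised above is cut off in this excerpt; a minimal sketch
# of how the 2-step forecast could be drawn after the observed series
# (the green markers are purely illustrative):
plt.figure(figsize=(12, 8))
plt.plot(udiff.values, color='blue', label='observed')
plt.plot(range(len(udiff), len(udiff) + steps), forecast, color='green',
         marker='o', label='forecast')
plt.legend()
plt.show()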
Example #9

    # If AR model is needed and df_data is changed
    TS_new = TS_Analysis(df_data=df_data_new)

    # Autoregressive Model AR(p)
    AR_1_model = TS.AR_p(x='i1701', p=1)
    df_sp = TS.add_sp_lag(x='i1701', sp_lag=4)
    print(AR_1_model.summary())
    TS.acf(AR_1_model)
    TS.acf_table(AR_1_model, maxlag=12)

    # ARMA model
    best_order = st.arma_order_select_ic(df_data, max_ar=5, max_ma=5, ic=['aic', 'bic', 'hqic'])
    arma_model = ARMA(df_data, order=best_order.bic_min_order).fit(disp=-1, method='css')
    print(arma_model.summary())
    ####################################################################
    #              PART III  Model Selection and Prediction            #
    ####################################################################

    # # this is the nsteps ahead predictor function
    # from statsmodels.tsa.arima_model import _arma_predict_out_of_sample
    #
    # res = sm.tsa.ARMA(y, (3, 2)).fit(trend="nc")
    # res = arma_model
    # # get what you need for predicting one-step ahead
    # params = res.params
    # residuals = res.resid
    # p = res.k_ar
    # q = res.k_ma
    # k_exog = res.k_exog
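
    # The commented block above reaches for the private _arma_predict_out_of_sample
    # helper; a simpler sketch using the public API on the fitted model from above
    # (n_steps is an illustrative name):
    n_steps = 5
    point_forecast, std_err, conf_int = arma_model.forecast(steps=n_steps)
    print(point_forecast)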
Example #10

    for key, values in dftest[4].items():
        dfoutput["Critical Values(%s)" % key] = values
    return round(dfoutput, 4)

print(ADF(Ct["Column2"]))


# In[110]:


# model fitting

from statsmodels.tsa.arima_model import ARMA

Ct_ARMA = ARMA(Ct["Column2"], order=(4,2)).fit()
print(Ct_ARMA.summary())


# In[116]:


plt.plot(TSdata['Column1'], TSdata["Column2"], "o-")
plt.plot(Ct["Column1"][0:53], Ct_ARMA.fittedvalues, 'o-')

foresee = Ct_ARMA.forecast(6)[0].tolist()
plt.plot(TSdata["Column1"][53:66],foresee,'*-')
plt.show()


# In[118]:
Example #11
diff = 3
df1 = pd.read_csv(f'PeMSD3_V_228.csv', header=None)
output_pa = f'Covid19_vector_pa_ucc_0607_{diff}diff.csv'
sum_c = 0
for i in range(228):
    timeseries = []
    if i >= 0:
        for j in range(288 * 44):
            if j <= (288 * 34):
                timeseries.append(df1.iloc[j, i])
        p = 5
        q = 0
        print(p, q)
        model = ARMA(timeseries, order=(p, q)).fit()
        data_h = list(model.arparams) + list(model.maparams)  # the p AR and q MA coefficients
        print(model.summary())
        B = []
        for k in range(p + q):
            B.append(data_h[k])
        with open(output_pa, "a") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(B)
            csv_file.close()
        print(i + 1, 'is done.')

output_eucl_ntran = f'Covid19_W_ucc0607_{diff}diff_ntran.csv'
output_eucl = f'Covid19_W_ucc0607_{diff}difflogturn_51.csv'
df4 = pd.read_csv(output_pa, header=None)

df4.values
Example #12

#
# The final stepping stone before **ARIMA** models is the **ARMA** model.
#
# _ARMA_ models combine the autoregressive models and moving average models. We combine both, parameterizing the behavior of the model with `p` and `q` terms corresponding to the `AR(p)` model and `MA(q)` model.
#
# Autoregressive models slowly incorporate changes in preferences, tastes, and patterns. Moving average models base their prediction not on the prior value but the prior error, allowing us to correct sudden changes based on random events - supply, popularity spikes, etc.
#
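#
# _As a quick illustration of how the AR and MA pieces combine, the cell below simulates an ARMA(1,1) series with statsmodels' `ArmaProcess`; the coefficients are arbitrary and purely illustrative._

# In[ ]:

from statsmodels.tsa.arima_process import ArmaProcess
import numpy as np

np.random.seed(42)
# y_t = 0.6*y_{t-1} + e_t + 0.4*e_{t-1}  (AR coefficients are sign-flipped in ArmaProcess)
simulated_arma = ArmaProcess(ar=np.array([1, -0.6]), ma=np.array([1, 0.4])).generate_sample(nsample=200)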

# In[140]:

from statsmodels.tsa.arima_model import ARMA

store1_sales_data = store1_open_data[['Sales']][:len(store1_open_data) // 2].astype(float)
arma10 = ARMA(store1_sales_data, (1, 0)).fit()
arma10.summary()

# _The previous model was the equivalent of an AR(1) model, since the order for MA is zero._
#

# In[143]:

# We can now try fitting an AR(2) model, in which we are modeling a month of sales based
# on the values for the previous two months.
arma20 = ARMA(store1_sales_data, (2, 0)).fit()
arma20.summary()

# In[144]:

# Now we check the residuals of our model to see how well our model is capturing the
# phenomena.
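
# The residual check itself is cut off in this excerpt; a minimal sketch of what
# it might look like for the arma20 fit above (plot the residuals and their ACF;
# assumes matplotlib and statsmodels are available as in the other examples):
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt

arma20.resid.plot()
plot_acf(arma20.resid, lags=30)
plt.show()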
Example #13
plt.title('Partial Autocorrelation Function (p=1)')
plt.tight_layout()

'''
In this plot, the two dotted lines on either side of 0 are the confidence intervals.
These can be used to determine the p and q values as:

- p: The lag value where the PACF chart crosses the upper confidence interval for the first time, in this case p=1.

- q: The lag value where the ACF chart crosses the upper confidence interval for the first time, in this case q=1.
'''
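
'''
A rough, programmatic version of the rule above is sketched below: it uses
statsmodels' acf/pacf with a 95% confidence band and reports the first lag whose
coefficient falls outside the band. The helper name first_significant_lag is
illustrative, and x is the same series that is fitted further down.
'''
from statsmodels.tsa.stattools import acf, pacf
import numpy as np

def first_significant_lag(values, confint):
    half_width = confint[:, 1] - values          # half-width of the confidence band
    outside = np.where(np.abs(values[1:]) > half_width[1:])[0]
    return int(outside[0]) + 1 if len(outside) else 0

acf_vals, acf_conf = acf(x, nlags=20, alpha=0.05, fft=True)
pacf_vals, pacf_conf = pacf(x, nlags=20, alpha=0.05)
print('suggested p = %d, q = %d' % (first_significant_lag(pacf_vals, pacf_conf),
                                    first_significant_lag(acf_vals, acf_conf)))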

'''
### Fit ARMA model with statsmodels

1. Define the model by calling `ARMA()` and passing in the p and q parameters.

2. The model is prepared on the training data by calling the `fit()` function.

3. Predictions can be made by calling the `predict()` function and specifying the index of the time or times to be predicted.
'''

from statsmodels.tsa.arima_model import ARMA


model = ARMA(x, order=(1,1)).fit() # fit model

print(model.summary())
plt.plot(x)
plt.plot(model.predict(), color='red')
plt.title('RSS: %.4f'% sum((model.fittedvalues-x)**2))
Example #14
del df_comp['nikkei']

# split dataset (on straight data = prices)
# ----------
size = int(len(df_comp) * 0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]

# review ACF and PACF (in reality is more functional to run auto_arima vs checking ACF/PACF manually, but this is for sake of example)
# ----------
# not done here
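
# A quick sketch of what that review might look like (it is not shown in the
# original); assumes sgt is statsmodels.graphics.tsaplots, as in the earlier examples:
sgt.plot_acf(df.returns[1:], lags=40, zero=False)
plt.title("ACF for Returns", size=24)
sgt.plot_pacf(df.returns[1:], lags=40, zero=False, method='ols')
plt.title("PACF for Returns", size=24)
plt.show()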

# select an ARMA model (by looking at the ACF/PACF here) and iterate through higher-order models until the LLR test no longer favors the larger model
# ----------
model_ret_ar_1_ma_1 = ARMA(df.returns[1:], order=(1, 1)).fit()
print(model_ret_ar_1_ma_1.summary())
print('----------')
# -- going manually through multiple iterations (not reported here) would lead to this higher order model
model_ret_ar_1_ma_3 = ARMA(df.returns[1:], order=(1, 3)).fit()
print(model_ret_ar_1_ma_3.summary())
print('----------')
model_ret_ar_3_ma_2 = ARMA(df.returns[1:], order=(3, 2)).fit()
print(model_ret_ar_3_ma_2.summary())
print('----------')

# => by comparing the LLR stat and AIC/BIC from the models' summaries we can see which order is best ... (here we would end up with ARMA(3,2))
# => remember that auto_arima is much easier...


# compare LLR results across models
# ----------
Example #15
File: ex_tsa.py  Project: bitsunlei/Learn
if False:
    plt.subplot(121)
    plt.plot(lag_acf)
    plt.title('ACF (q=1)')

    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.title('PACF (p=1)')
    plt.tight_layout()
    plt.show()

# Prediction with ARMA models
# 1. define the model by calling ARMA
# 2. The model is prepared on the training data by calling the fit() function
# 3. Predictions can be made by calling the predict() function and specifying
#    the index of the time or times to be predicted.
from statsmodels.tsa.arima_model import ARMA

x = df['Gym'].astype('float')
model = ARMA(x, order=(1, 1)).fit()
print(model.summary())
if True:
    plt.plot(x, label='Gym original')
    plt.plot(model.predict(), color='red', label='predicted')
    plt.title('RSS: %.4f' % sum((model.fittedvalues - x)**2))
    plt.legend(loc='best')
    plt.show()

print('The End.')
Example #16

autocorrelation_plot(df.ExplosivityIndexMax)
# both positive / negative values, quite a lot of ups / downs

df.head()

from statsmodels.graphics.tsaplots import plot_acf

plot_acf(df.ExplosivityIndexMax, lags=200)
plt.show() #all positive. 

from statsmodels.tsa.arima_model import ARMA

#arma (1,0)
arma = df[['ExplosivityIndexMax']].astype(float)
model = ARMA(arma, (1, 0)).fit()
print(model.summary())

model.resid.plot()
plot_acf(model.resid, lags=100)
plt.show()

#arma(2,0)
arma2 = df[['ExplosivityIndexMax']].astype(float)
model = ARMA(arma2, (2, 0)).fit()
print(model.summary())

model.resid.plot()
plot_acf(model.resid, lags=100)
plt.show()

#ARIMA (2,0,2)
Example #17
import math
from statistics import mean

plt.style.use('fivethirtyeight')

df = pd.read_excel("../00Daily/Egypt.xlsx", squeeze=True, parse_dates=True)
df = df[["Date", "LocalTransmission"]]
df.set_index("Date", inplace=True)
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)

result = ARMA(df, order=(0, 1)).fit(disp=False)
print(result.summary())
#print(result.params)
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)

result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for positive cases in Egypt \n Algorithm used: MA',
             fontsize=12)

plt.show()

##def mean_forecast_error(y, yhat):
##    return y.sub(yhat).mean()
Example #18
np.round(pacf(y1),3)

plot_pacf(y2);  # autocorrelation coefficients of the AR(1) model

## 5.3.2 Building and testing the ARMA model
plot_acf(y3);
plot_pacf(y3);

import statsmodels.tsa.stattools as ts
ts.arma_order_select_ic(y1,max_ar=3,max_ma=3,ic=['aic','bic','hqic'])
ts.arma_order_select_ic(y2,max_ar=3,max_ma=3,ic=['aic','bic','hqic'])
ts.arma_order_select_ic(y3,max_ar=3,max_ma=3,ic=['aic', 'bic','hqic'])

from statsmodels.tsa.arima_model import ARMA
y1_arma=ARMA(y1,order=(1,0)).fit()
y1_arma.summary()

ARMA(y2,order=(0,1)).fit().summary()

ARMA(y3,order=(1,1)).fit().summary()

plt.plot(y3,'o-',ARMA(y3,order=(1,1)).fit().fittedvalues);

## 5.4.3 Stationarity test for the series
from statsmodels.tsa.stattools import adfuller
def ADF(ts):  # stationarity test
    dftest = adfuller(ts)
    # attach descriptive labels to the values returned by adfuller
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value