예제 #1
0
def bestfit(x, y):
    """Return the element-wise midpoint (arithmetic mean) of *x* and *y*."""
    total = x + y
    return total / 2


sample_size = 500
# Generate some data: a slow sine wave plus two layers of uniform noise.
x = np.arange(sample_size)
y1 = np.sin(x / 20)
y2 = y1 + np.random.uniform(size=sample_size)
y3 = y2 + np.random.uniform(size=sample_size)
yr = np.random.uniform(size=sample_size)

# Average the two noisy series into the candidate mean-reverting signal.
bf = bestfit(y2, y3)

# Fit an AR(1) model; coef holds the fitted parameters.
# NOTE(review): `am` is presumably `statsmodels.tsa.arima_model` — confirm
# the import, which is outside this view.
coef = am.ARIMA(bf, [1, 0, 0]).fit().params

tau = 252  # presumably trading days per year — confirm

speedReversion = abs(coef[0]) / tau
equilibriumMean = coef[1] / (1 - coef[0])
# (sic) "Resdiual" typo kept so any external references keep working.
SDcointResdiual = math.sqrt(2 * speedReversion * bf.var() /
                            (1 - math.exp(-2 * speedReversion * tau)))
SDEq = SDcointResdiual / math.sqrt(2 * speedReversion)

# We are looking for a shortish halflife. > 50 days is too much
halflife = math.log(2) / speedReversion

# FIX: pd.rolling_mean() was removed in pandas 0.23; use the Series.rolling
# API instead. Values are identical; .to_numpy() restores the old
# ndarray-in/ndarray-out behaviour.
short_mean = pd.Series(bf).rolling(5).mean().to_numpy()
long_mean = pd.Series(bf).rolling(20).mean().to_numpy()
예제 #2
0
    def _seasonal_arima(self,
                        endog=None,
                        exog=None,
                        p=None,
                        d=None,
                        q=None,
                        imodels=None,
                        include_holidays=None,
                        ift_matrix=None,
                        stepwise_fit=None,
                        optimize=None):
        """
        This function runs the ARIMA model with different Fourier transformations based on different number of
        frequencies.
        :param pandas.DataFrame endog: A pandas dataframe storing the endogenous time series
        :param pandas.DataFrame exog: A pandas dataframe storing the exogenous pulses obtained through the Fourier
        transformation and / or a binary one-hot encoding of different US holidays
        :param int p: The auto-regressive order used in the model fit
        :param int d: The differencing order used in the model fit
        :param int q: The moving-average order used in the model fit
        :param int imodels: The current model run based on the current exogenous obtained through first imodels*2 many most
        relevant frequencies from the Fourier transform
        :param bool include_holidays: Whether to consider holidays as exogenous
        :param list ift_matrix: A list of list All exogenous variables where the ith column is the inverse Fourier
        transformation of the time series with first i*2 most relevant frequencies
        :param list stepwise_fit: A list storing different model objects; the fitted model is appended to it
        :param bool optimize: Flag to identify whether called from hyperparameter optimization
        :return: 0 on success (the fitted model is delivered via *stepwise_fit*); raises LADStructuralError on failure
        """

        import numpy as np
        import statsmodels.tsa.arima_model as arima

        # Extract the exogenous variable generated based on (imodels * 2) number of most significant
        # frequencies
        if imodels > 0:
            fourier_exog = ift_matrix[0].reshape(-1, 1)[:, 0].reshape(-1, 1)
            if not include_holidays:
                # Fourier feature only: drop any imaginary residue from the IFT.
                exog = np.real(fourier_exog)
            else:
                # Holidays present: append the Fourier feature as one more column.
                exog['fourier_feature'] = np.float64(
                    np.real(fourier_exog[:, 0]))

        # This check is required due to a bug in statsmodel arima which inflates the predictions and std error
        # for time series containing only 0's. Can be removed if fixed in a later version of statsmodel
        # or pyramid
        if np.count_nonzero(endog) == 0:
            idx_max = len(endog) // 2
            idx = int(np.random.randint(0, idx_max, 1)[0])
            # Perturb one point in the first half with a tiny positive value.
            endog[idx] = abs(np.random.normal(0, 1e-3, 1)[0])

        try:
            # NOTE(review): several of these fit() kwargs (seasonal,
            # error_action, stepwise_fit) look like pmdarima auto_arima
            # options rather than statsmodels ARIMA.fit() arguments —
            # confirm they are accepted by the pinned statsmodels version.
            stepwise_fit.append(
                arima.ARIMA(endog=endog, exog=exog,
                            order=(p, d, q)).fit(seasonal=False,
                                                 trace=False,
                                                 method='css',
                                                 solver='bfgs',
                                                 error_action='ignore',
                                                 stepwise_fit=True,
                                                 warn_convergence=False,
                                                 disp=False))
        except Exception as e:
            # Surface any fitting failure as a structural error for the caller.
            raise LADStructuralError(message=str(e))

        return 0
    # Preview the loaded data.
    print(data.head())

    # Hold out the last 5 rows as validation data.
    train_data = data.iloc[:-5, :]
    test_data = data.iloc[-5:, :]
    # print(test_data)

    # Order selection: rule of thumb caps p and q at length // 10.
    pmax = len(train_data['CWXT_DB:184:D:\\']) // 10
    qmax = len(train_data['CWXT_DB:184:D:\\']) // 10
    bic_matrix = []
    for p in range(pmax):
        temp = []
        for q in range(qmax):
            try:
                result = arima_model.ARIMA(train_data['CWXT_DB:184:D:\\'],
                                           (p, 1, q)).fit().bic
                # result if np.isnan(result) else None
                temp.append(None if pd.isna(result) else result)
                # temp.append(result)
            except:
                # Some (p, q) combinations fail to converge; record a gap.
                temp.append(None)
        bic_matrix.append(temp)

    bic_matrix = pd.DataFrame(bic_matrix)
    # Flatten the matrix and locate the position of the minimum BIC.
    print(bic_matrix)
    print(pd.isna(bic_matrix))
    p, q = bic_matrix.stack().idxmin()
    # The p and q with minimal BIC turn out to be 1, 1.
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
예제 #4
0
# -*- coding: UTF-8 -*-
"""
file:Quant24.py
data:2019-10-1715:43
author:Grey
des:
"""
import pandas as pd
from arch.unitroot import ADF
from statsmodels.tsa import arima_model

# Load the CPI series, index it by timestamp, then skip the first three
# rows and drop missing observations before modelling.
CPI = pd.read_csv('data/024/CPI.csv', index_col='time')
CPI.index = pd.to_datetime(CPI.index)
CPItrain = CPI[3:].dropna().CPI

# Fit two candidate ARMA specifications and inspect their summaries.
model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
model1.summary()
model2 = arima_model.ARIMA(CPItrain, order=(1, 0, 2)).fit()
model2.summary()
예제 #5
0
 fig.savefig("timecpi.png")
 # Unit-root (ADF) test for stationarity of the series.
 CPITrain = CPITrain.dropna().CPI
 print(ADF(CPITrain, max_lags=10).summary().as_text())
 # Ljung-Box test: is the CPI series white noise?
 LB = stattools.q_stat(stattools.acf(CPITrain)[1:12], len(CPITrain))
 print(LB[1][-1])
 # Model identification and estimation: inspect ACF/PACF side by side.
 fig = plt.figure()
 axe1 = plt.subplot(121)
 axe2 = plt.subplot(122)
 plot1 = plot_acf(CPITrain, lags=30, ax=axe1)
 plot2 = plot_pacf(CPITrain, lags=30, ax=axe2)
 fig.savefig("model.png")
 # Parameter estimation for several candidate ARMA orders.
 model1 = arima_model.ARIMA(CPITrain, order=(1, 0, 1)).fit()
 print(model1.summary())
 model2 = arima_model.ARIMA(CPITrain, order=(1, 0, 2)).fit()
 print(model2.summary())
 model3 = arima_model.ARIMA(CPITrain, order=(2, 0, 1)).fit()
 model4 = arima_model.ARIMA(CPITrain, order=(2, 0, 2)).fit()
 model5 = arima_model.ARIMA(CPITrain, order=(3, 0, 1)).fit()
 model6 = arima_model.ARIMA(CPITrain, order=(3, 0, 2)).fit()
 # Model diagnostics.
 # Confidence intervals of the estimated coefficients.
 print(model1.conf_int())
 print(model6.conf_int())
 # Check whether the residual series is white noise.
 stdresid = model6.resid / math.sqrt(model6.sigma2)
 fig = plt.figure()
 plt.plot(stdresid)
예제 #6
0
def ARIMA(series, n):
    '''
    ARIMA model restricted to first-order differencing (d=1); forecasts
    the next *n* points. Numeric index starts from 1.
    series: the time series
    n: number of future points to forecast
    '''
    series = np.array(series)
    series = pd.Series(series.reshape(-1))
    currentDir = os.getcwd()  # current working directory
    # First-difference the data.
    fd = series.diff(1)[1:]
    # Save ACF/PACF diagnostic plots of the first difference.
    plot_acf(fd).savefig(currentDir + '/一阶差分自相关图.png')
    plot_pacf(fd).savefig(currentDir + '/一阶差分偏自相关图.png')
    # ADF unit-root test on the first difference.
    unitP = adfuller(fd)[1]
    if unitP > 0.05:
        unitAssess = '单位根检验中p值为%.2f,大于0.05,认为该一阶差分序列判断为非平稳序列' % (unitP)
        #print('单位根检验中p值为%.2f,大于0.05,认为该一阶差分序列判断为非平稳序列'%(unitP))
    else:
        unitAssess = '单位根检验中p值为%.2f,小于0.05,认为该一阶差分序列判断为平稳序列' % (unitP)
        #print('单位根检验中p值为%.2f,小于0.05,认为该一阶差分序列判断为平稳序列'%(unitP))
    # Ljung-Box white-noise test.
    # NOTE(review): the [-1] indexing assumes the old statsmodels tuple
    # return (lbvalue, pvalue); newer versions return a DataFrame — confirm
    # the pinned statsmodels version.
    noiseP = acorr_ljungbox(fd, lags=1)[-1]
    if noiseP <= 0.05:
        noiseAssess = '白噪声检验中p值为%.2f,小于0.05,认为该一阶差分序列为非白噪声' % noiseP
        #print('白噪声检验中p值为%.2f,小于0.05,认为该一阶差分序列为非白噪声'%noiseP)
    else:
        noiseAssess = '白噪声检验中%.2f,大于0.05,认为该一阶差分序列为白噪声' % noiseP
        #print('白噪声检验中%.2f,大于0.05,认为该一阶差分序列为白噪声'%noiseP)
    # Choose p and q by the BIC criterion.
    pMax = int(series.shape[0] / 10)  # rule of thumb: order at most length/10
    qMax = pMax  # rule of thumb: order at most length/10
    bics = list()
    for p in range(pMax + 1):
        tmp = list()
        for q in range(qMax + 1):
            try:
                tmp.append(arima_model.ARIMA(series, (p, 1, q)).fit().bic)
            except Exception as e:
                #print(str(e))
                tmp.append(1e+10)  # sentinel: huge BIC so it is never chosen
        bics.append(tmp)
    bics = pd.DataFrame(bics)
    p, q = bics.stack().idxmin()
    #print('BIC准则下确定p,q为%s,%s'%(p,q))
    # Fit the selected model and forecast n steps ahead.
    model = arima_model.ARIMA(series, order=(p, 1, q)).fit()
    predict = model.forecast(n)[0]
    return {
        'model': {
            'value': model,
            'desc': '模型'
        },
        'unitP': {
            'value': unitP,
            'desc': unitAssess
        },
        'noiseP': {
            'value': noiseP[0],
            'desc': noiseAssess
        },
        'p': {
            'value': p,
            'desc': 'AR模型阶数'
        },
        'q': {
            'value': q,
            'desc': 'MA模型阶数'
        },
        'params': {
            'value': model.params,
            'desc': '模型系数'
        },
        'predict': {
            'value': predict,
            'desc': '往后预测%d个的序列' % (n)
        }
    }
예제 #7
0
# Plot the previously computed ACF values.
plt.plot(ac)
plt.grid()
plt.show()
# PACF of the log series (NaNs dropped).
pac=pacf(dflog.dropna())
plt.plot(pac)
plt.grid()
plt.show()


# Assigning ACF(Auto-correlation Function) and PACF(Partial Auto-Correlation Function) plots 

# In[24]:


# NOTE(review): statsmodels.tsa.arima_model was removed in statsmodels 0.13;
# newer code should use statsmodels.tsa.arima.model.ARIMA — confirm pin.
from statsmodels.tsa import arima_model
arima=arima_model.ARIMA(df,(2,1,2))
model=arima.fit()


# ARIMA(p,d,q)=ARIMA(2,1,2)
# p is the number of autoregressive terms,
# d is the number of nonseasonal differences needed for stationarity, and
# q is the number of lagged forecast errors in the prediction equation.

# In[25]:


# In-sample/dynamic forecast plot over observations 1..204.
model.plot_predict(start=1,end=204)


# In[ ]:
예제 #8
0
# %% AR Model
# Fit an AR model on the count column and store in-sample predictions + MSE.
ed_3h_ar = ed_3h_htm[['Ct']]
ar_mdl = ar_model.AR(ed_3h_ar)
ar_fit = ar_mdl.fit(maxlag=(WndwDys*DyStps))
ed_3h_ar['prediction'] = ar_fit.predict()
ed_3h_ar['MSE'] = MSE(ed_3h_ar.Ct,ed_3h_ar.prediction,WndwDys,DyStps)


ed_3h['AR'] = ed_3h_ar.prediction
ed_3h_mse['AR'] = ed_3h_ar.MSE

# %% ARIMA model


ed_3h_arima = ed_3h_htm[['Ct']]
arima_mdl = arima_model.ARIMA(ed_3h_arima,(30,1,30))
arima_fit = arima_mdl.fit()
# FIX: predictions must come from the fitted model, not the data frame —
# the original called ed_3h_arima.predict(), which a DataFrame does not
# have. This mirrors the AR branch above (ar_fit.predict()).
ed_3h_arima['prediction'] = arima_fit.predict()
ed_3h_arima['MSE'] = MSE(ed_3h_arima.Ct,ed_3h_arima.prediction,WndwDys,DyStps)


ed_3h['ARIMA'] = ed_3h_arima.prediction
ed_3h_mse['ARIMA'] = ed_3h_arima.MSE





# %% Plot
# Convert the datetime indexes to matplotlib date numbers for plotting.
ed_3h['dates'] = [date2num(date) for date in ed_3h.index]
ed_3h_mse['dates'] = [date2num(date) for date in ed_3h_mse.index]
    # Standardize the Uber supply column in place.
    uber_scaler = StandardScaler().fit(df[["uber_sup"]])
    df["uber_sup"] = uber_scaler.transform(df[["uber_sup"]])
    # NOTE(review): the triple-quoted string below appears to deliberately
    # disable the ARIMA block (kept verbatim inside a string literal,
    # Python-2 print included) — confirm it is meant to stay disabled.
    """

    # ARIMA (Time -> Uber).
    train = df[df["timestamp"] <= threshold]["uber_sup"]
    test = df[df["timestamp"] >= threshold - 86400]["uber_sup"]

    # Resampling to get hourly data (otherwise exception).
    uber_train = train.resample('H').ffill(). \
    reindex(pd.date_range(train.index[0], train.index[-1], freq = "H"))
    uber_test = test.resample('H').ffill(). \
    reindex(pd.date_range(test.index[0], test.index[-1], freq = "H"))

    # Fit model and predict.
    arima = arima_model.ARIMA(uber_train, (15, 0, 5)).fit()
    uber_predict = arima.predict(uber_test.index[0],
                                 uber_test.index[-1],
                                 dynamic=True)
    mse = math.sqrt(np.mean((uber_predict - uber_test) / 24)**2)
    print "Root Mean squared error: " + str(mse)
    """
    # NOTE(review): `arima` and `uber_predict` used below are only defined
    # inside the disabled block above — this plotting code will fail as
    # written; confirm intent.
    # Plot figures.
    fig_path = sys.path[0] + "/fig/time_alone_reg.png"
    fig, ax = plt.subplots(figsize=(8, 1))
    ax.set_xlabel("Time", fontsize = 10)
    ax.set_ylabel("Value", fontsize = 10)
    ax.plot(df["uber_data"], c = "r", label = "real")
    ax.plot(arima.fittedvalues[:-24], c = "k", label = "train")
    ax.plot(uber_predict, c = "b", label = "test")
    ax.set_ylim([-2, 4])
예제 #10
0
# `lag` is the m in the Ljung-Box statistic above; here we test the first
# 12 autocorrelation lags.
LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
LjungBox[1][-1]

# Split the figure into two panels.
axe1 = plt.subplot(121)
axe2 = plt.subplot(122)
# ACF of the series in the first panel.
plot1 = plot_acf(CPItrain, lags=30, ax=axe1)
# PACF of the series in the second panel.
plot2 = plot_pacf(CPItrain, lags=30, ax=axe2)

# order=(1, 0, 1) specifies an ARMA(1, 1) model; the middle 0 means the
# raw (undifferenced) series is modelled, so the I part can be ignored here.
model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
model1.summary()

# Likewise, fit models of the other candidate orders.
model2 = arima_model.ARIMA(CPItrain, order=(1, 0, 2)).fit()
model2.summary()
model3 = arima_model.ARIMA(CPItrain, order=(2, 0, 1)).fit()
model4 = arima_model.ARIMA(CPItrain, order=(2, 0, 2)).fit()
model5 = arima_model.ARIMA(CPItrain, order=(3, 0, 1)).fit()
model6 = arima_model.ARIMA(CPItrain, order=(3, 0, 2)).fit()

model6.conf_int()
# Diagnostic plot of the standardized residuals.
stdresid = model6.resid / math.sqrt(model6.sigma2)
plt.plot(stdresid)
plot_acf(stdresid, lags=20)
예제 #11
0
## Autocorrelation coefficients of the SH return series.
acfs = stattools.acf(SH_ret)

## Partial autocorrelation coefficients.
pacfs = stattools.pacf(SH_ret)

plot_acf(SH_ret, use_vlines=True, lags=30)

SH_ret.plot()
plt.title('return')

SH_close.plot()
plt.title('close price')

# ADF unit-root tests on returns and closing prices.
adfSH_ret = ADF(SH_ret)
print(adfSH_ret)

adfSH_close = ADF(SH_close)
print(adfSH_close)

# FIX: these two lines carried a stray leading space in the original,
# which is an IndentationError at module level.
LjungBox_ret = stattools.q_stat(acfs, len(SH_ret))
LjungBox_ret[1][-1]

HS300_data['close']['2010-01-01':'2016-12-31']
HS300_data['2010-01-01':'2016-12-31']

# Fit an ARMA(2, 1) to the returns.
model1 = arima_model.ARIMA(SH_ret, order=(2, 0, 1)).fit()
model1.summary()

# Information-criterion based order search (MA order up to 4).
stattools.arma_order_select_ic(SH_ret, max_ma=4)
def get_arima_coefficients(series, order=(2, 1, 3)):
    """
    Returns the ARIMA model coefficients for the given model with order (p, d, q)
    """
    fitted = arima_model.ARIMA(series, order).fit(disp=False)
    return fitted.params
예제 #13
0
# # dta = sm.datasets.sunspots.load_pandas().data[['SUNACTIVITY']]
# # dta.index = pd.DatetimeIndex(start='1700', end='2009', freq='A')

# dta = df
# res = sm.tsa.ARMA(dta, (3, 0)).fit()
# fig, ax = plt.subplots()
# ax = dta.ix['1950':].plot(ax=ax)
# fig = res.plot_predict('2015', '2016', dynamic=True, ax=ax,
#                         plot_insample=False)
# plt.show()

# from statsmodels.tsa.arima_model import _arima_predict_out_of_sample
# res = sm.tsa.ARIMA(data[:,0], (3,1, 2)).fit()

# Fit an ARIMA(3, 0, 2) on the first column and plot a dynamic forecast
# over steps 3000-3900.
model = ari.ARIMA(data[:, 0], order=(3, 0, 2))
ar_res = model.fit()
preds = ar_res.predict(3000, 3900, dynamic=True)
plt.plot(preds)
plt.show()

# get what you need for predicting one-step ahead
# params = res.params
# residuals = res.resid
# p = res.k_ar
# q = res.k_ma
# k_exog = res.k_exog
# k_trend = res.k_trend
# steps = 1

# statsmodels.tsa.arima_model.ARIMA.predict(params, start=len(data[:,0]))
예제 #14
0
 endo = a['mood_mean']
 P = [1, 2, 4, 6]
 D = range(0, 2)
 Q = range(0, 2)
 a1 = a
 best_score, best_cfg, best_predict, best_aic = float(
     'inf'), None, pd.DataFrame(), float('inf')
 for p in P:
     for d in D:
         for q in Q:
             orders = (p, d, q)
             exog = a[ex_terms]
             endo = a['mood_mean']
             try:
                 model = stat.ARIMA(
                     endog=endo, exog=exog, freq='d',
                     order=orders)  #order is the parameter (p,d,q)
                 model_ = model.fit(disp=0)
                 a1['pred'] = model_.predict(start=split_point,
                                             exog=exog,
                                             dynamic=True)
                 aic = model_.aic
                 a1.fillna(
                     a1.mean(), inplace=True
                 )  #fill Nan with mean first to inver_transform to original value
                 truth = a['mood_mean'].iloc[split_point:length]
                 predict = a1['pred'].iloc[split_point:length]
                 MSE = mean_squared_error(truth, predict)
                 if MSE < best_score:
                     best_predict = model_.predict(start=split_point,
                                                   exog=exog,
예제 #15
0
def many_paras():
    """
    p-value: controls the false positive rate (FPR).
    q-value: controls the FDR (false discovery rate).
    Q-statistic: Qlb=T*(T+2)*sigma(j=1,p)(rj^2/(T-j)); rj is the lag-j
        autocorrelation of the residual series, T the number of
        observations, p the number of lags.
    FDR = E(V/R), with V false discoveries and R total rejections.
    acf: autocorrelation -- y(t)= a0 + a1*y(t-1) + epsilon
         p(x(i)|x(i-h)) :sigma(i=1,n-h) ((x(i)-mu)*(x(i+h)-mu)/sigma(i=1,n) ((x(i)-mu)^2))
    pacf: partial autocorrelation; the k-1 intermediate lags are taken as
        given and only lag k is estimated --
        y(t)= a0 + a1*y(t-1) + ... a1*y(t-k) + epsilon
         p(x(i)..x(i-k)|x(i-1)x(i-k+1))
    ARMA(p,q): AR is an order-p autoregressive process, MA an order-q
        moving-average process.
    ARIMA adds a differencing step on top of the ARMA model.
    ADF: first-order autoregression with a white-noise disturbance; test for
        a unit root — if one exists the series is non-stationary.
        y(t)= mu + fi*y(t-1) + epsilon. Order p requires the p roots to sum
        to less than 1.
    Sma: simple moving average
    wma: weighted moving average
    ema: exponential moving average
    ewma: exponentially weighted moving average
    OBV: On Balance Volume, net long/short ratio =
        [(close - low) - (high - close)] / (high - low) * V
    :return:
    """
    # 1. Autocorrelation coefficients.
    acfs = stattools.acf(SHRet)
    # Plot the ACF.
    plot_acf(SHRet, use_vlines=True, lags=30)
    # 2. Partial autocorrelation coefficients.
    pacfs = stattools.pacf(SHRet)
    plot_pacf(SHRet, use_vlines=True, lags=30)
    # 3. ADF unit-root test; inspect the result.
    adfSHRet = ADF(SHRet)
    print(adfSHRet.summary().as_text())
    # 4. Ljung-Box Q statistics.
    LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
    print(LjungBox1)
    # 5. `lag` is the m in the test statistic above; here we test the first
    # 12 autocorrelation lags.
    LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
    # order=(1, 0, 1) specifies an ARMA(1, 1) model; the middle 0 means the
    # raw (undifferenced) series is used.
    model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
    model1.summary()
    model1.conf_int()
    # 6. Diagnostic plots of the standardized residuals.
    stdresid = model1.resid / math.sqrt(model1.sigma2)
    plt.plot(stdresid)
    plot_acf(stdresid, lags=20)
    LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
    print(LjungBox[1][-1])
    print(model1.forecast(3)[0])
    # 7. Autoregressive conditional heteroskedasticity model
    # y(t)=b*x(t)+epsilon(t)
    # epsilon(t)^2=a0+a1*epsilon(t-1)^2+a2*epsilon(t-2)^2+n(t)
    # \sigma_t^{2}=\omega+\sum_{i=1}^{p}\alpha_{i}\epsilon_{t-i}^{2}
    # n(t) i.i.d. with zero mean, var(n^2)=r^2
    am = arch_model(SHret)
    model = am.fit(update_freq=0)
    print(model.summary())
    # 8. Pairs handling: distance (SSD) and cointegration spreads.
    pt = TradeTool()
    SSD = pt.SSD(priceAf, priceBf)
    SSDspread = pt.SSDSpread(priceAf, priceBf)
    SSDspread.describe()
    coefficients = pt.cointegration(priceAf, priceBf)
    CoSpreadF = pt.CointegrationSpread(priceA, priceB, formPeriod, formPeriod)
    CoSpreadTr = pt.CointegrationSpread(priceA, priceB, formPeriod,
                                        tradePeriod)
    CoSpreadTr.describe()
    bound = pt.calBound(priceA, priceB, 'Cointegration', formPeriod, width=1.2)
    # 9. Pair-trading simulation on the selected entry/exit points.
    trtl = TradeTool()
    account = trtl.TradeSimPair(PAt, PBt, position)

    # 10. momentum function
    et = ElementTool()
    et.momentum(Close, 5).tail(n=5)
    momen35 = et.momentum(Close, 35)
    signal = []
    for i in momen35:
        if i > 0:
            signal.append(1)
        else:
            signal.append(-1)
    signal = pd.Series(signal, index=momen35.index)
    signal.head()
    tradeSig = signal.shift(1)
    ret = Close / Close.shift(1) - 1
    # ret=ret['2014-02-20':]
    # ret.head(n=3)
    Mom35Ret = ret * (signal.shift(1))
    Mom35Ret[0:5]
    real_Mom35Ret = Mom35Ret[Mom35Ret != 0]
    real_ret = ret[ret != 0]

    Rsi12 = et.rsi(BOCMclp, 12)
    # RSI strategy.
    rsi6 = et.rsi(BOCMclp, 6)
    rsi24 = et.rsi(BOCMclp, 24)
    # Use rsi6 to catch buy/sell points.
    Sig1 = []
    for i in rsi6:
        if i > 80:
            Sig1.append(-1)
        elif i < 20:
            Sig1.append(1)
        else:
            Sig1.append(0)

    date1 = rsi6.index
    Signal1 = pd.Series(Sig1, index=date1)
    Signal1[Signal1 == 1].head(n=3)
    Signal1[Signal1 == -1].head(n=3)

    Signal2 = pd.Series(0, index=rsi24.index)
    lagrsi6 = rsi6.shift(1)
    lagrsi24 = rsi24.shift(1)
    for i in rsi24.index:
        if (rsi6[i] > rsi24[i]) & (lagrsi6[i] < lagrsi24[i]):
            Signal2[i] = 1
        elif (rsi6[i] < rsi24[i]) & (lagrsi6[i] > lagrsi24[i]):
            Signal2[i] = -1

    signal = Signal1 + Signal2
    signal[signal >= 1] = 1
    signal[signal <= -1] = -1
    signal = signal.dropna()
    tradSig = signal.shift(1)

    tt = TradeTool()
    BuyOnly = tt.strategy_analy(buy, ret)
    SellOnly = tt.strategy_analy(sell, ret)
    Trade = tt.strategy_analy(tradSig, ret)
    Test = pd.DataFrame({
        "BuyOnly": BuyOnly,
        "SellOnly": SellOnly,
        "Trade": Trade
    })

    # Cumulative returns.
    cumStock = np.cumprod(1 + ret) - 1
    cumTrade = np.cumprod(1 + tradeRet) - 1

    # 12. Simple moving average.
    sma5 = et.smaCal(Close, 5)
    # 12. Weighted moving average.
    wma5 = et.wmaCal(Close, w)
    # 12. Exponential moving average.
    Ema = et.emaCal(Close, period)
    print(Ema)
    # 12. Exponentially weighted moving average.
    Ewma = et.ewmaCal(Close, 5, 0.2)

    # 13. Bollinger bands.
    UnicomBBands = et.bbands(Close, 20, 2)
    print(UnicomBBands)
    multiplier = [1, 1.65, 1.96, 2, 2.58]
    price2010 = Close['2010-01-04':'2010-12-31']
    tt.CalBollRisk(price2010, multiplier)

    # 14. Strategy performance.
    btt = BackTestTool()
    Performance1 = btt.perform(Close, tradSignal1)
    print(Performance1)

    # 15. Trading and backtest.
    KDtrade = btt.trade(KDSignal, close)
    btt.backtest(KDtrade.Ret, KDtrade.KDtradeRet)

    # 16. Up/down breakouts.
    KDupbreak = et.upbreak(KValue, DValue) * 1
    KDupbreak[KDupbreak == 1].head()

    KDdownbreak = et.downbreak(KValue, DValue) * 1
    KDdownbreak[KDdownbreak == 1].head()
    # Performance of the "golden cross" / "death cross" strategy.
    btt.backtest(KDbreak.Ret, KDbreak.KDbreakRet)

    # 17. Volume indicators.
    cumUpVol = et.VOblock(UpVol)
    cumDownVol = et.VOblock(DownVol)
    ALLVol = np.array([cumUpVol, cumDownVol]).transpose()

    # 18. Determine holding periods from trades.
    hold = tt.judge_hold(trade)

    # 19. Single-asset trade simulation.
    TradeAccount = tt.TradeSim(close, hold)
    print(TradeAccount)
예제 #16
0
# ACF of the closing-price series, then an ADF unit-root test on it.
plot_acf(clprice, lags=20)
from arch.unitroot import ADF
adf = ADF(clprice, lags=6)
print(adf.summary().as_text())

# Log returns: first difference of log prices (leading NaN dropped).
logReturn = pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()

adf = ADF(logReturn, lags=6)
print(adf.summary().as_text())

plot_acf(logReturn, lags=20)
plot_pacf(logReturn, lags=20)

# Candidate MA(1) and AR(1) models for the log returns.
from statsmodels.tsa import arima_model
model1 = arima_model.ARIMA(logReturn.values, order=(0, 0, 1)).fit()
model1.summary()

model2 = arima_model.ARIMA(logReturn.values, order=(1, 0, 0)).fit()
model2.summary()

#8. Repeat the workflow for the Baiyun closing prices.
baiyun = zgsy = pd.read_csv('Data/Part4/003/baiyun.csv', index_col='Date')
baiyun.index = pd.to_datetime(baiyun.index)
clprice = baiyun.Close

logReturn = pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()

adf = ADF(logReturn, lags=6)
print(adf.summary().as_text())
예제 #17
0
# Fetch CPI data and index it by month.
cpi = ts.get_cpi()
cpi.index = pd.to_datetime(cpi['month'])
cpi = cpi['cpi']
# NOTE(review): the slice bounds are given newest-first; with an ascending
# DatetimeIndex this would return an empty series — confirm the index order.
cpitrain = cpi['2016-01-01':'2000-01-01']
cpitrain.plot()
# Stationarity check (ADF unit-root test).
print(ADF(cpitrain, max_lags=10).summary().as_text())
# White-noise check (Ljung-Box).
ljb0 = stattools.q_stat(stattools.acf(cpitrain)[1:12], len(cpitrain))
ljb0[1][-1]

# Identify the ARMA orders p, q from the ACF/PACF plots.
# FIX: 'use_vlineEs' was a typo for plot_acf's 'use_vlines' keyword.
plot_acf(cpitrain, use_vlines=True, lags=30)
plot_pacf(cpitrain, use_vlines=True, lags=30)

model1 = arima_model.ARIMA(cpitrain.values, order=(1, 0, 1)).fit()
model1.summary()

# Grid-search small ARMA orders and record (AIC, BIC) for each.
p = np.arange(1, 4)
q = np.arange(1, 4)
result = dict()
for i in p:
    for j in q:
        model1 = arima_model.ARIMA(cpitrain.values, order=(i, 0, j)).fit()
        result[(i, j)] = (model1.aic, model1.bic)

# Diagnostics on the chosen ARMA(3, 2): the standardized residuals should
# be white noise.
bestmodel = arima_model.ARIMA(cpitrain.values, order=(3, 0, 2)).fit()
stdresid = bestmodel.resid / math.sqrt(bestmodel.sigma2)
plt.plot(stdresid)
plot_acf(stdresid, lags=12)
ljb = stattools.q_stat(stattools.acf(stdresid)[1:12], len(stdresid))
예제 #18
0
def ARIMA(series, n, name):
    """
    ARIMA model restricted to first-order differencing (d=1); forecasts
    the next *n* points.
    series: the time series
    n: number of future points to forecast
    name: label used in the (commented-out) diagnostic plot filenames
    """
    series = np.array(series)
    series = pd.Series(series.reshape(-1))
    currentDir = os.getcwd()  # current working directory
    # First-difference the data.
    fd = series.diff(1)[1:]
    #plot_acf(fd).savefig('./'+ name +'一阶差分自相关图.png')
    #plot_pacf(fd).savefig('./'+ name +'一阶差分偏自相关图.png')
    # ADF unit-root test on the first difference.
    unitP = adfuller(fd)[1]
    if unitP > 0.05:
        unitAssess = '单位根检验中p值为%.2f,大于0.05,认为该一阶差分序列判断为非平稳序列' % (unitP)
    else:
        unitAssess = '单位根检验中p值为%.2f,小于0.05,认为该一阶差分序列判断为平稳序列' % (unitP)
    # Ljung-Box white-noise test.
    # NOTE(review): the [-1] indexing assumes the old statsmodels tuple
    # return (lbvalue, pvalue); newer versions return a DataFrame — confirm.
    noiseP = acorr_ljungbox(fd, lags=1)[-1]
    if noiseP <= 0.05:
        noiseAssess = '白噪声检验中p值为%.2f,小于0.05,认为该一阶差分序列为非白噪声' % noiseP
    else:
        noiseAssess = '白噪声检验中%.2f,大于0.05,认为该一阶差分序列为白噪声' % noiseP
    # Choose p and q by the BIC criterion.
    pMax = 3
    qMax = pMax
    bics = list()
    for p in range(pMax + 1):
        tmp = list()
        for q in range(qMax + 1):
            try:
                tmp.append(arima_model.ARIMA(series, (p, 1, q)).fit().bic)
            except Exception as e:
                #print(str(e))
                tmp.append(1e+10)  # sentinel: huge BIC so it is never chosen
        bics.append(tmp)
    bics = pd.DataFrame(bics)
    p, q = bics.stack().idxmin()
    print('BIC准则下确定p,q为%s,%s' % (p, q))
    # Fit the selected model and forecast n steps ahead.
    model = arima_model.ARIMA(series, order=(p, 1, q))
    result = model.fit()
    print(result.summary())
    predict = result.forecast(n)[0]

    #model_a = pf.ARIMA(d1[s:e],ar=p,ma=q,integ=0)
    #x= model_a.fit()
    #model_a.plot_fit()
    #model_a.plot_predict(h=20,past_values=50)

    return {
        'model': {
            'value': model,
            'desc': '模型'
        },
        'unitP': {
            'value': unitP,
            'desc': unitAssess
        },
        'noiseP': {
            'value': noiseP[0],
            'desc': noiseAssess
        },
        'p': {
            'value': p,
            'desc': 'AR模型阶数'
        },
        'q': {
            'value': q,
            'desc': 'MA模型阶数'
        },
        'predict': {
            'value': predict,
            'desc': '往后预测%d个的序列' % (n)
        }
    }
    file_name = './temp/discdata_processed.xls'
    # file_name = './data/discdata_processed.xls'
    data = pd.read_excel(file_name, index_col='COLLECTTIME')

    # Hold out the last 5 rows as validation data.
    train_data = data.iloc[:-5, :]
    test_data = data.iloc[-5:, :]
    # print(test_data)
    xdata = train_data['CWXT_DB:184:D:\\']

    # Order-selection bounds (rule of thumb: at most length // 10).
    pmax = len(xdata) // 10
    qmax = len(xdata) // 10

    # Fit an ARIMA(0, 1, 1) on the training series.
    arima = arima_model.ARIMA(xdata, (0, 1, 1)).fit()
    xdata_pred = arima.predict(typ='levels')
    # Residuals of the in-sample predictions.
    pred_error = (xdata_pred - xdata).dropna()

    # Ljung-Box: re-check that the residuals are white noise.
    # NOTE(review): tuple unpacking assumes the old statsmodels return
    # (lbvalue, pvalue); newer versions return a DataFrame — confirm.
    lb, p = diagnostic.acorr_ljungbox(pred_error, lags=12)

    # Count lags whose p-value rejects the white-noise hypothesis.
    h = (p < 0.05).sum()
    if h > 0:
        print(u'模型ARIMA(0,1,1)不符合白噪声检验')
    else:
        print(u'模型ARIMA(0,1,1)符合白噪声检验')

    # Forecast the next 5 days.
    test_pred = arima.forecast(5)[0]
예제 #20
0
def ts_fit_arima(ts, order):
    """Fit an ARIMA model of the given (p, d, q) order to *ts* and return the fitted result (quiet fit)."""
    model = arima.ARIMA(ts, order=order)
    return model.fit(disp=0)
예제 #21
0
        # Grid-search ARIMA(i, d, j) over p, q in [0, 3] and d in [0, 1],
        # keeping the fit with the lowest AIC.
        df_scan = df_new.iloc[:k]
        best_aic = np.inf
        best_order = None
        best_mdl = None
        #print(test[sym][-1])
        pq_rng = range(4)  # [0,1,2,3]
        d_rng = range(2)  # [0,1]
        #train.dropna(inplace= True)
        for i in pq_rng:
            for d in d_rng:
                for j in pq_rng:
                    #                                   if(k>10):
                    try:
                        #                                     print("no error")
                        tmp_mdl = smt.ARIMA(df_scan,
                                            order=(i, d, j)).fit(method='mle',
                                                                 trend='nc',
                                                                 disp=0)
                        #print(tmp_mdl.aic)
                        tmp_aic = tmp_mdl.aic
                        #print(tmp_aic)
                        if tmp_aic < best_aic:
                            best_aic = tmp_aic
                            best_order = (i, d, j)
                            best_mdl = tmp_mdl
                    except:
                        # Non-converging orders are simply skipped.
                        continue

        #print(best_mdl)
        # No order converged at all: count the failure.
        if best_mdl is None:
            #print("bakwas")
            temp = temp + 1
예제 #22
0
 def fit_row(self, row):
     """Fit an ARIMA(self.p, self.d, self.q) model to one row and return (in-sample predictions, fitted parameters)."""
     order = (self.p, self.d, self.q)
     fitted = arima_model.ARIMA(row, order).fit()
     return (fitted.predict(), fitted.params)
예제 #23
0
    # plt.plot(X)
    sm.graphics.tsa.plot_acf(X)


# Visual stationarity checks on the raw and differenced series.
visual_stable(X)
visual_stable(X, 1)
visual_stable(X, 2)

d = 1

# Determine p, q from the ACF/PACF of the d-times differenced series.
sm.graphics.tsa.plot_acf(np.diff(X, n=d))
sm.graphics.tsa.plot_pacf(np.diff(X, n=d))

# Fit with ARIMA(p, d, q)
model = ARIMA_model.ARIMA(X, (1, d, 3))
result = model.fit()
print(result.summary())

# Residual test
# Note that the prediction loses d of data, so the original data
# should drop first-d elements to match.
residual = np.delete(X, range(d)) - result.predict(typ='levels')
result.plot_predict()

# Ljung-Box Q statistics on the residuals.
# NOTE(review): acf(..., qstat=True) returns Q/p for lags 1..nlags, so
# Q[5]/p[5] is lag 6 but Q[12]/p[12] is lag 13 — lag 12 would be index 11;
# confirm which lags are intended.
_, Q, p = stattools.acf(residual, qstat=True)
Q6, Q12 = Q[5], Q[12]
p6, p12 = p[5], p[12]
if p6 < 0.05 or p12 < 0.05:
    sad('Bad! Not a white noise, test failed')
    if p6 >= 0.05: