Example 1
# imports needed by this snippet (not shown in the original excerpt)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


def get_pdq(time_series):
    # visual diagnostics: the ACF plot suggests q, the PACF plot suggests p
    plot_acf(time_series)
    plot_pacf(time_series)
    plt.show()
    # qstat=True: acf() also returns Ljung-Box Q statistics (rac) and their p-values (Q)
    r, rac, Q = sm.tsa.acf(time_series, qstat=True)
    prac = pacf(time_series, method='ywmle')
    table_data = np.c_[range(1, len(r)), r[1:], rac, prac[1:len(rac) + 1], Q]
    table = pd.DataFrame(table_data,
                         columns=['lag', "AC", "Q", "PAC", "Prob(>Q)"])
    print(table)
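A hedged usage sketch for get_pdq: the AR(2) coefficients below are illustrative, and np, pd and get_pdq are assumed to be in scope from the snippet above.

from statsmodels.tsa.arima_process import arma_generate_sample

# simulate an AR(2) series purely to exercise the diagnostics above;
# its PACF should cut off after lag 2, pointing to p = 2
np.random.seed(0)
simulated = arma_generate_sample(ar=[1, -0.6, -0.3], ma=[1], nsample=500)
get_pdq(pd.Series(simulated))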
Example 2
def plot_acf(series,
             ax=None,
             lags=None,
             alpha=None,
             use_vlines=True,
             unbiased=False,
             fft=True,
             title='Autocorrelation',
             zero=True,
             vlines_kwargs=None,
             show=True,
             **kwargs):
    """Plot a series' auto-correlation as a line plot.

    A wrapper method for the statsmodels ``plot_acf`` method.

    Parameters
    ----------
    series : array-like, shape=(n_samples,)
        The series or numpy array for which to plot an auto-correlation.

    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.

    lags : int, array-like or None, optional (default=None)
        int or Array of lag values, used on horizontal axis. Uses
        np.arange(lags) when lags is an int.  If not provided,
        ``lags=np.arange(len(corr))`` is used.

    alpha : scalar, optional (default=None)
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        Bartlett's formula. If None, no confidence intervals are plotted.

    use_vlines : bool, optional (default=True)
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.

    unbiased : bool, optional (default=False)
        If True, then denominators for autocovariance are n-k, otherwise n

    fft : bool, optional (default=True)
        If True, computes the ACF via FFT.

    title : str, optional (default='Autocorrelation')
        Title to place on plot. Default is 'Autocorrelation'

    zero : bool, optional (default=True)
        Flag indicating whether to include the 0-lag autocorrelation.
        Default is True.

    vlines_kwargs : dict, optional (default=None)
        Optional dictionary of keyword arguments that are passed to vlines.

    show : bool, optional (default=True)
        Whether to show the plot after it's been created. If not, will return
        the plot as an Axis object instead.

    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Notes
    -----
    This method will only show the plot if ``show=True`` (which is the default
    behavior). To simply get the axis back (say, to add to another canvas),
    use ``show=False``.

    Examples
    --------
    >>> plot_acf([1, 2, 3], show=False)  # doctest: +SKIP
    <matplotlib.figure.Figure object at 0x122fab4e0>

    Returns
    -------
    plt : Axis or None
        If ``show`` is True, does not return anything. If False, returns
        the Axis object.
    """
    if plt is None:
        warn_for_no_mpl()
        return None

    # delegate to statsmodels' plot_acf (assumed imported at module level
    # as ``_plot_acf`` to avoid clashing with this wrapper's own name)
    res = _plot_acf(x=series,
                    ax=ax,
                    lags=lags,
                    alpha=alpha,
                    use_vlines=use_vlines,
                    unbiased=unbiased,
                    fft=fft,
                    title=title,
                    zero=zero,
                    vlines_kwargs=vlines_kwargs,
                    **kwargs)

    return _show_or_return(res, show)
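As the Notes section above says, show=False hands back the axis instead of displaying the figure, which is what you want when embedding the plot in another canvas. A minimal sketch, assuming this wrapper is the one shipped in pmdarima.utils (the import path is an assumption):

import numpy as np
from pmdarima.utils import plot_acf  # assumed location of the wrapper above

rng = np.random.RandomState(42)
series = rng.normal(size=200)

# with show=False the plot is not displayed; the axis/figure is returned instead
axis = plot_acf(series, lags=30, alpha=0.05, show=False)
print(axis)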
Example 3
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf

# sourcing df from Data Preparation
from Data_prep.Data_preparation import MSFTdf

# exporting ACF and PACF
acf_plot = plot_acf(MSFTdf.IntChange)
acf_vals = acf(MSFTdf.IntChange, nlags=30)  # request 30 lags so the 31-bar chart below is well-defined
plt.bar(range(31), acf_vals[:31])
#plt.savefig('MSFT_ACF')

pacf_plot = plot_pacf(MSFTdf.IntChange)
pacf_vals = pacf(MSFTdf.IntChange, nlags=30)  # match the 31 bars plotted below
plt.bar(range(31), pacf_vals[:31])
#plt.savefig('MSFT_PACF')

plt.show()
Example 4
output['value']['Number of Observations Used'] = t[3]
output['value']['Critical Value(1%)'] = t[4]['1%']
output['value']['Critical Value(5%)'] = t[4]['5%']
output['value']['Critical Value(10%)'] = t[4]['10%']
print(output)
## After differencing, the series is essentially stationary and passes the ADF test.

## Determine the autoregressive and moving-average orders (p, q)
## Following the usual identification rules for time series, the ARMA order is chosen by combining the ACF plot, the PACF plot and the AIC/BIC criteria; the (p, q) pair that minimises both AIC and BIC is taken as the ideal order.
# 5) p,q
plot_acf(time_series)
plot_pacf(time_series)
plt.show()

r, rac, Q = sm.tsa.acf(time_series, qstat=True)
prac = pacf(time_series, method='ywmle')
table_data = np.c_[range(1, len(r)), r[1:], rac, prac[1:len(rac) + 1], Q]
table = pd.DataFrame(table_data, columns=['lag', "AC", "Q", "PAC", "Prob(>Q)"])

print(table)

# max_ar and max_ma set the largest p and q considered by the automatic order selection.
# The ic argument selects the criterion (AIC here; BIC also works). The function evaluates the AIC of every (p, q) combination from (0, 0) to (3, 3) and returns the pair with the smallest value.

(p, q) = (sm.tsa.arma_order_select_ic(time_series,
                                      max_ar=3,
                                      max_ma=3,
                                      ic='aic')['aic_min_order'])
print((p, q))

# 6) ARIMA(0,1,1)
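The fragment stops at the comment for step 6; a minimal sketch of that step using the legacy statsmodels API seen in the other examples (time_series is the series from the fragment above, and the (0, 1, 1) order is the one named in the comment):

from statsmodels.tsa.arima_model import ARIMA

# d = 1 lets ARIMA difference the series internally, so the original
# (undifferenced) series is passed in
arima_011 = ARIMA(time_series, order=(0, 1, 1)).fit(disp=-1)
print(arima_011.summary())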
Example 5
lag_acf = acf(df["df_log_shift"].dropna(), nlags=50)
plt.figure(figsize=(16, 7))
plt.plot(lag_acf, marker="o")
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()

lag_pacf = pacf(df["df_log_shift"].dropna(), nlags=50, method='ols')

#PLOT PACF
plt.figure(figsize=(16, 7))
plt.plot(lag_pacf, marker="o")
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(df["df_log_shift"].dropna())),
            linestyle='--',
            color='gray')
plt.title('Partial Autocorrelation Function')
plt.xlabel('number of lags')
plt.ylabel('correlation')
plt.tight_layout()
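The ±1.96/√N lines above are the usual large-sample 95% band; statsmodels can draw an equivalent band itself through the alpha argument. A minimal sketch reusing the df_log_shift column from this snippet (df, as above, is assumed to be in scope):

import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

series = df["df_log_shift"].dropna()
# alpha=0.05 shades 95% confidence intervals around each bar
plot_acf(series, lags=50, alpha=0.05)
plot_pacf(series, lags=50, alpha=0.05, method='ols')
plt.show()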
Example 6
    8451, 9815, 10894, 10287, 9666, 6072, 5418
])
data.index = pd.Index(sm.tsa.datetools.dates_from_range('1901', '1990'))
# Plot the raw series
data.plot(figsize=(12, 8))
plt.show()

# Automatic order selection based on AIC
# (p, q) =(sm.tsa.arma_order_select_ic(data,max_ar=10,max_ma=10,ic='aic')['aic_min_order'])
# print((p,q))
# Order selection from the ACF and PACF plots
plot_acf(data)  # autocorrelation plot
plot_pacf(data)  # partial autocorrelation plot
plt.show()
r, rac, Q = sm.tsa.acf(data, qstat=True)
prac = pacf(data, method='ywmle')
table_data = np.c_[range(1, len(r)), r[1:], rac, prac[1:len(rac) + 1], Q]
table = pd.DataFrame(table_data, columns=['lag', "AC", "Q", "PAC", "Prob(>Q)"])
print(table)

# Fit the model and report the results
p, d, q = (7, 0, 7)
# ARMA takes a (p, q) order; d = 0 here, so no differencing is applied
arma_mod = ARMA(data, (p, q)).fit(disp=-1, method='mle')
summary = (arma_mod.summary2(alpha=.05, float_format="%.8f"))
print(summary)

# Fit an ARMA(7, 0) model
arma = ARMA(data, (7, 0)).fit()
print('AIC: %.4lf' % arma.aic)
# Out-of-sample prediction
predict_y = arma.predict('1990', '2000')
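The fragment ends right after computing the forecast; a small sketch of how the 1990-2000 forecast could be plotted against the observed series (plotting details are illustrative):

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 8))
data.plot(ax=ax, label='observed')        # 1901-1990 observations
predict_y.plot(ax=ax, label='forecast')   # 1990-2000 out-of-sample forecast
ax.legend()
plt.show()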
Example 7
######## Step 8 #########

# The new bitcoin dataset has been formed. From now on we will be having only this dataset
bitcoin = bitcoin[bitcoin['Date'] >= "2017-01-01"]
bitcoin['bprice'].plot()
# plot8 = plt.plot(bitcoin['Date'],bitcoin['bprice'])

######## Step 9 #########

# The ACF and PACF measure how today's value correlates with its own lagged (past) values
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

acf(bitcoin['dbprice'].dropna())
pacf(bitcoin['dbprice'].dropna())

# Plots for acf and pacf
plot_acf(bitcoin['dbprice'].dropna())
plot_pacf(bitcoin['dbprice'].dropna())

######## Step 10 #########

from statsmodels.tsa.arima_model import ARIMA

x = bitcoin['doil']
bigX = sm.add_constant(
    pd.concat((x, bitcoin['deuro'], bitcoin['dgold'], bitcoin['dsp']), axis=1))[1:]
x = x[1:]
y = bitcoin['dbprice'][1:]
model10 = sm.OLS(y, bigX).fit()
Example 8
def shibor_ARIMA():
    data = shiborData

    data.to_csv("shibor_data.csv")

    df = pd.read_csv('shibor_data.csv')

    global shibor_term_set
    shibor_term = ['shiborON', 'shibor1W', 'shibor2W', 'shibor1M', 'shibor3M', 'shibor6M', 'shibor9M', 'shibor1Y']
    shibor_term_set = shibor_term[0]   # the index (starting at 0) selects which tenor product to process

    dta = pd.Series(df[shibor_term_set].values, index=df['date'].values)
     
    plt.show()

    # 2. First-order differencing of the series (determines d)
    fig = plt.figure(figsize=(12,8))
    ax1= fig.add_subplot(111)
    diff1 = dta.diff(1)
    sum_nan_diff1 =  diff1.isnull().sum().sum()
    print('Number of NaNs in diff1:', sum_nan_diff1)
    where_are_nan = np.isnan(diff1)  # locate NaNs and replace them with 0
    diff1[where_are_nan] = 0
    diff1.plot(ax=ax1)
    # ADF test: compare the test statistic against the critical values to check stationarity
    t = sm.tsa.stattools.adfuller(dta)
    # t = sm.tsa.stattools.adfuller(diff1)
    output = pd.DataFrame(index=['Test Statistic Value', "p-value", "Lags Used",
                                 "Number of Observations Used", "Critical Value(1%)",
                                 "Critical Value(5%)", "Critical Value(10%)"],
                          columns=['value'])
    output['value']['Test Statistic Value'] = t[0]
    output['value']['p-value'] = t[1]
    output['value']['Lags Used'] = t[2]
    output['value']['Number of Observations Used'] = t[3]
    output['value']['Critical Value(1%)'] = t[4]['1%']
    output['value']['Critical Value(5%)'] = t[4]['5%']
    output['value']['Critical Value(10%)'] = t[4]['10%']
    print(output)
    # Determine the AR and MA orders (p, q).
    # Following the usual identification rules, combine the ACF plot, the PACF plot,
    # the AIC (Akaike information criterion) and the BIC (Bayesian information criterion);
    # the (p, q) pair that minimises AIC/BIC is taken as the ideal order.
    plot_acf(diff1)
    plot_pacf(diff1)
    plt.show()

    r,rac,Q = sm.tsa.acf(diff1, qstat=True)
    prac = pacf(diff1,method='ywmle')
    table_data = np.c_[range(1,len(r)), r[1:],rac,prac[1:len(rac)+1],Q]
    table = pd.DataFrame(table_data, columns=['lag', "AC","Q", "PAC", "Prob(>Q)"])

    print('table',table)
    
    (p, q) = sm.tsa.arma_order_select_ic(
        diff1, max_ar=3, max_ma=3, ic='aic')['aic_min_order']
    # max_ar and max_ma bound the p and q values tried by the automatic search.
    # The ic argument picks the selection criterion (AIC here; BIC also works).
    # The function evaluates every (p, q) combination from (0, 0) to (3, 3)
    # and returns the pair with the smallest AIC.

    print('p = %d , q = %d' %(p,q))

    # Forecast
    # note: a fixed (8, 1, 0) order is used here rather than the (p, q) selected above
    arima_model = sm.tsa.ARIMA(dta, (8, 1, 0)).fit()
    # predict_data = arma_model.predict(start=str(1979), end=str(2010+3), dynamic = False)
    predict_data = arima_model.predict(start="2019-10-25", end="2019-11-06",dynamic=True,typ='levels')
    shibor_forcast = pd.concat([dta['2019-09-21':"2019-11-06"],predict_data],axis=1,keys=['original', 'predicted'])  
    # combine the original and predicted data, using keys to label the two columns
    plt.figure()

    plt.plot(shibor_forcast)

    plt.title(shibor_term_set + ' actual vs predicted')

    plt.xticks(rotation=50)
    plt.show()
Example 9
File: arima.py Project: lbship/Blog
print('ADF test of the differenced series')
print(ADF(diff_1_df))
#(-7.296820308000623, p1 1.3710560053434156e-10, 0, 66,
# {'1%': -3.5335601309235605, '5%': -2.9064436883991434, '10%': -2.590723948576676}, 1306.8499722912552)
# (-0.913770472695999, 0.7834037847008933, 4, 61,
# {'1%': -3.542412746661615, '5%': -2.910236235808284, '10%': -2.5927445767266866}, 1294.1719644274262)
print('White-noise (Ljung-Box) test of the differenced series')
# a p-value greater than 0.05 indicates a white-noise series
print(acorr_ljungbox(diff_1_df, lags=1))
# (array([0.69570612]), P1值array([0.40423027]))
#(array([18.06943954]), P2array([2.12992775e-05]))
#(array([5.69722348]),  logp1 array([0.01699177]))

# Find the optimal p and q values
r, rac, Q = sm.tsa.acf(diff_1_df, qstat=True)
prac = pacf(diff_1_df, method='ywmle')
table_data = np.c_[range(1, len(r)), r[1:], rac, prac[1:len(rac) + 1], Q]
table = pd.DataFrame(table_data, columns=['lag', "AC", "Q", "PAC", "Prob(>Q)"])
order = sm.tsa.arma_order_select_ic(diff_1_df,
                                    max_ar=7,
                                    max_ma=7,
                                    ic=['aic', 'bic', 'hqic'])
p, q = order.bic_min_order
print("p,q")
print(p, q)

# Build the ARIMA(p, 1, q) model
order = (p, 1, q)
train_X = diff_1_df[:]
arima_model = ARIMA(train_X, order).fit()
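The snippet stops immediately after fitting; a short hedged follow-up that inspects the fit and produces a few-step forecast with the legacy ARIMA results API (the number of steps is arbitrary):

print(arima_model.summary())
# the legacy ARIMAResults.forecast returns (forecast, stderr, conf_int)
forecast, stderr, conf_int = arima_model.forecast(steps=5)
print(forecast)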
Example 10
#1. Difference between the log series and its moving average
logAndMA = tsLog - movingAverage
logAndMA.dropna(inplace=True)
#rollingStatPlot(logAndMA)

#2. First difference of the log series (d=1)
diff = tsLog - tsLog.shift()
diff.dropna(inplace=True)
#rollingStatPlot(diff)

#find p and q
from statsmodels.graphics.tsaplots import acf, pacf

acfLag = acf(diff, nlags=20)
pacfLag = pacf(diff, nlags=20, method='ols')

plt.subplot(121)
plt.plot(acfLag)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
plt.subplot(122)
plt.plot(pacfLag)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(diff)), linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
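The comment "find p and q" relies on reading the plots: a common heuristic takes p as the lag where the PACF first falls inside the ±1.96/√N band and q as the same point on the ACF. A small sketch of that reading applied to the arrays computed above (it is only a heuristic, not a replacement for inspecting the plots):

import numpy as np

conf = 1.96 / np.sqrt(len(diff))
# first non-zero lag whose coefficient falls inside the confidence band
p_candidate = next((lag for lag, v in enumerate(pacfLag) if lag > 0 and abs(v) < conf), None)
q_candidate = next((lag for lag, v in enumerate(acfLag) if lag > 0 and abs(v) < conf), None)
print('candidate p:', p_candidate, 'candidate q:', q_candidate)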
Example 11
    def createarima(self, dataconfig):
        with open(dataconfig) as f:
            dataconfigfile = yaml.load(f, Loader=FullLoader)
        metrics = pd.DataFrame(columns=[
            'modelname', 'mean_absolute_error', 'mean_squared_error',
            'r2_score', 'mean_squared_log_error'
        ])

        data = pd.read_csv(dataconfigfile["clean_data_address"])
        location = dataconfigfile["location"]
        choice = dataconfigfile['frequency']
        diction = {
            "D": 7,
            "W": 52,
            "M": 12,
            "Q": 4,
            "Y": 2,
        }
        # map the frequency code to a seasonal period, defaulting to 12
        freq = diction.get(choice, 12)
        print("frequency", freq)
        with open("logs.log", "a+") as f:
            f.write("Frequency=" + str(freq) + "\n")
            f.write("Creating Arima models\n")
            f.write("Please wait trying different models...\n")
            f.write("Trained on several models\n")
            f.write("Selecting best model\n")
            f.close()
        # warnings.filterwarnings("ignore")
        # sys.stdout=open("logs.log","a+")
        with StepwiseContext(max_dur=15):
            model = pm.auto_arima(data,
                                  stepwise=True,
                                  error_action='ignore',
                                  seasonal=True,
                                  m=freq,
                                  trace=True)
        # sys.stdout.close()
        #metrics=met.calculate_metrics("fbprophet","Regression",testpred,testactual)
        order = model.get_params(deep=False)['order']
        seasonal = model.get_params(deep=False)['seasonal_order']
        print("order=", order)
        print("seasonal", seasonal)
        print("frequency", freq)
        modelfinal = SARIMAX(data, order=order, seasonal_order=seasonal).fit()

        start = 1
        end = len(data)
        compare = modelfinal.predict(start=start, end=end, typ='levels')

        compare.index = data.index

        metrics_new_row = met.calculate_metrics("arima", "Regression",
                                                data['y'], compare)
        metricsLocation = os.path.join(dataconfigfile["location"],
                                       "metrics.csv")
        metrics.loc[len(metrics.index)] = metrics_new_row
        metrics.to_csv(metricsLocation, index=True)
        r2score = metrics_new_row[3]

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data.index, y=data.y, name="actual"))

        fig.add_trace(
            go.Scatter(x=compare.index, y=compare, name="predictions"))

        plotlocation = dataconfigfile['location']
        plotlocation = os.path.join(plotlocation, "plot.html")
        acf_ = acf(data['y'])
        acf_ = pd.DataFrame(acf_, columns=['data'])
        pacf_ = pacf(data['y'])
        pacf_ = pd.DataFrame(pacf_, columns=['data'])
        fig2 = self.plot_graphs(acf_, "Autocorrelation function")
        fig3 = self.plot_graphs(pacf_, "Partial autocorrelation function")
        with open(plotlocation, 'a') as f:
            f.write(fig.to_html(include_plotlyjs='cdn', full_html=False))
            f.write(fig2.to_html(include_plotlyjs='cdn', full_html=False))
            f.write(fig3.to_html(include_plotlyjs='cdn', full_html=False))

        # modelfinal=auto_arima(data['y'], trace=True,suppress_warnings=True, seasonal=True)
        location = os.path.join(dataconfigfile["location"],
                                str(dataconfigfile["id"]) + "_model")
        os.makedirs(location)
        name = str(dataconfigfile["experimentname"]) + str(
            dataconfigfile["id"]) + "_model"
        # modelfinal.save(name)
        pickleFilePath = os.path.join(location, name)
        with open(pickleFilePath, 'wb') as pkl:
            pickle.dump(modelfinal, pkl)

        # shutil.move(name,location)

        return {
            "Successful": True,
            "cleanDataPath": dataconfigfile["clean_data_address"],
            "metricsLocation": metricsLocation,
            "pickleFolderPath": location,
            "pickleFilePath": pickleFilePath,
            "plotLocation": plotlocation,
            "accuracy": r2score
        }
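createarima() only reads a handful of keys from the YAML config (clean_data_address, location, frequency, id, experimentname) and expects the CSV to contain a y column. A minimal illustrative config that would satisfy those lookups; file names and values are made up for the sketch, and the owning class name is hypothetical:

import yaml

example_config = {
    "clean_data_address": "clean_data.csv",  # CSV whose target column is named 'y'
    "location": "experiments/exp1",          # directory for metrics.csv, plot.html and the model
    "frequency": "M",                        # D/W/M/Q/Y, mapped to a seasonal period above
    "id": 1,
    "experimentname": "demo_",
}

with open("dataconfig.yaml", "w") as f:
    yaml.safe_dump(example_config, f)

# result = SomePipeline().createarima("dataconfig.yaml")  # hypothetical owning class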
Example 12
## 5.3.1 Correlation tests for the series
from statsmodels.tsa.stattools import acf
from statsmodels.graphics.tsaplots import plot_acf
np.round(acf(y2),3)

plot_acf(y1);  # ACF of the MA(1) model

def ac_QP(Yt):
    import statsmodels.api as sm
    r,q,p = sm.tsa.acf(Yt, qstat=True)
    rqp=np.c_[r[1:], q, p]
    rqp=pd.DataFrame(rqp, columns=["AC", "Q", "Prob(>Q)"]);
    return(rqp)
ac_QP(y2)[:10]

from statsmodels.tsa.stattools import pacf
from statsmodels.graphics.tsaplots import plot_pacf
np.round(pacf(y1),3)

plot_pacf(y2);  # PACF of the AR(1) model

## 5.3.2 Building and checking the ARMA model
plot_acf(y3);
plot_pacf(y3);

import statsmodels.tsa.stattools as ts
ts.arma_order_select_ic(y1,max_ar=3,max_ma=3,ic=['aic','bic','hqic'])
ts.arma_order_select_ic(y3,max_ar=3,max_ma=3,ic=['aic', 'bic','hqic'])

from statsmodels.tsa.arima_model import ARMA
y1_arma=ARMA(y1,order=(1,0)).fit()
y1_arma.summary()
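Section 5.3.2 covers both building and checking the model, but the snippet ends at summary(). A small follow-up sketch of the residual check with the Ljung-Box test used elsewhere in these examples (the lag count is illustrative):

from statsmodels.stats.diagnostic import acorr_ljungbox

resid = y1_arma.resid
# a well-specified ARMA(1, 0) should leave white-noise residuals:
# large Ljung-Box p-values mean no remaining autocorrelation
print(acorr_ljungbox(resid, lags=10))
plot_acf(resid);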