コード例 #1
0
ファイル: demo1.py プロジェクト: chntylz/quant
def simu_ar(data, a, maxlag=30, true_order=1):
    """Fit an AR model to ``data`` and print estimated vs. true values.

    Args:
        data: Series to fit.
        a: True coefficient (or list of coefficients); only echoed in the
            printout.
        maxlag: Largest lag order offered to the fit and to the order search.
        true_order: True lag order; only echoed in the printout.
    """
    # Both calls share the same options: pick the order by AIC and fit
    # without a constant term ('nc'; 'c' would include one).  The ``ic``
    # option can be one of 'aic', 'bic', 'hqic' or 't-stat'.
    options = {'maxlag': maxlag, 'ic': 'aic', 'trend': 'nc'}
    fitted = smt.AR(data).fit(**options)
    chosen_lag = smt.AR(data).select_order(**options)

    # Report the estimates next to the known ground truth.
    rounded_params = fitted.params.round(2)
    print(f'参数估计值:{rounded_params},估计的滞后阶数:{chosen_lag}')
    print(f'真实参数值:{a},真实滞后阶数 {true_order}')
コード例 #2
0
ファイル: demo1.py プロジェクト: chntylz/quant
def ar_model(code='399001'):
    """Plot the series for ``code`` and report its AIC-selected AR fit.

    Args:
        code: Identifier handed to ``get_stock_data``; also used as the plot
            title and as the prefix of the printed lines.

    Returns:
        The lag order returned by ``select_order``.
    """
    lag_cap = 30
    series = get_stock_data(code)

    ts_plot(series, lags=lag_cap, title=code)

    # Same options for the fit and for the order search: AIC, no constant.
    ar_options = dict(maxlag=lag_cap, ic='aic', trend='nc')
    fitted = smt.AR(series.values).fit(**ar_options)
    best_lag = smt.AR(series.values).select_order(**ar_options)

    print(code + f'拟合AR模型的参数:{fitted.params.round(2)}')
    print(code + f'拟合AR模型的最佳滞后阶数 {best_lag}')
    return best_lag
コード例 #3
0
 def getbestar(self, data, symbol):
     """Show the AIC-selected AR lag for ``data[symbol]`` and fit an MA(3).

     The selected lag is written to ``label_dikiful_2`` and reused as the
     ``maxlag`` argument of the ``ARMA(order=(0, 3))`` fit.  Depending on
     ``checkBox_forecast`` the fitted model is either handed to
     ``self.forecast`` or its residuals are handed to ``self.tsplot``.
     """
     series = data[symbol]
     ar_options = {'maxlag': 30, 'ic': 'aic', 'trend': 'nc'}

     # The AR fit result itself is not used afterwards; only the order is.
     smt.AR(series).fit(**ar_options)
     lag = smt.AR(series).select_order(**ar_options)
     self.label_dikiful_2.setText('best estimated lag order = {}'.format(lag))

     ma_fit = smt.ARMA(series, order=(0, 3)).fit(maxlag=lag,
                                                 method='mle',
                                                 trend='nc')

     if not self.checkBox_forecast.isChecked():
         self.tsplot(ma_fit.resid, symbol)
         return
     spin_value = int(self.sdiffspinBox_2.text())
     self.forecast(data, symbol, ma_fit, spin_value)
コード例 #4
0
def AR_p():
    """Simulate an AR(1) and an AR(2) series, plot them and print the fits.

    For each series the AR coefficients and the AIC-selected lag order are
    printed next to the values used in the simulation.
    """
    np.random.seed(1)
    n_samples = 1000
    a = 0.6

    # Start from white noise and accumulate in place.  Each x[t] still holds
    # its noise draw when it is updated; at t == 0 the index t - 1 wraps
    # around to the last element.
    x = np.random.normal(size=n_samples)
    for t in range(n_samples):
        x[t] += a * x[t - 1]

    tsplot(x, lags=30)

    ar1_options = dict(maxlag=30, ic='aic', trend='nc')
    ar1_fit = smt.AR(x).fit(**ar1_options)
    ar1_order = smt.AR(x).select_order(**ar1_options)

    print('\nalpha estimate: {:3.5f} | best lag order = {}'.format(
        ar1_fit.params[0], ar1_order))
    print('\ntrue alpha = {} | true order = {}'.format(a, 1))

    # AR(2): arma_generate_sample takes lag polynomials, so the zero-lag
    # coefficient 1 is prepended and the AR coefficients are negated.  The
    # MA part is a single zero coefficient, i.e. a pure AR process.
    true_coefs = [.666, -.333]
    alphas = np.array(true_coefs)
    betas = np.array([0.])
    ar_poly = np.r_[1, -alphas]
    ma_poly = np.r_[1, betas]

    ar2 = smt.arma_generate_sample(ar=ar_poly, ma=ma_poly, nsample=1000)
    tsplot(ar2, lags=30)

    ar2_options = dict(maxlag=10, ic='aic', trend='nc')
    ar2_fit = smt.AR(ar2).fit(**ar2_options)
    ar2_order = smt.AR(ar2).select_order(**ar2_options)

    print('\ncoef estimate: {:3.4f} {:3.4f} | best lag order = {}'.format(
        ar2_fit.params[0], ar2_fit.params[1], ar2_order))
    print('\ntrue coefs = {} | true order = {}'.format(true_coefs, 2))
コード例 #5
0
 def cov(self,
         cols=False,
         max_periods=False,
         decay=False,
         shrink=False,
         AR=False):
     """Estimate the covariance matrix of the selected columns.

     Args:
         cols: Column label or list of labels; a falsy value selects every
             column of ``self``.
         max_periods: If truthy, only the last ``max_periods`` rows are used.
         decay: If truthy, the estimate is the recursion
             ``cov = decay * cov + (1 - decay) * row' row`` over the rows,
             seeded with the first row (rows are not demeaned).  A warning
             is printed when the value is not strictly between 0 and 1.
         shrink: If truthy, the estimate is passed through
             ``ShrinkCovs(cov, delta=shrink)``.  A warning is printed when
             the value is not strictly between 0 and 1.
         AR: If truthy, every column is first replaced by the residuals of
             an AR(1) fit (``tsa.AR(...).fit(1)``) and the first residual
             row is dropped.

     Returns:
         DataFrame whose index and columns are the selected column labels.
     """
     def _decayed_outer_products(frame):
         # Exponentially weighted sum of row outer products over ``frame``.
         vec = array(frame[0:1])
         acc = vec.T.dot(vec)
         for i in arange(1, frame.shape[0]):
             vec = array(frame[i:i + 1])
             acc = decay * acc + (1 - decay) * vec.T.dot(vec)
         return DataFrame(acc, index=cols, columns=cols)

     if cols:
         if not isinstance(cols, list):
             cols = [cols]
         X = DataFrame(self[cols])
     else:
         X = DataFrame(self)
         cols = list(self.columns)
     if max_periods:
         X = X[-max_periods:]

     if decay and ((decay <= 0) or (decay >= 1)):
         # Advisory only: the value is still used as given.
         print('Warning: The decay parameter is not between 0 and 1.')

     if AR:
         # Index the residuals on the (possibly truncated) window.  Using
         # self.index here left all-NaN rows whenever max_periods was set,
         # which turned the decay recursion below into NaN.
         R = DataFrame(index=X.index, columns=cols)
         for col in cols:
             A = X[col]
             p = tsa.AR(array(A)).fit(1).params
             # Residual of the AR(1) fit: value - const - phi * previous.
             R[col] = A - p[0] - p[1] * A.shift(1)
         # The first row has no predecessor, so its residual is undefined.
         R = R[1:]
         if decay:
             cov = _decayed_outer_products(R)
         else:
             cov = R.cov()
     elif decay:
         cov = _decayed_outer_products(X)
     elif len(cols) == 1:
         cov = var(array(X))
     else:
         cov = X.cov()

     if shrink:
         if (shrink <= 0) or (shrink >= 1):
             print('Warning: The shrinkage parameter is not between 0 and 1.')
         cov = ShrinkCovs(cov, delta=shrink)
     return DataFrame(cov, index=X.columns, columns=X.columns)
コード例 #6
0
 def fit_ar_model_and_estimate_order(
         self, data, maxlag=None, method='mle', ic='bic', trend='nc'
         ):
     """Fit an AR model to ``data`` and estimate its lag order.

     Args:
         data: Series handed to ``smt.AR``.
         maxlag: Largest lag to consider; when ``None`` the instance's
             ``__n_lags`` attribute is used instead.
         method, ic, trend: Forwarded unchanged to both ``fit`` and
             ``select_order``.

     Returns:
         Tuple ``(params, est_order)``: the fitted parameters and the
         value returned by ``select_order``.
     """
     lag_cap = self.__n_lags if maxlag is None else maxlag
     ar_options = dict(maxlag=lag_cap, method=method, ic=ic, trend=trend)

     log('Fitting the AR model to the given data...')
     fitted = smt.AR(data).fit(**ar_options)
     logn('[Done]')

     log('Estimating the order of the AR model...')
     order = smt.AR(data).select_order(**ar_options)
     logn('[Done]')

     return fitted.params, order
コード例 #7
0
 def autoregr(self, cols=False, degree=1, conf=0.95):
     """Fit an AR(``degree``) model to each selected column.

     Args:
         cols: Column label or list of labels; a falsy value selects every
             column of ``self``.
         degree: Lag order passed to ``fit``.
         conf: Confidence level; ``conf_int`` is called with
             ``alpha = 1 - conf``.

     Returns:
         Tuple ``(P, C)``.  ``P`` holds the fitted parameters per column,
         with rows ``a, b1, ..., b<degree>``.  ``C`` holds the confidence
         bounds, with rows ``<name>_lower`` / ``<name>_upper`` in the same
         parameter order.
     """
     if not cols:
         cols = list(self.columns)
     elif not isinstance(cols, list):
         cols = [cols]

     n_coefs = degree + 1
     coef_labels = ['b' + repr(k) for k in range(n_coefs)]
     coef_labels[0] = 'a'
     bound_labels = [label + suffix
                     for label in coef_labels
                     for suffix in ('_lower', '_upper')]

     P = DataFrame(index=coef_labels, columns=cols)
     C = DataFrame(index=bound_labels, columns=cols)
     for col in cols:
         fitted = tsa.AR(array(self[col])).fit(degree)
         bounds = fitted.conf_int(alpha=1 - conf)
         estimates = fitted.params
         # Flatten row by row so lower/upper alternate per parameter.
         C[col] = bounds.reshape((2 * n_coefs, 1))
         P[col] = estimates.reshape((n_coefs, 1))
     return P, C
コード例 #8
0
 def mean(self,
          cols=False,
          max_periods=False,
          decay=False,
          shrink=False,
          AR=False,
          targetzero=False):
     """Estimate the mean of each selected column.

     Args:
         cols: Column label or list of labels; a falsy value selects every
             column of ``self``.
         max_periods: If truthy, only the last ``max_periods`` rows are used.
         decay: If truthy (and ``AR`` is falsy), the estimate is the
             recursion ``m = decay * m + (1 - decay) * row`` over the rows,
             seeded with the first row.  A warning is printed when the
             value is not strictly between 0 and 1.
         shrink: If truthy, the estimate is pulled towards a target with
             weight ``shrink``: zero when ``targetzero`` is truthy,
             otherwise the average of the per-column estimates.
         AR: If truthy, each column's estimate is the one-step prediction
             ``const + phi * last_value`` of an AR(1) fit
             (``tsa.AR(...).fit(1)``).
         targetzero: Selects the shrinkage target, see ``shrink``.

     Returns:
         Series of estimates indexed by the selected column labels.
     """
     if cols:
         if not isinstance(cols, list):
             cols = [cols]
         X = DataFrame(self[cols])
     else:
         X = DataFrame(self)
         cols = list(self.columns)
     if max_periods:
         X = X[-max_periods:]

     if AR:
         mn = Series(index=cols, dtype=float)
         for col in cols:
             # Use the truncated window X (not self) so max_periods is
             # honoured, matching how cov() treats its AR branch.
             values = array(X[col])
             p = tsa.AR(values).fit(1).params
             # Scalar last observation; a length-1 slice would store a
             # Series in the slot instead of a number.
             mn[col] = p[0] + p[1] * values[-1]
     elif decay:
         if (decay <= 0) or (decay >= 1):
             print('Warning: The decay parameter is not between 0 and 1.')
         # Iterate over rows positionally.  X[0] / X[i] would look up a
         # column labelled 0 / i, and the row count was never defined.
         rows = array(X)
         a = rows[0]
         for i in range(1, rows.shape[0]):
             a = decay * a + (1 - decay) * rows[i]
         mn = Series(a, index=cols)
     else:
         mn = X.mean()

     if shrink:
         if targetzero:
             mn = (1 - shrink) * mn
         else:
             # Use the Series method so this does not depend on a
             # module-level ``mean`` that shares this method's name.
             mn = (1 - shrink) * mn + shrink * mn.mean()
     return mn
コード例 #9
0
# Script: simulate an AR(1) series, fit AR models to it, then run ADF tests.
'''Simulating AR(1) with alpha 0.6'''

# Fixed seed so the simulated noise is reproducible.
np.random.seed(1)

n_samples = 1000

a = 0.6

# x and w are two names for the SAME array; the loop below updates it in place.
x = w = np.random.normal(size=n_samples)

# x[t] still holds its noise draw when it is read as w[t].
# At t == 0 the index t - 1 is -1, i.e. the last element of the array.
for t in range(n_samples):
    x[t] = a * x[t - 1] + w[t]

_ = tsplot(x, lags=30)

# Fit with the lag order chosen by AIC (up to 30 lags), no constant term.
mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')
'''ic = information criteria'''
# Bare expression: only displays a value in an interactive session.
mdl.params[0]

print(mdl.summary())

order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc')

from statsmodels.tsa.stattools import adfuller

# NOTE(review): lprice and ret are not defined in the visible part of this
# script — presumably a price series and its returns; confirm.
dftest = adfuller(lprice, autolag='AIC')

dftest2 = adfuller(ret, autolag='AIC')

# Starting value for a running minimum (any finite AIC compares smaller).
best_aic = np.inf
コード例 #10
0
ファイル: Checks.py プロジェクト: shaunmalti/M_Diss
# Fit an ARMA(0, 1) model, i.e. a pure MA(1), to the flattened values.
# NOTE(review): vals is not assigned before this point in the visible code.
mdl = smt.ARMA(np.ndarray.flatten(vals), order=(0, 1)).fit(maxlag=30,
                                                           method='mle',
                                                           trend='nc')
print(mdl.summary())

# Terminates the script here; the statements that follow are never reached.
exit()

option = 2

if option == 1:
    data = pd.read_csv('Regressive_2.csv', header=None)
    vals = data.values
    for i in range(0, len(vals)):
        # tsplot(vals,lags=30)
        # Fit an AR(p) model to simulated AR(1) model with alpha = 0.6
        mdl = smt.AR(vals[i]).fit(maxlag=30, ic='aic', trend='nc')
        # % time
        est_order = smt.AR(vals[i]).select_order(maxlag=30,
                                                 ic='aic',
                                                 trend='nc')

        print('\nalpha estimate: {:3.5f} | best lag order = {}'.format(
            mdl.params[0], est_order))
elif option == 2:
    data = pd.read_csv('Move_Avg.csv', header=None)
    vals = data.values
    for i in range(0, len(vals)):
        max_lag = 30
        # tsplot(vals[i],lags=30)
        mdl = smt.ARMA(vals[i], order=(0, 1)).fit(maxlag=max_lag,
                                                  method='mle',
コード例 #11
0
        sm.qqplot(y, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot')
        scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

        plt.tight_layout()
    return


# In[197]:

# Y, max_lag, freq and p are defined outside the visible part of this script.
_ = tsplot(Y, lags=max_lag)

# In[272]:

# Select best lag order for BTC returns using Autoregressive Model
# (order chosen by AIC among lags up to max_lag, no constant term)
ar_est_order = smt.AR(Y).select_order(maxlag=max_lag, ic='aic', trend='nc')
p('best estimated lag order = {}'.format(ar_est_order))

# In[158]:

# Fit MA(3) to BTC returns
# order=(0, 3) means no AR terms and three MA terms.
arma_mdl = smt.ARMA(Y, order=(0, 3)).fit(maxlag=max_lag,
                                         method='mle',
                                         trend='nc',
                                         freq=freq)
p(arma_mdl.summary())

# # ARMA

# In[199]:
コード例 #12
0
# ax, Qt, df_train and df_test come from earlier, non-visible parts of the script.
ax.tick_params('both',labelsize=16)
plt.show()

# Partial autocorrelation plot of Qt for lags up to 40.
from statsmodels.graphics.tsaplots import plot_pacf
f, axarr = plt.subplots(1, 1, figsize=(14,10))
_ = plot_pacf(Qt,method='ols',lags=40,ax=axarr.axes)
axarr.set_title('Statsmodel plot of PACF',fontsize=16)
axarr.set_ylabel('Partial Autocorrelation Function (PACF) [-]',fontsize=16)
axarr.set_xlabel('Lag Distance [day]',fontsize=16)
axarr = plt.gca()
axarr.tick_params('both',labelsize=16)


#Fit AR(1) Model to Data

Q_AR1_model = sm.AR(Qt).fit(1)
print(Q_AR1_model.params)


#Use developed model to make predictions of the test data
Qtrain = df_train['in_unreg'].values
Qtest  = df_test['in_unreg'].values

DatesTest = df_test['Date'].values

# Lag-1 companion of Qtest: one training value followed by Qtest without its last element.
# NOTE(review): Qtrain[-2:-1] is the second-to-last training value; if the test
# rows directly follow the training rows, the value preceding Qtest[0] would be
# Qtrain[-1] (slice Qtrain[-1:]) — confirm which one is intended.
Qttm1 = np.concatenate([Qtrain[-2:-1],Qtest[0:-1]]) 
#slice notation: -1 is the last item and [-2:] the last two items, so [-2:-1] is the 2nd to last value of the training set, which is prepended to the test set (minus its last value)

# params[0] and params[1] of the AR(1) fit, named here as mean term and lag-1 coefficient.
AR1_mu   = Q_AR1_model.params[0] 
AR1_phi1 = Q_AR1_model.params[1]
コード例 #13
0
import statsmodels.stats as sts

import matplotlib.pyplot as plt
import matplotlib as mpl

plt.style.use('seaborn')


# Load the data with the first column as a parsed date index.
data = pd.read_csv('800FinalData.csv',index_col = 0, parse_dates = True)
data.index = pd.to_datetime(data.index)

# Log returns of SPY; the first value is NaN (no previous row) and is dropped below.
SPY = np.log(data.SPY / data.SPY.shift(1))

SPY.dropna(inplace = True)

# AR fit with at most one lag, order chosen by AIC, no constant term.
md1 = smt.AR(SPY).fit(maxlag = 1, ic = 'aic', trend = 'nc')
eps = md1.resid
fittedvalue = md1.fittedvalues

# The bare expressions below only display values in an interactive session.
md1.summary()
len(SPY)
from pandas import datetime
md1.predict(start = datetime(2018,2,1), end = datetime(2018,2,5))

predict1 = md1.predict(1, len(SPY))
md1.k_ar



# NOTE(review): am is not defined in the visible part of this script.
res = am.fit()
コード例 #14
0
                      name='fillDate')
# Reindex onto ix; rows that did not exist before get 0 in every column.
infy_data = infy_data.reindex(ix, fill_value=0)
infy_data['Symbol'] = "INFY"

# Replace each zero Close (except in the first row) with the previous row's
# Close, so filled-in rows carry the last known value forward.
i = 0
while i < len(infy_data):
    if infy_data.ix[i, 'Close'] == 0.00 and i != 0:
        infy_data.ix[i, 'Close'] = infy_data.ix[i - 1, 'Close']
        print infy_data.iloc[[i - 1], 8]
    i += 1

X = infy_data['Close']
# split dataset into train and test. We will retain the last 14 observations to test our model
# (the first observation, X[0], is left out of the training set)
train, test = X[1:len(X) - 14], X[len(X) - 14:]
# train autoregression
model = smt.AR(train)
model_fit = model.fit()
print('Lag: %s' % model_fit.k_ar)
print('\nCoefficients: %s' % model_fit.params)
# make predictions
# one prediction per held-out observation, starting right after the training data
predictions = model_fit.predict(start=len(train),
                                end=len(train) + len(test) - 1,
                                dynamic=False)
for i in range(len(predictions)):
    print('predicted=%f, expected=%f' % (predictions[i], test[i]))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# plot results
test.plot(figsize=(15, 6), grid=True, label="Test")
predictions.plot(color='orange', label="Predictions")
plt.legend()
コード例 #15
0
#%%
# AR Model
# synthetic
lags = 30
np.random.seed(1)
n_samples = int(1000)
a = 0.6
# x and w are two names for the SAME array; the loop below updates it in place.
x = w = np.random.normal(size=n_samples)

# x[t] still holds its noise draw when it is read as w[t].
# At t == 0 the index t - 1 is -1, i.e. the last element of the array.
for t in range(n_samples):
    x[t] = a * x[t - 1] + w[t]

_ = tsplot(x, lags=lags)

# fit synthetic
# (order chosen by AIC among lags up to 30, no constant term)
mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')
est_order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc')

true_order = 1
print('\nalpha estimate: {:3.5f} | best lag order = {}'.format(
    mdl.params[0], est_order))
print('\ntrue alpha = {} | true order = {}'.format(a, true_order))
#%%
# Same procedure on log_returns, which is defined outside this snippet.
max_lag = 30
mdl = smt.AR(log_returns).fit(maxlag=max_lag, ic='aic', trend='nc')
est_order = smt.AR(log_returns).select_order(maxlag=max_lag,
                                             ic='aic',
                                             trend='nc')

print('best estimated lag order = {}'.format(est_order))
コード例 #16
0
# Notebook cell: simulate an AR(1) series with coefficient 0.6.
np.random.seed(1)
n_samples = int(1000)
a = 0.6
# x and w are two names for the SAME array; the loop below updates it in place.
x = w = np.random.normal(size=n_samples)

# x[t] still holds its noise draw when it is read as w[t].
# At t == 0 the index t - 1 is -1, i.e. the last element of the array.
for t in range(n_samples):
    x[t] = a*x[t-1] + w[t]
    
_ = tsplot(x, lags=30)



# Fit an AR(p) model to simulated AR(1) model with alpha = 0.6
# All warnings are silenced from here on.
import warnings; warnings.simplefilter("ignore")
mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')
# %time is an IPython magic; this line is not valid in a plain Python script.
%time est_order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc')

true_order = 1
p('\nalpha estimate: {:3.5f} | best lag order = {}'.format(mdl.params[0], est_order))
p('\ntrue alpha = {} | true order = {}'.format(a, true_order))

# Simulate an AR(2) process

n = int(1000)
alphas = np.array([.666, -.333])
betas = np.array([0.])

# Python requires us to specify the zero-lag value which is 1
# Also note that the alphas for the AR model must be negated
# We also set the betas for the MA equal to 0 for an AR(p) model
コード例 #17
0
def train(
    data: np.ndarray,
    used_model: str = "autoreg",
    p: int = 5,
    d: int = 1,
    q: int = 0,
    cov_type="nonrobust",
    method="cmle",
    trend="nc",
    solver="lbfgs",
    maxlag=13,
    # SARIMAX args
    seasonal=(0, 0, 0, 0),
) -> Any:
    """Fit one of several statsmodels autoregressive models to univariate data.

    Args:
        data (np.ndarray): Time series data.
        used_model (str, optional): One of 'ar', 'arima', 'sarimax' or 'autoreg'
            (case-insensitive). Defaults to "autoreg".
        p (int, optional): First element of the ``(p, d, q)`` order used by the
            'arima' and 'sarimax' models. Defaults to 5.
        d (int, optional): Second element of the ``(p, d, q)`` order. Defaults to 1.
        q (int, optional): Third element of the ``(p, d, q)`` order. Defaults to 0.
        cov_type: Passed to ``fit`` of the 'autoreg' model. Defaults to 'nonrobust'.
        method: Passed to ``fit`` of the 'ar' and 'sarimax' models. Defaults to 'cmle'.
        trend: Passed to ``fit`` of the 'ar' and 'sarimax' models. Defaults to 'nc'.
        solver: Passed to ``fit`` of the 'ar' and 'sarimax' models. Defaults to 'lbfgs'.
        maxlag: Passed to ``ar_select_order`` for the 'autoreg' model. Defaults to 13.
        seasonal: ``seasonal_order`` of the 'sarimax' model. Defaults to (0, 0, 0, 0).

    Returns:
        statsmodels.model: Trained model, with two extra attributes set on it:
        ``my_name`` (the lower-cased model name) and ``data_length`` (``len(data)``).

    Raises:
        ValueError: If ``used_model`` is not one of the supported names.
    """

    used_model = used_model.lower()

    # Reject unknown names before touching statsmodels, so a typo is always
    # reported as a ValueError and never masked by an import failure.
    if used_model not in ("ar", "arima", "sarimax", "autoreg"):
        raise ValueError(
            f"Used model has to be one of ['ar', 'arima', 'sarimax', 'autoreg']. You configured: {used_model}"
        )

    # Each branch imports only the module it needs, so a statsmodels version
    # that lacks one of the other model modules can still serve this request.
    if used_model == "ar":
        import statsmodels.tsa.api as sm

        model = sm.AR(data)
        fitted_model = model.fit(method=method, trend=trend, solver=solver, disp=0)

    elif used_model == "arima":
        from statsmodels.tsa.arima.model import ARIMA

        order = (p, d, q)
        model = ARIMA(data, order=order)
        fitted_model = model.fit()

    elif used_model == "sarimax":
        from statsmodels.tsa.statespace.sarimax import SARIMAX

        order = (p, d, q)
        model = SARIMAX(data, order=order, seasonal_order=seasonal)
        # NOTE(review): the defaults method='cmle' / trend='nc' look tailored to
        # the 'ar' model — confirm SARIMAX.fit accepts them as passed here.
        fitted_model = model.fit(method=method, trend=trend, solver=solver, disp=0)

    else:  # "autoreg"
        from statsmodels.tsa import ar_model

        # Let statsmodels choose the lags, then refit with the chosen settings.
        auto = ar_model.ar_select_order(data, maxlag=maxlag)
        model = ar_model.AutoReg(
            data,
            lags=auto.ar_lags,
            trend=auto.trend,
            seasonal=auto.seasonal,
            period=auto.period,
        )
        fitted_model = model.fit(cov_type=cov_type)

    # Tag the result so callers can tell which model produced it and on how
    # many observations it was trained.
    setattr(fitted_model, "my_name", used_model)
    setattr(fitted_model, "data_length", len(data))

    return fitted_model
コード例 #18
0
# AR(2) => x(t) = alpha1*x(t-1) + alpha2*x(t-2) + w(t), where w(t) is white noise

# simulate an AR(1) process with alpha =0.6
# (n_samples, p and tsplot are defined outside the visible part of this script)
np.random.seed(1)
a = 0.6
# x and w are two names for the SAME array; the loop below updates it in place.
x = w = np.random.normal(size=n_samples)

# x[t] still holds its noise draw when it is read as w[t].
# At t == 0 the index t - 1 is -1, i.e. the last element of the array.
for t in range(n_samples):
    x[t] = a * x[t - 1] + w[t]
p("AR(1), Alpha=0.6\n----------------------\nmean: {:.3f}\nvariance: {:.3f}\nstandard deviation: {:.3f}"
  .format(x.mean(), x.var(), x.std()))
p("\n")
_ = tsplot(x, lags=30, title='AR(1) Process Simulation, Alpha=0.6')

# Lets fit a AR(p) model to the above... we should get back from the fit that alpha ~ 0.6 and p = 1
mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')
# Unlike the fit above, the order search is given method='mle' explicitly.
est_order = smt.AR(x).select_order(maxlag=30,
                                   ic='aic',
                                   trend='nc',
                                   method='mle')
true_order = 1
p('\nalpha estimate: {:3.5f} | best lag order = {}'.format(
    mdl.params[0], est_order))
p('\ntrue alpha = {} | true order = {}'.format(a, true_order))
p("\n\n")

# simulate AR(2) process with alpha0=0.666, alpha1=-0.333
alphas = np.array([0.666, -0.333])
betas = np.array([0.
                  ])  # betas are for later, the MA component when we add MA(q)
# Lag-polynomial form: a leading 1 followed by the negated AR coefficients.
ar = np.r_[1, -alphas]
コード例 #19
0
ファイル: ReadingEEG.py プロジェクト: tonydong6/EEGanalysis
    #minimum and maximum frequencies
    minfreq = freqvals[newfreqrange][0]
    maxfreq = freqvals[newfreqrange][-1]

    'bandpass filter'
    b, a = signal.butter(
        1, [minfreq / (0.5 * 500), maxfreq / (0.5 * 500)],
        btype='band')  #use min and max freq to bandpass filter
    cleanlast1second = signal.filtfilt(b, a,
                                       last1second)  #zero phase band pass

    'time series forward prediction'
    predictionoverlaplength = 0.1  #how far forward you want to predict, in seconds
    frames = round(predictionoverlaplength *
                   len(cleanlast1second))  #calculate that in frames
    ARmodel = tsa.AR(
        cleanlast1second[frames:(-1 * frames)])  #get autoregressive model
    ARmodelfit = ARmodel.fit(ic='aic')  #fit the model
    ARmodelpredict = ARmodel.predict(params=ARmodelfit.params,
                                     start=len(cleanlast1second) - 2 * frames,
                                     end=len(cleanlast1second) + 0 * frames,
                                     dynamic=True)  #predict
    predicteddata = np.concatenate(
        (cleanlast1second[:-frames], ARmodelpredict))  #get predicted data

    'plot the prediction'
    #plt.figure()
    #plt.plot(predicteddata,label='predict')
    #plt.plot(cleanlast1second, label='orig')
    #plt.plot(cleanlast1second[:(-1*frames)])
    #plt.legend()
コード例 #20
0
def AR_MSFT():
    """Print the AIC-selected AR lag order for MSFT daily log returns.

    Downloads adjusted close prices for SPY, TLT and MSFT from Yahoo through
    pandas_datareader (2008-01-01 to 2017-01-01), converts them to log
    returns and runs an AR order search on the MSFT column.

    Must be run inside IPython: it calls ``get_ipython()``.
    """

    import os
    import sys

    import pandas as pd
    import pandas_datareader.data as web
    import numpy as np

    import statsmodels.formula.api as smf
    import statsmodels.tsa.api as smt
    import statsmodels.api as sm
    import scipy.stats as scs
    from arch import arch_model

    import matplotlib.pyplot as plt
    import matplotlib as mpl
    # Notebook-only: switch matplotlib to inline rendering.
    get_ipython().magic('matplotlib inline')
    p = print

    end = '2017-01-01'
    start = '2008-01-01'
    # Adjusted close series for one ticker over [start, end].
    get_px = lambda x: web.DataReader(x, 'yahoo', start=start, end=end)[
        'Adj Close']

    symbols = ['SPY', 'TLT', 'MSFT']
    # raw adjusted close prices
    data = pd.DataFrame({sym: get_px(sym) for sym in symbols})
    # log returns
    # (the first row has no previous price, becomes NaN and is dropped)
    lrets = np.log(data / data.shift(1)).dropna()

    # Diagnostic plot helper; it is not called in the visible part of this function.
    def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
        """Draw the series with its ACF, PACF, QQ and probability plots."""
        if not isinstance(y, pd.Series):
            y = pd.Series(y)
        with plt.style.context(style):
            fig = plt.figure(figsize=figsize)
            #mpl.rcParams['font.family'] = 'Ubuntu Mono'
            # 3x2 grid: the series spans the top row, ACF/PACF sit in the
            # middle row and QQ/probability plots in the bottom row.
            layout = (3, 2)
            ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
            acf_ax = plt.subplot2grid(layout, (1, 0))
            pacf_ax = plt.subplot2grid(layout, (1, 1))
            qq_ax = plt.subplot2grid(layout, (2, 0))
            pp_ax = plt.subplot2grid(layout, (2, 1))

            y.plot(ax=ts_ax)
            ts_ax.set_title('Time Series Analysis Plots')
            smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
            smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5)
            sm.qqplot(y, line='s', ax=qq_ax)
            qq_ax.set_title('QQ Plot')
            scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

            plt.tight_layout()
        return

    # Select best lag order for MSFT returns

    max_lag = 30
    # The fitted model is not used below; only the selected order is printed.
    mdl = smt.AR(lrets.MSFT).fit(maxlag=max_lag, ic='aic', trend='nc')
    est_order = smt.AR(lrets.MSFT).select_order(maxlag=max_lag,
                                                ic='aic',
                                                trend='nc')

    p('best estimated lag order = {}'.format(est_order))