def simu_ar(data, a, maxlag=30, true_order=1):
    """Fit an AR(p) model to ``data`` and print the estimates next to the truth.

    Args:
        data: The series to fit.
        a: The true parameter value(s); may be a list. Only printed.
        maxlag: Largest lag order considered.
        true_order: The true lag order. Only printed.

    The information criterion is fixed to ``'aic'`` (the original notes list
    the available choices as 'aic', 'bic', 'hqic', 't-stat') and the trend to
    ``'nc'`` (no constant; ``'c'`` would include one).
    """
    ar_options = dict(maxlag=maxlag, ic='aic', trend='nc')

    # Fit the AR(p) model.
    fitted = smt.AR(data).fit(**ar_options)
    # Select the lag order on a fresh model instance.
    chosen_lag = smt.AR(data).select_order(**ar_options)

    # Report estimated versus true values.
    rounded = fitted.params.round(2)
    print(f'参数估计值:{rounded},估计的滞后阶数:{chosen_lag}')
    print(f'真实参数值:{a},真实滞后阶数 {true_order}')
def ar_model(code='399001'):
    """Plot the series for ``code``, fit an AR model and return the chosen lag.

    Args:
        code: Identifier passed to ``get_stock_data`` and used as plot title
            and print prefix.

    Returns:
        The lag order returned by ``select_order`` (AIC, no constant,
        at most 30 lags).
    """
    lag_cap = 30
    series = get_stock_data(code)
    ts_plot(series, lags=lag_cap, title=code)

    ar_options = {'maxlag': lag_cap, 'ic': 'aic', 'trend': 'nc'}
    fitted = smt.AR(series.values).fit(**ar_options)
    best_lag = smt.AR(series.values).select_order(**ar_options)

    print(code + f'拟合AR模型的参数:{fitted.params.round(2)}')
    print(code + f'拟合AR模型的最佳滞后阶数 {best_lag}')
    return best_lag
def getbestar(self, data, symbol):
    """Select an AR lag order for ``data[symbol]``, fit a model and show it.

    The AIC-selected lag order (at most 30 lags, no constant) is written to
    ``label_dikiful_2``. An ARMA model is then fitted and either forecast
    (when ``checkBox_forecast`` is checked) or its residuals are plotted.

    Args:
        data: Mapping/frame indexed by symbol.
        symbol: Key of the series to model.
    """
    max_lag = 30
    series = data[symbol]

    # Only the selected order is needed here; the separate AR fit that used
    # to precede this call was discarded, so it has been dropped.
    best_order = smt.AR(series).select_order(
        maxlag=max_lag, ic='aic', trend='nc')
    self.label_dikiful_2.setText(
        'best estimated lag order = {}'.format(best_order))

    # NOTE(review): the order is hard-coded to MA(3); the selected AR order is
    # only forwarded as ``maxlag``. Confirm whether (best_order, 0) was meant.
    best_mdl = smt.ARMA(series, order=(0, 3)).fit(
        maxlag=best_order, method='mle', trend='nc')

    if self.checkBox_forecast.isChecked():
        self.forecast(data, symbol, best_mdl,
                      int(self.sdiffspinBox_2.text()))
    else:
        self.tsplot(best_mdl.resid, symbol)
def AR_p():
    """Simulate an AR(1) and an AR(2) process and print the fitted estimates.

    For each simulated series the function plots it with ``tsplot``, fits an
    AR model with AIC lag selection and no constant, and prints the estimated
    coefficient(s) and selected lag order beside the true values.
    """
    np.random.seed(1)

    # --- AR(1) with alpha = 0.6 ---
    ar1_alpha = 0.6
    sample_size = 1000
    series = np.random.normal(size=sample_size)
    # In-place recursion: element t still holds its noise draw when it is read,
    # while element t-1 already holds the AR value (t = 0 wraps to the last
    # draw).
    for t in range(sample_size):
        series[t] += ar1_alpha * series[t - 1]
    _ = tsplot(series, lags=30)

    ar1_options = dict(maxlag=30, ic='aic', trend='nc')
    ar1_fit = smt.AR(series).fit(**ar1_options)
    ar1_order = smt.AR(series).select_order(**ar1_options)
    print('\nalpha estimate: {:3.5f} | best lag order = {}'.format(
        ar1_fit.params[0], ar1_order))
    print('\ntrue alpha = {} | true order = {}'.format(ar1_alpha, 1))

    # --- AR(2) with alphas = (0.666, -0.333) ---
    alphas = np.array([.666, -.333])
    betas = np.array([0.])
    # The zero-lag coefficient (1) must be given explicitly, the AR
    # coefficients are negated, and the MA part is zero for a pure AR(p).
    ar_poly = np.r_[1, -alphas]
    ma_poly = np.r_[1, betas]
    ar2_series = smt.arma_generate_sample(ar=ar_poly, ma=ma_poly, nsample=1000)
    _ = tsplot(ar2_series, lags=30)

    ar2_options = dict(maxlag=10, ic='aic', trend='nc')
    ar2_fit = smt.AR(ar2_series).fit(**ar2_options)
    ar2_order = smt.AR(ar2_series).select_order(**ar2_options)
    print('\ncoef estimate: {:3.4f} {:3.4f} | best lag order = {}'.format(
        ar2_fit.params[0], ar2_fit.params[1], ar2_order))
    print('\ntrue coefs = {} | true order = {}'.format([.666, -.333], 2))
def cov(self, cols=False, max_periods=False, decay=False, shrink=False, AR=False):
    """Return a covariance matrix of the selected columns as a DataFrame.

    Args:
        cols: Column label or list of labels; falsy selects all columns.
        max_periods: If truthy, only the last ``max_periods`` rows are used.
        decay: If truthy, an exponentially weighted sum of row outer products
            is used: ``cov = decay * cov + (1 - decay) * row' row``, seeded
            with the first row. A warning is printed when it is not in (0, 1).
        shrink: If truthy, the result is passed to
            ``ShrinkCovs(cov, delta=shrink)``. A warning is printed when it is
            not in (0, 1).
        AR: If truthy, each column is replaced by its AR(1) residuals
            (``A - p[0] - p[1] * A.shift(1)``) before the covariance is taken.

    Returns:
        DataFrame indexed and labelled by the selected columns.
    """
    if cols:
        if not isinstance(cols, list):
            cols = [cols]
        X = DataFrame(self[cols])
    else:
        X = DataFrame(self)
        cols = list(self.columns)
    if max_periods:
        X = X[-max_periods:]

    def _decayed_cov(frame):
        # Exponentially weighted accumulation of row outer products.
        if (decay <= 0) or (decay >= 1):
            print('Warning: The decay parameter is not between 0 and 1.')
        rows = array(frame, dtype=float)
        first = rows[0:1]
        acc = first.T.dot(first)
        for i in range(1, rows.shape[0]):
            vec = rows[i:i + 1]
            acc = decay * acc + (1 - decay) * vec.T.dot(vec)
        return DataFrame(acc, index=cols, columns=cols)

    if AR:
        # Index by X (not self) so that a max_periods window does not leave
        # all-NaN rows behind for the periods that were cut off.
        frame = DataFrame(index=X.index, columns=cols)
        for col in cols:
            A = X[col]
            p = tsa.AR(array(A)).fit(1).params
            frame[col] = A - p[0] - p[1] * A.shift(1)
        # The first residual is undefined because of the lag.
        frame = frame[1:]
    else:
        frame = X

    if decay:
        result = _decayed_cov(frame)
    elif AR or len(cols) != 1:
        result = frame.cov()
    else:
        # NOTE(review): population variance (numpy default ddof) here, whereas
        # DataFrame.cov() is the sample covariance; kept as in the original.
        result = var(array(X))

    if shrink:
        if (shrink <= 0) or (shrink >= 1):
            print('Warning: The shrinkage parameter is not between 0 and 1.')
        result = ShrinkCovs(result, delta=shrink)
    return DataFrame(result, index=X.columns, columns=X.columns)
def fit_ar_model_and_estimate_order(
    self, data, maxlag=None, method='mle', ic='bic', trend='nc'
):
    """Fit an AR model to ``data`` and estimate its lag order.

    Args:
        data: Series handed to ``smt.AR``.
        maxlag: Largest lag considered; ``None`` falls back to
            ``self.__n_lags``.
        method, ic, trend: Forwarded unchanged to both ``fit`` and
            ``select_order``.

    Returns:
        Tuple ``(params, est_order)``: the fitted parameters and the
        selected lag order.
    """
    lag_limit = self.__n_lags if maxlag is None else maxlag
    shared_options = dict(maxlag=lag_limit, method=method, ic=ic, trend=trend)

    log('Fitting the AR model to the given data...')
    fitted = smt.AR(data).fit(**shared_options)
    logn('[Done]')

    log('Estimating the order of the AR model...')
    selected_order = smt.AR(data).select_order(**shared_options)
    logn('[Done]')

    return fitted.params, selected_order
def autoregr(self, cols=False, degree=1, conf=0.95):
    """Fit an AR(``degree``) model per column; return coefficients and bounds.

    Args:
        cols: Column label or list of labels; falsy selects all columns.
        degree: Lag order passed to ``fit``.
        conf: Confidence level; ``conf_int`` is called with ``1 - conf``.

    Returns:
        Tuple ``(P, C)``. ``P`` has rows ``a, b1, ..., b<degree>`` with the
        fitted parameters; ``C`` has rows ``<name>_lower`` / ``<name>_upper``
        for each of those names with the confidence bounds. Columns of both
        are the selected columns.
    """
    if not cols:
        cols = list(self.columns)
    elif not isinstance(cols, list):
        cols = [cols]

    n_params = degree + 1
    coef_labels = ['b' + repr(k) for k in range(n_params)]
    coef_labels[0] = 'a'
    bound_labels = [label + suffix
                    for label in coef_labels
                    for suffix in ('_lower', '_upper')]

    P = DataFrame(index=coef_labels, columns=cols)
    C = DataFrame(index=bound_labels, columns=cols)
    for col in cols:
        fitted = tsa.AR(array(self[col])).fit(degree)
        bounds = fitted.conf_int(alpha=1 - conf)
        coefs = fitted.params
        # Row-major flattening interleaves lower/upper per coefficient,
        # matching the order of bound_labels.
        C[col] = bounds.reshape((2 * n_params, 1))
        P[col] = coefs.reshape((n_params, 1))
    return P, C
def mean(self, cols=False, max_periods=False, decay=False, shrink=False, AR=False, targetzero=False):
    """Return a per-column mean estimate as a Series indexed by column.

    Args:
        cols: Column label or list of labels; falsy selects all columns.
        max_periods: If truthy, only the last ``max_periods`` rows are used.
        decay: If truthy (and ``AR`` is falsy), an exponentially weighted
            mean over rows is used: ``a = decay * a + (1 - decay) * row``,
            seeded with the first row. A warning is printed when it is not
            in (0, 1).
        shrink: If truthy, the estimate is shrunk by this weight towards
            zero (``targetzero``) or towards the mean of the estimates.
        AR: If truthy, the estimate is the one-step AR(1) value
            ``p[0] + p[1] * last_observation`` for each column.
        targetzero: Selects the shrinkage target, see ``shrink``.

    Returns:
        Series of estimates, one per selected column.
    """
    if cols:
        if not isinstance(cols, list):
            cols = [cols]
        X = DataFrame(self[cols])
    else:
        X = DataFrame(self)
        cols = list(self.columns)
    if max_periods:
        X = X[-max_periods:]

    if AR:
        mn = Series(index=cols, dtype=float)
        for col in cols:
            # Use the windowed data so that max_periods is honoured.
            A = X[col]
            p = tsa.AR(array(A)).fit(1).params
            # Scalar last observation (a one-element slice would store a
            # Series in a scalar slot).
            mn[col] = p[0] + p[1] * A.iloc[-1]
    elif decay:
        if (decay <= 0) or (decay >= 1):
            print('Warning: The decay parameter is not between 0 and 1.')
        # Iterate over rows positionally; X[0] / X[i] would be column lookups.
        rows = array(X, dtype=float)
        a = rows[0]
        for row in rows[1:]:
            a = decay * a + (1 - decay) * row
        mn = Series(a, index=cols)
    else:
        mn = X.mean()

    if shrink:
        if targetzero:
            mn = (1 - shrink) * mn
        else:
            # Series.mean() avoids depending on a global ``mean`` that this
            # definition's own name may shadow.
            mn = (1 - shrink) * mn + shrink * mn.mean()
    return mn
# Script section: simulate an AR(1) series, fit an AR model to it, then run
# augmented Dickey-Fuller tests on two series defined elsewhere in the file.
'''Simulating AR(1) with alpha 0.6'''
np.random.seed(1)
n_samples = 1000
a = 0.6
# x and w are two names for the same array, so the loop below rewrites the
# noise in place: x[t] still holds its original draw when it is read as w[t].
x = w = np.random.normal(size=n_samples)
for t in range(n_samples):
    # For t == 0 the index t - 1 wraps around to the last element.
    x[t] = a * x[t - 1] + w[t]
_ = tsplot(x, lags=30)
# Fit with AIC lag selection (up to 30 lags) and no constant term.
mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')
'''ic = information criteria'''
# Bare expression: only displays a value in an interactive session.
mdl.params[0]
print(mdl.summary())
# Lag order selected on a fresh model instance with the same settings.
order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc')
from statsmodels.tsa.stattools import adfuller
# lprice and ret are not defined in this section.
# NOTE(review): presumably log prices and returns - confirm.
dftest = adfuller(lprice, autolag='AIC')
dftest2 = adfuller(ret, autolag='AIC')
# Starting value for a minimum-AIC search; the search itself is not in view.
best_aic = np.inf
mdl = smt.ARMA(np.ndarray.flatten(vals), order=(0, 1)).fit(maxlag=30, method='mle', trend='nc') print(mdl.summary()) exit() option = 2 if option == 1: data = pd.read_csv('Regressive_2.csv', header=None) vals = data.values for i in range(0, len(vals)): # tsplot(vals,lags=30) # Fit an AR(p) model to simulated AR(1) model with alpha = 0.6 mdl = smt.AR(vals[i]).fit(maxlag=30, ic='aic', trend='nc') # % time est_order = smt.AR(vals[i]).select_order(maxlag=30, ic='aic', trend='nc') print('\nalpha estimate: {:3.5f} | best lag order = {}'.format( mdl.params[0], est_order)) elif option == 2: data = pd.read_csv('Move_Avg.csv', header=None) vals = data.values for i in range(0, len(vals)): max_lag = 30 # tsplot(vals[i],lags=30) mdl = smt.ARMA(vals[i], order=(0, 1)).fit(maxlag=max_lag, method='mle',
sm.qqplot(y, line='s', ax=qq_ax) qq_ax.set_title('QQ Plot') scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax) plt.tight_layout() return # In[197]: _ = tsplot(Y, lags=max_lag) # In[272]: # Select best lag order for BTC returns using Autoregressive Model ar_est_order = smt.AR(Y).select_order(maxlag=max_lag, ic='aic', trend='nc') p('best estimated lag order = {}'.format(ar_est_order)) # In[158]: # Fit MA(3) to BTC returns arma_mdl = smt.ARMA(Y, order=(0, 3)).fit(maxlag=max_lag, method='mle', trend='nc', freq=freq) p(arma_mdl.summary()) # # ARMA # In[199]:
# Script section: finish the previous figure, plot the PACF of Qt, fit an
# AR(1) model to Qt and prepare lagged test data for prediction.
ax.tick_params('both',labelsize=16)
plt.show()
from statsmodels.graphics.tsaplots import plot_pacf
f, axarr = plt.subplots(1, 1, figsize=(14,10))
# Partial autocorrelation of Qt up to lag 40, estimated by OLS.
_ = plot_pacf(Qt,method='ols',lags=40,ax=axarr.axes)
axarr.set_title('Statsmodel plot of PACF',fontsize=16)
axarr.set_ylabel('Partial Autocorrelation Function (PACF) [-]',fontsize=16)
axarr.set_xlabel('Lag Distance [day]',fontsize=16)
axarr = plt.gca()
axarr.tick_params('both',labelsize=16)
# Fit an AR(1) model to the data (fit is called with maxlag 1).
Q_AR1_model = sm.AR(Qt).fit(1)
print(Q_AR1_model.params)
# Use the fitted model to make predictions for the test data.
Qtrain = df_train['in_unreg'].values
Qtest = df_test['in_unreg'].values
DatesTest = df_test['Date'].values
# Lag-1 companion of Qtest: one training value followed by Qtest without its
# last element. Qtrain[-2:-1] is the second-to-last training value.
# NOTE(review): if df_train and df_test do not overlap, the value preceding
# Qtest[0] would be Qtrain[-1:] - confirm which one is intended.
Qttm1 = np.concatenate([Qtrain[-2:-1],Qtest[0:-1]])
# First two fitted parameters.
# NOTE(review): presumably the constant and the lag-1 coefficient - confirm.
AR1_mu = Q_AR1_model.params[0]
AR1_phi1 = Q_AR1_model.params[1]
import statsmodels.stats as sts
import matplotlib.pyplot as plt
import matplotlib as mpl
plt.style.use('seaborn')

# Load the data with a parsed datetime index.
data = pd.read_csv('800FinalData.csv',index_col = 0, parse_dates = True)
data.index = pd.to_datetime(data.index)

# Daily log returns of SPY; the first value is NaN because of the shift.
SPY = np.log(data.SPY / data.SPY.shift(1))
SPY.dropna(inplace = True)

# AR fit with at most one lag (AIC selection) and no constant.
md1 = smt.AR(SPY).fit(maxlag = 1, ic = 'aic', trend = 'nc')
eps = md1.resid
fittedvalue = md1.fittedvalues
# Bare expressions below only display a value in an interactive session.
md1.summary()
len(SPY)

# ``pandas.datetime`` was an alias of the standard-library class; it was
# deprecated in pandas 1.0 and later removed, so import it from the stdlib.
from datetime import datetime
md1.predict(start = datetime(2018,2,1), end = datetime(2018,2,5))
predict1 = md1.predict(1, len(SPY))
md1.k_ar
# ``am`` is not defined in this section; it comes from elsewhere in the file.
res = am.fit()
name='fillDate') infy_data = infy_data.reindex(ix, fill_value=0) infy_data['Symbol'] = "INFY" i = 0 while i < len(infy_data): if infy_data.ix[i, 'Close'] == 0.00 and i != 0: infy_data.ix[i, 'Close'] = infy_data.ix[i - 1, 'Close'] print infy_data.iloc[[i - 1], 8] i += 1 X = infy_data['Close'] # split dataset into train and test. We will retain last 7 observations to test our model train, test = X[1:len(X) - 14], X[len(X) - 14:] # train autoregression model = smt.AR(train) model_fit = model.fit() print('Lag: %s' % model_fit.k_ar) print('\nCoefficients: %s' % model_fit.params) # make predictions predictions = model_fit.predict(start=len(train), end=len(train) + len(test) - 1, dynamic=False) for i in range(len(predictions)): print('predicted=%f, expected=%f' % (predictions[i], test[i])) error = mean_squared_error(test, predictions) print('Test MSE: %.3f' % error) # plot results test.plot(figsize=(15, 6), grid=True, label="Test") predictions.plot(color='orange', label="Predictions") plt.legend()
#%% # AR Model # synthetic lags = 30 np.random.seed(1) n_samples = int(1000) a = 0.6 x = w = np.random.normal(size=n_samples) for t in range(n_samples): x[t] = a * x[t - 1] + w[t] _ = tsplot(x, lags=lags) # fit synthetic mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc') est_order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc') true_order = 1 print('\nalpha estimate: {:3.5f} | best lag order = {}'.format( mdl.params[0], est_order)) print('\ntrue alpha = {} | true order = {}'.format(a, true_order)) #%% max_lag = 30 mdl = smt.AR(log_returns).fit(maxlag=max_lag, ic='aic', trend='nc') est_order = smt.AR(log_returns).select_order(maxlag=max_lag, ic='aic', trend='nc') print('best estimated lag order = {}'.format(est_order))
# Notebook cell: simulate an AR(1) series with coefficient 0.6, fit an AR
# model, then set up the coefficients for an AR(2) simulation.
np.random.seed(1)
n_samples = int(1000)
a = 0.6
# x and w are two names for the same array, so the loop below rewrites the
# noise in place: x[t] still holds its original draw when it is read as w[t].
x = w = np.random.normal(size=n_samples)
for t in range(n_samples):
    # For t == 0 the index t - 1 wraps around to the last element.
    x[t] = a*x[t-1] + w[t]
_ = tsplot(x, lags=30)
# Fit an AR(p) model to the simulated AR(1) series with alpha = 0.6.
# All warnings are silenced from this point on.
import warnings; warnings.simplefilter("ignore")
mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')
# The next line starts with an IPython magic, so it only runs under IPython.
%time est_order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc')
true_order = 1
# p is defined outside this section.
# NOTE(review): presumably an alias of print - confirm.
p('\nalpha estimate: {:3.5f} | best lag order = {}'.format(mdl.params[0], est_order))
p('\ntrue alpha = {} | true order = {}'.format(a, true_order))
# Simulate an AR(2) process.
n = int(1000)
alphas = np.array([.666, -.333])
betas = np.array([0.])
# The zero-lag value (1) has to be specified explicitly.
# Also note that the alphas for the AR model must be negated.
# The betas for the MA part are set to 0 for an AR(p) model.
def train(
    data: np.ndarray,
    used_model: str = "autoreg",
    p: int = 5,
    d: int = 1,
    q: int = 0,
    cov_type="nonrobust",
    method="cmle",
    trend="nc",
    solver="lbfgs",
    maxlag=13,
    # SARIMAX args
    seasonal=(0, 0, 0, 0),
) -> Any:
    """Fit an autoregressive model from the statsmodels library.

    Only univariate data.

    Args:
        data (np.ndarray): Time series data.
        used_model (str, optional): One of 'ar', 'arima', 'sarimax',
            'autoreg' (case-insensitive). Defaults to "autoreg".
        p (int, optional): AR order of the ARIMA / SARIMAX model.
            Defaults to 5.
        d (int, optional): Differencing order of the ARIMA / SARIMAX model.
            Defaults to 1.
        q (int, optional): MA order of the ARIMA / SARIMAX model.
            Defaults to 0.
        cov_type: Passed to ``AutoReg.fit``. Defaults to 'nonrobust'.
        method: Estimation method passed to ``AR.fit``. For 'sarimax' it is
            used as the optimizer only when it names one; otherwise
            ``solver`` is used. Defaults to 'cmle'.
        trend: Passed to ``AR.fit``. Defaults to 'nc'.
        solver: Optimizer passed to ``AR.fit`` and, unless ``method`` names
            an optimizer, to ``SARIMAX.fit``. Defaults to 'lbfgs'.
        maxlag: Largest lag considered by ``ar_select_order`` for 'autoreg'.
            Defaults to 13.
        seasonal: ``seasonal_order`` of the SARIMAX model.
            Defaults to (0, 0, 0, 0).

    Returns:
        The fitted statsmodels results object, with two extra attributes:
        ``my_name`` (the lower-cased model name) and ``data_length``.

    Raises:
        ValueError: If ``used_model`` is not one of the supported names.
    """
    used_model = used_model.lower()

    # Each statsmodels submodule is imported only in the branch that needs
    # it, so an unavailable class in one statsmodels version does not break
    # the other model types.
    if used_model == "ar":
        import statsmodels.tsa.api as sm

        model = sm.AR(data)
        fitted_model = model.fit(method=method, trend=trend, solver=solver, disp=0)

    elif used_model == "arima":
        from statsmodels.tsa.arima.model import ARIMA

        model = ARIMA(data, order=(p, d, q))
        fitted_model = model.fit()

    elif used_model == "sarimax":
        from statsmodels.tsa.statespace.sarimax import SARIMAX

        model = SARIMAX(data, order=(p, d, q), seasonal_order=seasonal)
        # SARIMAX.fit takes an optimizer name in ``method``; the AR-style
        # default 'cmle' is not one, and ``trend`` / ``solver`` are not fit
        # arguments. An explicit optimizer in ``method`` is still honoured.
        sarimax_optimizers = (
            "newton", "nm", "bfgs", "lbfgs", "powell", "cg", "ncg", "basinhopping",
        )
        optimizer = method if method in sarimax_optimizers else solver
        fitted_model = model.fit(method=optimizer, disp=0)

    elif used_model == "autoreg":
        from statsmodels.tsa import ar_model

        auto = ar_model.ar_select_order(data, maxlag=maxlag)
        model = ar_model.AutoReg(
            data,
            lags=auto.ar_lags,
            trend=auto.trend,
            seasonal=auto.seasonal,
            period=auto.period,
        )
        fitted_model = model.fit(cov_type=cov_type)

    else:
        raise ValueError(
            f"Used model has to be one of ['ar', 'arima', 'sarimax', 'autoreg']. You configured: {used_model}"
        )

    setattr(fitted_model, "my_name", used_model)
    setattr(fitted_model, "data_length", len(data))

    return fitted_model
# AR(2) => x(t) = alpha1*x(t-1) + alpha2*x(t-2) + w(t), where w(t) is white noise # simulate an AR(1) process with alpha =0.6 np.random.seed(1) a = 0.6 x = w = np.random.normal(size=n_samples) for t in range(n_samples): x[t] = a * x[t - 1] + w[t] p("AR(1), Alpha=0.6\n----------------------\nmean: {:.3f}\nvariance: {:.3f}\nstandard deviation: {:.3f}" .format(x.mean(), x.var(), x.std())) p("\n") _ = tsplot(x, lags=30, title='AR(1) Process Simulation, Alpha=0.6') # Lets fit a AR(p) model to the above... we should get back from the fit that alpha ~ 0.6 and p = 1 mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc') est_order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc', method='mle') true_order = 1 p('\nalpha estimate: {:3.5f} | best lag order = {}'.format( mdl.params[0], est_order)) p('\ntrue alpha = {} | true order = {}'.format(a, true_order)) p("\n\n") # simulate AR(2) process with alpha0=0.666, alpha1=-0.333 alphas = np.array([0.666, -0.333]) betas = np.array([0. ]) # betas are for later, the MA component when we add MA(q) ar = np.r_[1, -alphas]
#minimum and maximum frequencies minfreq = freqvals[newfreqrange][0] maxfreq = freqvals[newfreqrange][-1] 'bandpass filter' b, a = signal.butter( 1, [minfreq / (0.5 * 500), maxfreq / (0.5 * 500)], btype='band') #use min and max freq to bandpass filter cleanlast1second = signal.filtfilt(b, a, last1second) #zero phase band pass 'time series forward prediction' predictionoverlaplength = 0.1 #how far forward you want to predict, in seconds frames = round(predictionoverlaplength * len(cleanlast1second)) #calculate that in frames ARmodel = tsa.AR( cleanlast1second[frames:(-1 * frames)]) #get autoregressive model ARmodelfit = ARmodel.fit(ic='aic') #fit the model ARmodelpredict = ARmodel.predict(params=ARmodelfit.params, start=len(cleanlast1second) - 2 * frames, end=len(cleanlast1second) + 0 * frames, dynamic=True) #predict predicteddata = np.concatenate( (cleanlast1second[:-frames], ARmodelpredict)) #get predicted data 'plot the prediction' #plt.figure() #plt.plot(predicteddata,label='predict') #plt.plot(cleanlast1second, label='orig') #plt.plot(cleanlast1second[:(-1*frames)]) #plt.legend()
def AR_MSFT():
    """Download SPY/TLT/MSFT prices and print the AIC-selected AR lag for MSFT.

    Adjusted close prices between 2008-01-01 and 2017-01-01 are fetched
    through ``pandas_datareader`` (source 'yahoo'), converted to log returns,
    and an AR model with up to 30 lags and no constant is fitted to the MSFT
    returns. Requires an IPython session (``get_ipython`` is called).
    """
    import os
    import sys
    import pandas as pd
    import pandas_datareader.data as web
    import numpy as np
    import statsmodels.formula.api as smf
    import statsmodels.tsa.api as smt
    import statsmodels.api as sm
    import scipy.stats as scs
    from arch import arch_model
    import matplotlib.pyplot as plt
    import matplotlib as mpl

    get_ipython().magic('matplotlib inline')
    p = print

    end = '2017-01-01'
    start = '2008-01-01'

    def get_px(ticker):
        # Adjusted close series for one ticker over the date range.
        return web.DataReader(ticker, 'yahoo', start=start, end=end)['Adj Close']

    # Raw adjusted close prices, one column per symbol.
    symbols = ['SPY', 'TLT', 'MSFT']
    data = pd.DataFrame({sym: get_px(sym) for sym in symbols})

    # Log returns; the first row is dropped because of the shift.
    lrets = np.log(data / data.shift(1)).dropna()

    def tsplot(y, lags=None, figsize=(10, 8), style='bmh'):
        # Diagnostic panel: series, ACF, PACF, QQ plot and probability plot.
        if not isinstance(y, pd.Series):
            y = pd.Series(y)
        with plt.style.context(style):
            plt.figure(figsize=figsize)
            grid = (3, 2)
            series_ax = plt.subplot2grid(grid, (0, 0), colspan=2)
            acf_ax = plt.subplot2grid(grid, (1, 0))
            pacf_ax = plt.subplot2grid(grid, (1, 1))
            qq_ax = plt.subplot2grid(grid, (2, 0))
            pp_ax = plt.subplot2grid(grid, (2, 1))

            y.plot(ax=series_ax)
            series_ax.set_title('Time Series Analysis Plots')
            smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
            smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5)
            sm.qqplot(y, line='s', ax=qq_ax)
            qq_ax.set_title('QQ Plot')
            scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

            plt.tight_layout()

    # Select the best lag order for MSFT returns.
    ar_options = dict(maxlag=30, ic='aic', trend='nc')
    msft_fit = smt.AR(lrets.MSFT).fit(**ar_options)
    msft_order = smt.AR(lrets.MSFT).select_order(**ar_options)
    p('best estimated lag order = {}'.format(msft_order))