import numpy as np
from statsmodels.tsa.api import AR


def forecast_AR(da, pred):
    """AR(1) forecasts per maturity (assumes no dependence between maturities)."""
    forecast = np.zeros(shape=(pred, da.shape[1]))
    for i in range(da.shape[1]):
        # fit to the time series y_{tau} for fixed tau
        model = AR(da[:, i])
        model_fitted = model.fit(1)
        # assume y_{tau,t+1} = b0 + b1*y_{tau,t} + epsilon
        b0 = model_fitted.params[0]
        b1 = model_fitted.params[1]
        # print(model_fitted.params)
        # closed-form predictions from the mean reparametrisation:
        # x_t - mu = b1 * (x_{t-1} - mu) + e_t, with mu = b0 / (1 - b1)
        mu = b0 / (1 - b1)
        for j in range(pred):
            forecast[j, i] = mu + b1 ** (j + 1) * (da[-1, i] - mu)
        # equivalent: roll out the recursion
        # \hat{y}_{tau,t+j} = b0*(1 + ... + b1^{j-1}) + b1^j * y_{tau,t}
        # for j in range(pred):
        #     for k in range(j, pred - 1):
        #         forecast[k, i] += b0 * b1 ** k
        #     forecast[j, i] += b1 ** j * da[-1, i]  # this line had the bug
    return forecast
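# Minimal usage sketch for forecast_AR on made-up, roughly stationary data
# (names and values are illustrative only; runs under an older statsmodels
# where AR is still available):
import numpy as np

rng = np.random.default_rng(0)
da = 3.0 + 0.1 * rng.standard_normal((200, 3))  # 200 observations, 3 maturities

fc = forecast_AR(da, pred=12)  # 12-step-ahead forecast for each maturity
print(fc.shape)                # (12, 3)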
def Sim_AR(prices: np.ndarray):
    """Autoregressive baseline generator."""
    # fit a univariate AR model (note: AR, not VAR, despite the original comment)
    model = AR(prices)
    model_fit = model.fit()
    # predict one step past the end of the sample
    return model_fit.predict(len(prices), len(prices))
import numpy as np
from statsmodels.tsa.api import AR, VAR


def gaussian_var_copula_entropy_rate(sample, p=None, robust=False, p_ic='hqic'):
    """
    Estimates the entropy rate of the copula-uniform dual representation of a
    stationary Gaussian VAR(p) (or AR(p)) process from a sample path.

    We recall that the copula-uniform representation of a
    :math:`\\mathbb{R}^d`-valued process
    :math:`\\{x_t\\} := \\{(x_{1t}, \\dots, x_{dt})\\}` is, by definition, the process
    :math:`\\{u_t\\} := \\{\\left(F_{1t}\\left(x_{1t}\\right), \\dots, F_{dt}\\left(x_{dt}\\right)\\right)\\}`,
    where :math:`F_{it}` is the cumulative distribution function of :math:`x_{it}`.

    It can be shown that

    .. math::
        h\\left(\\{x_t\\}\\right) = h\\left(\\{u_t\\}\\right) + \\sum_{i=1}^d h\\left(x_{i*}\\right)

    where :math:`h\\left(x_{i*}\\right)` is the entropy of the i-th coordinate
    process at any time.

    Parameters
    ----------
    sample : (T, d) np.array
        Array of T sample observations of a :math:`d`-dimensional process.
    p : int or None
        Number of lags to compute for the autocovariance function. If
        :code:`p=None` (the default), it is inferred by fitting a VAR model on
        the sample, using :code:`p_ic` as the information criterion.
    robust : bool
        If True, the Pearson autocovariance function is estimated by first
        estimating a Spearman rank correlation, and then inferring the
        equivalent Pearson autocovariance function, under the Gaussian
        assumption.
    p_ic : str
        The criterion used to learn the optimal value of :code:`p` (by fitting
        a VAR(p) model) when :code:`p=None`. Should be one of 'hqic'
        (Hannan-Quinn Information Criterion), 'aic' (Akaike Information
        Criterion), 'bic' (Bayes Information Criterion) and 't-stat' (based on
        the last lag). Same as the 'ic' parameter of
        :code:`statsmodels.tsa.api.VAR`.

    Returns
    -------
    h : float
        The entropy rate of the copula-uniform dual representation of the
        input process.
    p : int
        Order of the VAR(p).
    """
    # drop observations containing NaNs
    _sample = sample[~np.isnan(sample).any(axis=1)] if len(sample.shape) > 1 \
        else sample[~np.isnan(sample)]

    if p is None:
        # Fit an AR/VAR and use the fitted p; Schwert's rule of thumb caps the
        # number of lags considered.
        max_lag = int(round(12 * (_sample.shape[0] / 100.) ** (1 / 4.)))
        if len(_sample.shape) == 1 or _sample.shape[1] == 1:
            p = AR(_sample).fit(maxlag=max_lag, ic=p_ic).k_ar
        else:
            p = VAR(_sample).fit(maxlags=max_lag, ic=p_ic).k_ar

    x = _sample if len(_sample.shape) > 1 else _sample[:, None]
    # h({u_t}) = h({x_t}) - sum_i h(x_{i*}), with Gaussian marginal entropies
    res = -np.sum(0.5 * np.log(2. * np.pi * np.e * np.var(x, axis=0)))
    res += gaussian_var_entropy_rate(x, p, robust=robust)
    return res, p
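# Minimal usage sketch on synthetic data. This assumes the
# gaussian_var_entropy_rate helper referenced above is in scope (it is
# called but not defined in this snippet):
import numpy as np

rng = np.random.default_rng(42)
sample = rng.standard_normal((500, 2))  # synthetic 2-d Gaussian sample

h, p = gaussian_var_copula_entropy_rate(sample, p_ic='hqic')
print('copula entropy rate: %.4f, fitted order p=%d' % (h, p))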
def AR_struct(data, start, maxlag=12, ic='aic'):
    '''
    Determines the optimal number of lags to include in the model for every
    time series.

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe which includes the (prepared) time series.
    start : int
        Defines the number of periods in the train set --> do not use future
        information.
    maxlag : int
        The maximum number of lags allowed in the model. For NPL time series,
        many lags might result in infinite forecasts, because the model has no
        idea which magnitude to give the coefficients of those lags. Moreover,
        it does not seem logical that values from more than x periods ago
        influence the time series of the next period. The default is 12.
    ic : str
        Information criterion used to select the lag order. The default is 'aic'.

    Returns
    -------
    AR_struct_dict : dict
        Maps each column name to the optimal number of lags for that series.
    '''
    # import libraries
    from statsmodels.tsa.api import AR

    # initiate a dictionary to save all results
    AR_struct_dict = {}

    # calculate the number of lags for every time series
    for col in data.columns:
        try:
            AR_struct_dict[col] = AR(data[col].iloc[:start]).fit(maxlag=maxlag, ic=ic).k_ar
        except Exception:
            # fall back to a single lag if the fit fails
            AR_struct_dict[col] = 1

    # return the dictionary
    return AR_struct_dict
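# Minimal, hypothetical usage sketch for AR_struct on a toy DataFrame
# (synthetic data; runs under an older statsmodels where AR is available):
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
data = pd.DataFrame({'series_a': rng.standard_normal(100).cumsum(),
                     'series_b': rng.standard_normal(100)})

lags = AR_struct(data, start=80, maxlag=12, ic='aic')
print(lags)  # e.g. {'series_a': ..., 'series_b': ...}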
import copy

import numpy as np
import pandas as pd
from statsmodels.tsa.api import AR

# _ar_predict_out_of_sample (a private helper in older statsmodels) and
# keep_predict are assumed to be imported/defined elsewhere.


def back_test(data, start_index=50000, train_len=3000, p=4, steps=16):
    pre_data = []
    for x in range(start_index, data.shape[0] - steps):  # xrange in the Python 2 original
        # fit an AR(p) on a rolling training window
        rw = AR(data[x - train_len:x]).fit(p)
        ar_pre = _ar_predict_out_of_sample(data,
                                           np.array(copy.deepcopy(rw.params)),
                                           p, 1, steps, start=x)
        keep_pre = keep_predict(data, x, train_len)
        pre_data.append([data.index[x + steps - 1],
                         ar_pre[steps - 1],
                         keep_pre[steps - 1]])
    result = pd.DataFrame(pre_data, columns=['ptime', 'ar', 'keep'])
    result['ptime'] = pd.to_datetime(result['ptime'])
    result = result.set_index(keys='ptime')
    return result
def common_shocks(X):
    # fit an AR(1) and return its fitted parameters
    mod = AR(X)
    res = mod.fit(1)
    return res.params
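# Quick check of the returned parameter layout, on made-up data. Under the
# old statsmodels AR API, params[0] is the intercept and params[1] the
# lag-1 coefficient:
import numpy as np

rng = np.random.default_rng(7)
x = np.zeros(300)
for t in range(1, 300):
    x[t] = 0.5 * x[t - 1] + rng.standard_normal()  # synthetic AR(1)

params = common_shocks(x)
print(params)  # approximately [0.0, 0.5]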
# coding:utf-8
import pandas as pd
import numpy as np
from statsmodels.tsa.api import AR

file_path = './data/zdtmyts_mer.csv'
df = pd.read_csv(file_path)

# capacity factor: realised power over theoretical power, floored at 0
x = df['real_power'] / df['theoryp']
df['x_transfer'] = [l if l > 0 else 0 for l in x]

# log transform: y = log(x / (1 - x) + 1)
df['y'] = np.log(df['x_transfer'] / (1 - df['x_transfer']) + 1)

df.index = pd.DatetimeIndex(df['time'])
# data = df['y']
print(df['y'])

model = AR(df['y'])
result = model.fit()
# print(result.summary())
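# Sanity check of the transform above: algebraically,
# x/(1-x) + 1 == (x + 1 - x)/(1-x) == 1/(1-x), so y == -log(1-x),
# which grows without bound as x approaches 1.
import numpy as np

for x in [0.0, 0.5, 0.9]:
    assert np.isclose(np.log(x / (1 - x) + 1), -np.log(1 - x))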
param[0, :] = res.x
print("weight coefs to calculate joint probabilities from 5 extremal probabilities: ")
print(res.x)

###### Train the extremal probabilities with autoregression ######

#### AR model ####
from statsmodels.tsa.api import AR

coef = np.empty((NumJoint, NumExtrem, lag_max + 1))
sigma2 = np.empty((NumJoint, NumExtrem))
stdev = np.empty(TotalNumNode)
lag = np.empty((NumJoint, NumExtrem))
for i in range(NumJoint):
    for j in range(NumExtrem):
        # fit an AR model (up to lag_max lags) to each extremal-probability series
        ar_model = AR(Pjt[i, j, 0:Trainingsteps])
        ar_results = ar_model.fit(maxlag=lag_max)
        coef[i, j, :] = ar_results.params
        sigma2[i, j] = ar_results.sigma2
        lag[i, j] = ar_results.k_ar

print('Coef. of AR model: ')
print(coef)
print('Sigma of AR model: ')
print(np.sqrt(sigma2))

### use the AR model to predict extremal probabilities
# over the training window, the "prediction" is just the observed value
for t in range(Trainingsteps):
    for i in range(NumJoint):
        for j in range(NumExtrem):
            Pjt_predicted[i, j, t] = Pjt[i, j, t]
print(is_stable(model_fit.coefs))
print(model_fit.resid_acorr(nlags=10))

# roll through the training data
for c in var_combs:
    # translate into variable names
    c_vars = "VAR (" + ', '.join([bond_data_colnames[x] for x in c]) + ")"
    # select variables for the VAR
    X_all = rvol.iloc[:, c].values
    # select time window
    for t in range(rw, T):
        X = X_all[(t - rw):t, :]
        # for a single variable, run a simple AR instead
        if len(c) == 1:
            # estimate the model
            model = AR(X)
            model_fit = model.fit(5)
            model_fct = model_fit.predict(start=rw, end=(rw + 21))
            # save 1-, 5- and 22-step-ahead forecasts
            if 0 in c:  # check for TU
                ind_fcts_1_TU.iloc[(t - rw), TU_colnames.index(c_vars)] = model_fct[0]
                if (t - rw) < (T - rw - 4):
                    ind_fcts_5_TU.iloc[(t - rw), TU_colnames.index(c_vars)] = model_fct[4]
                if (t - rw) < (T - rw - 21):
                    ind_fcts_22_TU.iloc[(t - rw), TU_colnames.index(c_vars)] = model_fct[21]
            if 1 in c:  # check for FV
                ind_fcts_1_FV.iloc[(t - rw), FV_colnames.index(c_vars)] = model_fct[0]
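# One caveat for all the snippets above: statsmodels.tsa.api.AR was
# deprecated in statsmodels 0.11 and removed in later releases, so these
# examples need an older statsmodels. The replacement is AutoReg; a minimal
# migration sketch (synthetic data, illustrative only):
import numpy as np
from statsmodels.tsa.ar_model import AutoReg

rng = np.random.default_rng(0)
y = rng.standard_normal(200)

# old: AR(y).fit(1)  ->  new: AutoReg(y, lags=1).fit()
res = AutoReg(y, lags=1).fit()
print(res.params)                             # [intercept, lag-1 coefficient]
print(res.predict(start=len(y), end=len(y)))  # one-step-ahead forecast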