Example #1
import numpy as np
from statsmodels.tsa.api import AR

def forecast_AR(da, pred):
    forecast = np.zeros(shape=(pred, da.shape[1]))
    # forecast per maturity (assume no dependence between maturities)
    for i in range(da.shape[1]):
        # fit an AR(1) to the time series y_{tau} for fixed tau
        model = AR(da[:, i])
        model_fitted = model.fit(1)

        # assume y_{tau,t+1} = b0 + b1*y_{tau,t} + epsilon
        b0 = model_fitted.params[0]
        b1 = model_fitted.params[1]

        # compute predictions via the mean-reverting reparametrisation:
        # x_t - mu = b1*(x_{t-1} - mu) + e_t, with mu = b0 / (1 - b1)
        mu = b0 / (1 - b1)

        for j in range(pred):
            forecast[j, i] = mu + b1**(j + 1) * (da[-1, i] - mu)

#        # equivalent: roll out the recursion
#        # y_hat_{tau,t+j} = b0*(1 + b1 + ... + b1^(j-1)) + b1^j * y_{tau,t}
#        for j in range(pred):
#            for k in range(j, pred - 1):
#                forecast[k, i] = forecast[k, i] + b0 * b1**k
#            forecast[j, i] = forecast[j, i] + b1**j * da[da.shape[0] - 1, i]  # this line had the bug
    return forecast
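A minimal usage sketch, assuming the imports above and an older statsmodels release where statsmodels.tsa.api.AR still exists; the data are synthetic stationary AR(1) series standing in for per-maturity yields:

rng = np.random.default_rng(0)
da = np.zeros((200, 3))
for t in range(1, 200):
    # three independent mean-reverting series
    da[t] = 0.8 * da[t - 1] + rng.normal(size=3)

fc = forecast_AR(da, pred=10)  # 10-step-ahead forecasts, one column per series
print(fc.shape)                # (10, 3)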
Example #2
import numpy as np
from statsmodels.tsa.api import AR

def Sim_AR(prices: np.ndarray):
    """Autoregressive baseline generator."""
    # fit a univariate AR model with the default lag selection
    ar_model = AR(prices)
    model_fit = ar_model.fit()
    # one-step-ahead out-of-sample prediction
    return model_fit.predict(len(prices), len(prices))
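A hedged call sketch with synthetic prices; Sim_AR returns a length-one array holding the one-step-ahead forecast:

rng = np.random.default_rng(1)
prices = 100.0 + np.cumsum(rng.normal(scale=0.5, size=500))
print(Sim_AR(prices))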
Example #3
import numpy as np
from statsmodels.tsa.api import AR, VAR

def gaussian_var_copula_entropy_rate(sample, p=None, robust=False, p_ic='hqic'):
	"""
	Estimates the entropy rate of the copula-uniform dual representation of a stationary Gaussian VAR(p) (or AR(p)) process from a sample path.

	We recall that the copula-uniform representation of a :math:`\\mathbb{R}^d`-valued process :math:`\\{x_t\\} := \\{(x_{1t}, \\dots, x_{dt}) \\}`
	is, by definition, the process :math:`\\{ u_t \\} := \\{ \\left( F_{1t}\\left(x_{1t}\\right), \\dots, F_{dt}\\left(x_{dt}\\right) \\right) \\}` 
	where :math:`F_{it}` is the cumulative distribution function of :math:`x_{it}`.

	It can be shown that 

	.. math::
		h\\left( \\{ x_t \\}\\right) = h\\left( \\{ u_t \\}\\right) + \\sum_{i=1}^d h\\left( x_{i*}\\right) 

	where :math:`h\\left(x_{i*}\\right)` is the entropy of the i-th coordinate process at any time.

	Parameters
	----------
	sample: (T, d) np.array 
		Array of T sample observations of a :math:`d`-dimensional process.
	p : int or None
		Number of lags to compute for the autocovariance function. If :code:`p=None` (the default), it is inferred by fitting a VAR model on the sample, using as information criterion :code:`p_ic`.
	robust: bool
		If True, the Pearson autocovariance function is estimated by first estimating a Spearman rank correlation, and then inferring the equivalent Pearson autocovariance function, under the Gaussian assumption.
	p_ic : str
		The criterion used to learn the optimal value of :code:`p` (by fitting a VAR(p) model) when :code:`p=None`. 
		Should be one of 'hqic' (Hannan-Quinn Information Criterion), 'aic' (Akaike Information Criterion), 'bic' (Bayes Information Criterion) and 't-stat' (based on last lag). 
		Same as the 'ic' parameter of :code:`statsmodels.tsa.api.VAR`.
	Returns
	-------
	h : float
		The entropy rate of the copula-uniform dual representation of the input process.
	p : int
		Order of the VAR(p).
	"""
	_sample = sample[~np.isnan(sample).any(axis=1)] if len(sample.shape) > 1 else sample[~np.isnan(sample)]
	if p is None:
		# Fit an AR (or VAR) and use the lag order selected by the information criterion.
		max_lag = int(round(12*(_sample.shape[0]/100.)**(1/4.)))
		if len(_sample.shape) == 1 or _sample.shape[1] == 1:
			m = AR(_sample)
			p = m.fit(maxlag=max_lag, ic=p_ic).k_ar
		else:
			m = VAR(_sample)
			p = m.fit(maxlags=max_lag, ic=p_ic).k_ar

	x = _sample if len(_sample.shape) > 1 else _sample[:, None]
	# h({u_t}) = h({x_t}) - sum of marginal Gaussian entropies (see the formula above)
	res = -np.sum(0.5*np.log(2.*np.pi*np.e*np.var(x, axis=0)))
	res += gaussian_var_entropy_rate(x, p, robust=robust)

	return res, p
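A usage sketch under stated assumptions: this function looks like it belongs to a larger module, and gaussian_var_entropy_rate must already be defined in the same scope for it to run; the input below is plain Gaussian noise, so the selected order p may come out small.

T, d = 1000, 3
sample = np.random.default_rng(2).normal(size=(T, d))
h, p = gaussian_var_copula_entropy_rate(sample)
print('copula entropy rate: %.4f, order: %d' % (h, p))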
Example #4
def AR_struct(data, start, maxlag=12, ic='aic'):
    '''
    Determines the optimal number of lags to include in the AR model for
    every time series.

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe which contains the (prepared) time series.
    start : int
        Number of periods in the train set; only the first `start`
        observations are used, so no future information is leaked.
    maxlag : int
        The maximum number of lags allowed in the model. For NPL time series,
        many lags might result in infinite forecasts, because the model has no
        idea which magnitude to give the coefficients of those lags. Moreover,
        it does not seem logical that values from more than x periods ago
        influence the time series of the next period.

        The default is 12.
    ic : str
        Information criterion used to select the number of lags.
        The default is 'aic'.

    Returns
    -------
    AR_struct_dict : dictionary
        Maps every column name to its selected number of lags.

    '''
    #Import libraries
    from statsmodels.tsa.api import AR

    #Initiate a dictionary to save all results
    AR_struct_dict = {}

    #Calculate the number of lags for every time series
    for col in data.columns:
        try:
            AR_struct_dict[col] = AR(data[col].iloc[:start]).fit(maxlag=maxlag,
                                                                 ic=ic).k_ar
        except Exception:
            # fall back to a single lag when the fit fails
            AR_struct_dict[col] = 1

    #return the dictionary
    return AR_struct_dict
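A small usage sketch with hypothetical data (the column names and sizes are illustrative):

import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
data = pd.DataFrame({'slow': rng.normal(size=120).cumsum(),
                     'noisy': rng.normal(size=120)})
lags = AR_struct(data, start=100, maxlag=6)
print(lags)  # maps each column to its selected lag order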
Example #5
def back_test(data, start_index=50000, train_len=3000, p=4, steps=16):
    pre_data = []
    # roll an AR(p) model over the series, refitting at every step
    for x in range(start_index, data.shape[0] - steps):
        rw = AR(data[x - train_len:x]).fit(p)
        ar_pre = _ar_predict_out_of_sample(data,
                                           np.array(copy.deepcopy(rw.params)),
                                           p,
                                           1,
                                           steps,
                                           start=x)
        keep_pre = keep_predict(data, x, train_len)
        pre_data.append([
            data.index[x + steps - 1], ar_pre[steps - 1], keep_pre[steps - 1]
        ])

    result = pd.DataFrame(pre_data, columns=['ptime', 'ar', 'keep'])
    result['ptime'] = pd.to_datetime(result['ptime'])
    result = result.set_index(keys='ptime')
    return result
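Note that this snippet depends on two names defined elsewhere: keep_predict, a user-supplied baseline predictor, and _ar_predict_out_of_sample, a private helper that shipped with older statsmodels releases. A hedged sketch of the imports it would need:

import copy

import numpy as np
import pandas as pd
from statsmodels.tsa.api import AR
# private helper from older statsmodels versions; not a public API
from statsmodels.tsa.ar_model import _ar_predict_out_of_sample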
Example #6
def common_shocks(X):
    # fit an AR(1) and return its estimated parameters
    mod = AR(X)
    res = mod.fit(1)
    return res.params
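A quick check of what it returns; for an AR(1) fit, params holds the intercept followed by the lag-one coefficient:

import numpy as np
from statsmodels.tsa.api import AR

series = np.random.default_rng(4).normal(size=300).cumsum()
print(common_shocks(series))  # array([intercept, lag-1 coefficient])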
Example #7
#coding:utf-8

import pandas as pd
import numpy as np
from statsmodels.tsa.api import AR

file_path = './data/zdtmyts_mer.csv'
df = pd.read_csv(file_path)
x = df['real_power'] / df['theoryp']
# clip negative ratios to zero
df['x_transfer'] = [l if l > 0 else 0 for l in x]
# logit-like transform: y = log(x/(1-x) + 1)
df['y'] = np.log(df['x_transfer'] / (1 - df['x_transfer']) + 1)
df.index = pd.DatetimeIndex(df['time'])
#data = df['y']
print(df['y'])
model = AR(df['y'])
result = model.fit()
#print(result.summary())
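A hedged continuation showing how the fitted model could forecast, and how the transform above inverts; this assumes the same df and result as in the script:

# forecast the next 24 observations in the transformed space
n = len(df)
y_pred = result.predict(start=n, end=n + 23)
# invert y = log(x/(1-x) + 1) back to the ratio x
expy = np.exp(y_pred) - 1
x_pred = expy / (1 + expy)
print(x_pred[:5])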
Example #8
param[0, :] = res.x
print(
    "weight coefs to calculate joint probabilities from 5 extremal probabilities: "
)
print(res.x)

###### Train the extremal probabilities with autoregression  ######
#### AR model ####
from statsmodels.tsa.api import AR
coef = np.empty((NumJoint, NumExtrem, lag_max + 1))
sigma2 = np.empty((NumJoint, NumExtrem))
stdev = np.empty(TotalNumNode)
lag = np.empty((NumJoint, NumExtrem), dtype=int)
for i in range(NumJoint):
    for j in range(NumExtrem):
        # fit an AR model with up to lag_max lags to each extremal-probability series
        ar_model = AR(Pjt[i, j, 0:Trainingsteps])
        ar_results = ar_model.fit(maxlag=lag_max)
        coef[i, j, :] = ar_results.params
        sigma2[i, j] = ar_results.sigma2
        lag[i, j] = ar_results.k_ar

print('Coef. of AR model: ')
print(coef)
print('Sigma of AR model: ')
print(np.sqrt(sigma2))

### use AR model to predict extremal probabilities
# over the training window the predictions are initialised with the observed values
for t in range(Trainingsteps):
    for i in range(NumJoint):
        for j in range(NumExtrem):
            Pjt_predicted[i, j, t] = Pjt[i, j, t]
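Since the fragment stops here, what follows is a hypothetical continuation showing how the stored coef and lag arrays could drive a manual one-step forecast; ar_one_step is an illustrative helper, not part of the original code:

# one-step-ahead AR forecast from stored coefficients:
# y_hat_{t+1} = c + a_1*y_t + ... + a_k*y_{t-k+1}
def ar_one_step(history, params, k_ar):
    c, a = params[0], params[1:k_ar + 1]
    # most recent observation pairs with a_1, and so on backwards
    return c + np.dot(a, history[-1:-k_ar - 1:-1])

# e.g. the step right after the training window for series (0, 0)
# (assumes Pjt, coef, lag, Trainingsteps from the fragment above)
next_val = ar_one_step(Pjt[0, 0, :Trainingsteps], coef[0, 0], int(lag[0, 0]))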
Example #9
    print(is_stable(model_fit.coefs))
    print(model_fit.resid_acorr(nlags=10))

# roll through the training data
for c in var_combs:
    # translate into variable names
    c_vars = "VAR (" + ', '.join([bond_data_colnames[x] for x in c]) + ")"
    # select variables to VAR
    X_all = rvol.iloc[:, c].values
    # select time window
    for t in range(rw, T):
        X = X_all[(t - rw):t, :]
        # for one variable run simple AR
        if len(c) == 1:
            # estimate the model
            model = AR(X)
            model_fit = model.fit(5)
            model_fct = model_fit.predict(start=rw, end=(rw + 21))
            # save forecasts
            if 0 in c:  # check for TU
                ind_fcts_1_TU.iloc[(t - rw),
                                   TU_colnames.index(c_vars)] = model_fct[0]
                if (t - rw) < (T - rw - 4):
                    ind_fcts_5_TU.iloc[(
                        t - rw), TU_colnames.index(c_vars)] = model_fct[4]
                if (t - rw) < (T - rw - 21):
                    ind_fcts_22_TU.iloc[(
                        t - rw), TU_colnames.index(c_vars)] = model_fct[21]
            if 1 in c:  # check for FV
                ind_fcts_1_FV.iloc[(t - rw),
                                   FV_colnames.index(c_vars)] = model_fct[0]
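The fragment above is cut off; as a self-contained illustration of the same rolling-window pattern (every name below is illustrative, none comes from the original):

import numpy as np
from statsmodels.tsa.api import AR

rng = np.random.default_rng(5)
series = rng.normal(size=600)
rw = 250                        # rolling-window length
fcts = []
for t in range(rw, len(series)):
    window = series[t - rw:t]
    fit = AR(window).fit(5)     # AR(5) on the current window
    # indices rw..rw+21 are 1 to 22 steps out of sample
    fcts.append(fit.predict(start=rw, end=rw + 21))
fcts = np.array(fcts)           # shape: (len(series) - rw, 22)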