Example #1
def lead_lag(dfx, maxlags=10, steps_to_forecast=1, preprocess=0, display=True):
    if preprocess == 0:
        df = dfx  # preprocess:0 --> do nothing
    elif preprocess == 1:
        #df = returns_df(dfx)                    # preprocess:1 --> returns (diff of the log of the values)
        df = diff_df(dfx)
    elif preprocess == 2:
        df = normalize_df(
            dfx, display=display
        )  # preprocess:2 --> normalize using mean and standard deviation
    model = smt.VAR(df)
    model.select_order(maxlags=maxlags, verbose=display)
    results = model.fit(maxlags=maxlags, ic='aic')
    if display: print(results.summary())
    lag_order = results.k_ar
    #print "lag_order: {0}".format(lag_order)
    dfz = df[-lag_order:]
    forecast = results.forecast(dfz.values, steps_to_forecast)
    if display:
        print "forecast (lag_order={0}):\n{1}\n".format(lag_order, forecast)
    #df[['Forecast_btc','Forecast_eth']] = results.fittedvalues
    #df[['Forecast_X','Forecast_Y']] = res.fittedvalues
    size = results.fittedvalues.shape[0]
    df = df[-size:].copy()
    #df[['Forecast_x','Forecast_y']] = res.fittedvalues
    return results, forecast, df.dropna(), lag_order
Example #2
def VarSimul(data, H):
    model = sm.VAR(data)
    results = model.fit(H)
    VARcoeff = results.params[1:]
    VARcoeff = np.array(VARcoeff).reshape(
        len(VARcoeff) // len(data.columns), len(data.columns),
        len(data.columns))
    VARstd = results.stderr[1:]
    VARstd = np.array(VARstd).reshape(
        len(VARstd) // len(data.columns), len(data.columns), len(data.columns))
    test = []
    for i in range(1000):
        VarSim = np.zeros(
            (len(VARcoeff) // len(data.columns), len(data.columns),
             len(data.columns)))
        for j in range(VarSim.shape[0]):
            for k in range(VarSim.shape[1]):
                for l in range(VarSim.shape[2]):
                    VarSim[j][k, l] = np.random.normal(VARcoeff[j][k, l],
                                                       VARstd[j][k, l])
        marep = ma_rep(VarSim, 10)
        test.append(marep[1][0, 0])
    print(np.std(test))
    seaborn.distplot(test, norm_hist=True)
    plt.show()
Example #3
def EstimateVAR(data, H):
    """

    :param data: A numpy array of log returns
    :param H: integer, size of step ahead forecast
    :return: a dataframe of connectivity or concentration parameters
    """

    model = sm.VAR(data)
    results = model.fit(maxlags=10, ic='aic')

    SIGMA = np.cov(results.resid.T)
    ma_rep = results.ma_rep(maxn=H)
    GVD = np.zeros_like(SIGMA)

    r, c = GVD.shape
    for i in range(r):
        for j in range(c):
            GVD[i, j] = 1 / np.sqrt(SIGMA[i, i]) * sum(
                [ma_rep[h, i].dot(SIGMA[j])**2 for h in range(H)]) / sum([
                    ma_rep[h, i, :].dot(SIGMA).dot(ma_rep[h, i, :])
                    for h in range(H)
                ])
            # GVD[i,j] = SIGMAINV[i,i] * sum([ma_rep[h,i].dot(SIGMA[j])**2 for h in range(H)]) / sum([ma_rep[h,i,:].dot(SIGMA).dot(ma_rep[h,i,:]) for h in range(H)])
        GVD[i] /= GVD[i].sum()

    return pd.DataFrame(GVD), SIGMA, ma_rep, results.resid.T
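# Minimal usage sketch for EstimateVAR above, on simulated data; the two-series DataFrame `demo`,
# the seed, and the 10-step horizon are illustrative assumptions (numpy/pandas imports as elsewhere
# in these examples), not part of the original example.
rng = np.random.default_rng(0)
demo = pd.DataFrame(rng.normal(size=(500, 2)), columns=['x', 'y'])
connectivity, SIGMA_hat, ma_hat, resid_T = EstimateVAR(demo, H=10)
print(connectivity.round(3))  # each row allocates one variable's forecast-error variance and sums to 1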
Example #4
def SOI(days=50):
    H = 15
    data = pd.read_csv('data/TData9313_final6.csv',index_col=0)
    data = np.log(data).diff()[1:]
    data.index = pd.to_datetime(data.index)
    ddate = datetime.datetime(1994,12,27)
    soidf = pd.DataFrame()
    print(days)
    while ddate<datetime.datetime(2014,1,1):
        datestr2 = ddate.strftime('%Y%m%d')
        datestr1 = (ddate-datetime.timedelta(days)).strftime('%Y%m%d')
        print(datestr1, datestr2, "\t", end=" ")
        td = data[datestr1:datestr2].dropna(axis=1, how='any')
        model = sm.VAR(td)
        results = model.fit(maxlags=H, ic='aic')
        SIGMA = np.cov(results.resid.T)
        _ma_rep = results.ma_rep(maxn=H)
        GVD = np.empty_like(SIGMA)
        r, c = GVD.shape
        for i in range(r):
            for j in range(c):
                GVD[i, j] = 1 / np.sqrt(SIGMA[i, i]) * sum([_ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]) / sum(
                    [_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)])
            GVD[i] /= GVD[i].sum()
        soi = (len(GVD)-np.trace(np.array(GVD)))/len(GVD)
        print(soi)
        soidf.loc[td.index[-1].strftime("%Y%m%d"),'SOI'] = soi
        soidf.loc[td.index[-1].strftime("%Y%m%d"),'LL'] = int((len(results.params)-1)/len(td.columns))
        ddate += datetime.timedelta(1)
        soidf.to_csv('SOI_%s_days.csv' % (days,),mode='w',header=True)
Example #5
def EstimateVAR(data, H, sparse_method=False, GVD_output=True):
    """

    :param data: A numpy array of log returns
    :param H: integer, size of step ahead forecast
    :return: a dataframe of connectivity or concentration parameters
    """
    model = sm.VAR(data)
    results = model.fit(maxlags=H, ic='aic')

    SIGMA = np.cov(results.resid.T)

    if sparse_method:
        exit("THE METHOD USES RESULTS.COEFS INSTEAD OF PARAMS")
        _nAssets = results.params.shape[1]
        _nLags = results.params.shape[0] // results.params.shape[1]

        custom_params = np.where(abs(results.params / results.stderr) > 1.96, results.params, 0)[1:].reshape(
            (_nLags, _nAssets, _nAssets))
        _ma_rep = ma_rep(custom_params, maxn=H)
    else:
        _ma_rep = results.ma_rep(maxn=H)

    GVD = np.empty_like(SIGMA)

    if GVD_output:
        r, c = GVD.shape
        for i in range(r):
            for j in range(c):
                GVD[i, j] = 1 / np.sqrt(SIGMA[j, j]) * sum([_ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]) / sum(
                    [_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)])
            GVD[i] /= GVD[i].sum()

    return pd.DataFrame(GVD), SIGMA, _ma_rep, results.resid
Example #6
def VarSimul2(data, H):
    model = sm.VAR(data)
    results = model.fit(H)
    VARcoeff = results.params[1:]
    VARcoeff = np.array(VARcoeff).reshape(
        len(VARcoeff) // len(data.columns), len(data.columns),
        len(data.columns))
    VARstd = results.stderr[1:]
    VARstd = np.array(VARstd).reshape(
        len(VARstd) // len(data.columns), len(data.columns), len(data.columns))
    test = []

    for i in range(10000):
        VarSim = np.zeros(
            (len(VARcoeff) // len(data.columns), len(data.columns),
             len(data.columns)))
        for j in range(VarSim.shape[0]):
            for k in range(VarSim.shape[1]):
                for l in range(VarSim.shape[2]):
                    VarSim[j][k, l] = np.random.normal(VARcoeff[j][k, l],
                                                       VARstd[j][k, l])
        marep = ma_rep(VarSim, 15)
        tlist = [marep[j][9, 0] for j in range(marep.shape[0])]

        test.append(tlist)

    mean = [np.mean([j[i] for j in test]) for i in range(len(test[0]))]
    up = [
        np.percentile([j[i] for j in test], 97.5) for i in range(len(test[0]))
    ]
    down = [
        np.percentile([j[i] for j in test], 2.5) for i in range(len(test[0]))
    ]
    plt.plot(mean,
             color=seaborn.xkcd_rgb['cornflower blue'],
             alpha=1,
             linestyle='-')
    plt.plot(up,
             color=seaborn.xkcd_rgb['indian red'],
             alpha=0.5,
             linestyle='--')
    plt.plot(down,
             color=seaborn.xkcd_rgb['indian red'],
             alpha=0.5,
             linestyle='--')
    plt.fill_between(range(len(mean)), up, down, alpha=0.5)
    plt.xlim(1)
    plt.show()
Example #7
def MetropolisHastingMCMC(data, H):
    model = sm.VAR(data)
    results = model.fit(H)
    VARcoeff = results.params[1:]
    VARcoeff = np.array(VARcoeff).reshape(
        len(VARcoeff) // len(data.columns), len(data.columns),
        len(data.columns))
    VARstd = results.stderr[1:]
    VARstd = np.array(VARstd).reshape(
        len(VARstd) // len(data.columns), len(data.columns), len(data.columns))
    mean = VARcoeff[1][1, 1]
    std = VARstd[1][1, 1]
    #mean = 1000
    #std = 100
    N = 200000
    s = 10
    r = np.random.normal(mean, std)
    #p = np.random.normal(mean,std)
    p = scipy.stats.norm.pdf(r, mean, std) + 1
    samples = []
    #plt.ion()
    #plt.show()
    for i in range(N):
        if i % 10000 == 0:
            print(i)
        rn = r + np.random.normal()
        pn = scipy.stats.norm.pdf(rn, mean, std)
        if pn >= p:
            p = pn
            r = rn
        else:
            u = np.random.rand()
            if u < pn / p:
                p = pn
                r = rn
        if i % s == 0:
            samples.append(r)
            #plt.plot(samples)
            #plt.draw()

    samples = samples[int(N / 200):]
    normdata = np.random.normal(mean, std, len(samples))
    seaborn.distplot(samples, norm_hist=True, label='MCMC')
    seaborn.distplot(normdata, norm_hist=True, label='normdata')
    plt.vlines(mean, 0, 7.5)
    plt.legend()
    plt.show()
Example #8
def main():

    signal = np.ones(150)
    signal[:100] *= 50
    signal[100:] *= 40

    dates = pd.date_range('1/1/2017', periods=150, freq='D')

    ts = pd.Series(signal, index=dates)

    df = pd.DataFrame(ts)

    data = df.to_numpy()

    model = tsa.VAR(data)
    results = model.fit()
    residuals = [np.matrix(e).T for e in list(results.resid)]
Example #9
def EstimateVAR(df):
    df = pd.read_csv('data.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.dropna().ffill().set_index('Date')
    #df = df.drop('William Demant Holding',1)

    data = np.log(df).diff().dropna()
    df.to_csv('data.csv')
    model = sm.VAR(data)
    results = model.fit(maxlags=5, ic='aic')
    fevd = results.fevd(10)
    fevd.summary()
    allcomp = fevd.decomp[:, -1]
    for i, name in zip(fevd.decomp, fevd.names):
        print(name)
        tempdf = pd.DataFrame(i, columns=fevd.names)
        tempdf.to_csv('test/' + name + '.csv')
    exit()
Example #10
def EstimateVARTest(data, H, sparse_method=False):
    """

    :param data: A numpy array of log returns
    :param H: integer, size of step ahead forecast
    :return: a dataframe of connectivity or concentration parameters
    """

    model = sm.VAR(data)
    results = model.fit(maxlags=H, ic='aic')

    SIGMA = np.cov(results.resid.T)

    if sparse_method:
        _nAssets = results.params.shape[1]
        _nLags = results.params.shape[0] // results.params.shape[1]

        custom_params = np.where(
            abs(results.params / results.stderr) > 1.96, results.params,
            0)[1:].reshape((_nLags, _nAssets, _nAssets))
        _ma_rep = ma_rep(custom_params, maxn=H)
    else:
        _ma_rep = results.ma_rep(maxn=H)

    GVD = np.zeros_like(SIGMA)

    r, c = GVD.shape
    for i in range(r):
        for j in range(c):
            #GVD[i, j] = 1 / np.sqrt(SIGMA[i, i]) * sum([_ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]) / sum([_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)])
            GVD[i, j] = sum([_ma_rep[h, i].dot(SIGMA[j])**2 for h in range(H)]) / sum(
                [_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)])
        #GVD[i] /= GVD[i].sum()

    print(pd.DataFrame(SIGMA) * 10000000)
    print(pd.DataFrame(GVD) * 10000000)
    print(pd.DataFrame(SIGMA) - pd.DataFrame(GVD))

    return pd.DataFrame(GVD), SIGMA, _ma_rep, results.resid
Example #11
def EstimateVAR_slow():
    df = pd.read_csv(
        'C:/Users/thoru_000/Dropbox/Pers/PyCharmProjects/Speciale/data.csv',
        sep=";")
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.dropna().ffill().set_index('Date')
    data = np.log(df).diff().dropna()

    model = sm.VAR(data)
    results = model.fit(maxlags=5, ic='aic')

    coeff = results.coefs
    SIGMA = np.cov(results.resid.T)
    ma_rep = results.ma_rep(maxn=10)

    mse = results.mse(10)

    GVD = np.zeros_like(SIGMA)

    r, c = GVD.shape
    for i in range(r):
        for j in range(c):
            sel_j = np.zeros(r)
            sel_j[j] = 1
            sel_i = np.zeros(r)
            sel_i[i] = 1

            AuxSum = 0
            AuxSum_den = 0

            for h in range(10):
                AuxSum += (sel_i.T.dot(ma_rep[h]).dot(SIGMA).dot(sel_j))**2
                AuxSum_den += (sel_i.T.dot(ma_rep[h]).dot(SIGMA).dot(
                    ma_rep[h].T).dot(sel_i))

            GVD[i, j] = (AuxSum * (1 / SIGMA[i, i])) / AuxSum_den

        GVD[i] /= GVD[i].sum()

    pd.DataFrame(GVD).to_csv('GVD.csv', index=False, header=False)
Example #12
def Main():
    H = 10
    data = pd.read_csv("Data/Index_data.csv", sep=';')
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
    #data = data[['XLY','XLE','XLP']]
    model = sm.VAR(data)
    results = model.fit(maxlags=5, ic='aic')
    eps = results.resid
    SIGMA = np.cov(eps.T)
    ma_rep = results.ma_rep(maxn=H)
    nn = 0
    df = pd.DataFrame()
    print(pd.DataFrame(SIGMA))
    for j in range(H):
        tt = SIGMA[nn, nn]**(-0.5) * (ma_rep[j, nn]).dot(SIGMA)
        print(ma_rep[j, nn])
        for nr, i in enumerate(tt):
            df.loc[j, nr] = i
    for nr, j in enumerate(df.columns):
        plt.plot(df[j], label=data.columns[nr])
    plt.legend()
    plt.show()
Example #13
data = sm.datasets.macrodata.load()
mdata = data.data
df = DataFrame.from_records(mdata)
quarter_end = frequencies.BQuarterEnd()
df.index = [quarter_end.rollforward(datetime(int(y), int(q) * 3, 1))
            for y, q in zip(df.pop('year'), df.pop('quarter'))]
logged = np.log(df.loc[:, ['m1', 'realgdp', 'cpi']])
logged.plot(subplots=True)

log_difference = logged.diff().dropna()
plot_acf_multiple(log_difference.values)

#Example TSA VAR

model = tsa.VAR(log_difference, freq='D')
print(model.select_order())

res = model.fit(2)
print(res.summary())
print(res.is_stable())

irf = res.irf(20)
irf.plot()

fevd = res.fevd()
fevd.plot()

#print res.test_whiteness()
print(res.test_causality('m1', 'realgdp'))
#print res.test_normality() # exception
Example #14
def VARmodel(dataset):
    VARmodel = sm.VAR(dataset)
    VARmodel_fit = VARmodel.fit(ic='bic', trend='c')
    return VARmodel_fit
Example #15
def VecAutoReg_fit(endog, maxlag=None, method='ols', trend='c'):
    return tsa.VAR(endog).fit(maxlags=maxlag, method=method, trend=trend)
Example #16
#from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
from statsmodels.tsa.arima_model import ARIMA

np.random.seed(7)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

df = pd.read_hdf('DeepLearning.h5', 'Data_Gold')

for c in df.columns:
    df[c + '_ret'] = df[c].pct_change(2).fillna(0)

data = df.loc[:, ['Gold_ret', 'DJI_ret', 'Inflation_ret']]
var = sm.VAR(data)
#selecting VAR order
print(var.select_order(10))
results = var.fit(4)
x_pred = results.fittedvalues

df.loc[:, 'Pred_ret'] = x_pred.loc[:, 'Gold_ret']  #arima.fittedvalues
df = df
df['Pred'] = df.Gold
for j in range(len(df.index)):
    if j < 4:
        continue
    i = df.index[j]
    prev = df.index[j - 2]
    df.loc[i, 'Pred'] = df.loc[prev, 'Pred'] * (1 + df.loc[i, 'Pred_ret'])
df.to_hdf('DeepLearning.h5', 'Pred_VAR')
Example #17
X = np.concatenate(
    [pc[1:, None], e[1:, None], cpi[1:, None], y[1:, None], m[1:, None]],
    axis=1)
var_names = ['pc', 'e', 'log cpi', 'log r gdp', 'log Dm2m']
shock_names = ['com. price', 'infl.-target', 'cost-push', 'demand', 'mon. pol']

dates = pd.date_range(start='1992-01', periods=len(X), freq='M')
endog = pd.DataFrame(X, columns=var_names)
endog.index = pd.DatetimeIndex(dates)

print('\nAre there any missings?', pd.isnull(endog).any().any(), '.')

#select lag order by criteria
lag_crit = sm.VAR(endog, dates=dates, freq='m').select_order()
p = lag_crit['aic']
h = 48
sample_split = '2007'

recVar = sm.VAR(endog, dates=dates, freq='m').fit(maxlags=p)

# gather some info about the specification for saving
sample = [
    str(recVar.dates.min()).split()[0],
    str(recVar.dates.max()).split()[0]
]
sample = '_to_'.join(sample)
lags = ': '.join(['lags', str(p)])
info = ', '.join([sample, lags])
Example #18
    alpha = 0.05
    cvt = out.cvt[:, int(np.round((0.1 - alpha) / 0.05))]
    traces = out.lr1
    import statsmodels.tsa.api as smt
    from statsmodels.tsa.api import VAR


mod = smt.VAR(pdf)
res = mod.fit(maxlags=5, ic='aic')
print(res.summary())
lag_order = res.k_ar
res.forecast(pdf.values[-lag_order:], 5)
res.plot_forecast(10)
pred = res.forecast(pdf.values[-2 * lag_order: -lag_order], 5)
Example #19
# plt.show()

nobs = 15
X_train, X_test = dataset[0:-nobs], dataset[-nobs:]
print(X_train.shape)
print(X_test.shape)

transform_data = X_train.diff().dropna()
print(transform_data.head())
print(transform_data.describe())

# TODO Stationarity check
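# Hedged sketch for the stationarity TODO above: an ADF test on each differenced series
# (assumes adfuller is available from statsmodels.tsa.stattools; 5% significance level).
from statsmodels.tsa.stattools import adfuller
for col in transform_data.columns:
    adf_p = adfuller(transform_data[col].dropna())[1]
    print(col, 'stationary' if adf_p < 0.05 else 'non-stationary', round(adf_p, 4))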

mod = smt.VAR(transform_data)
res = mod.fit(maxlags=15, ic='aic')
print(res.summary())

# TODO Durbin-Watson Statistic
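# Hedged sketch for the Durbin-Watson TODO above: the statistic per residual series
# (durbin_watson from statsmodels.stats.stattools; values near 2 suggest little serial correlation).
from statsmodels.stats.stattools import durbin_watson
for col, dw in zip(transform_data.columns, durbin_watson(res.resid)):
    print(col, round(dw, 2))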

pred = res.forecast(transform_data.values[3:], 15)
pred_df = pd.DataFrame(pred,
                       index=dataset.index[-15:],
                       columns=dataset.columns)
print(pred_df)

pred_inverse = pred_df.cumsum()
f = pred_inverse + X_test  #.shift(1)
print(f)
#
Example #20
def cusum_algorithm(data, critical_value):

    num_dimensions = len(data[0])
    total_time = len(data)



    ################
    #### STEP 1 ####
    ################

    # Estimate a VAR(p) model and compute the residuals.
    model = tsa.VAR(data)
    results = model.fit()
    residuals = [ np.matrix(e).T for e in list(results.resid) ]
    d = int(num_dimensions * (results.k_ar + 0 + 1)
            + (num_dimensions * (num_dimensions + 1)) / 2 + 1)

    possible_changepoints = [0, total_time-1]

    # Initialize h_first and h_last to the endpoints +/- d.
    h_first = d
    h_last = total_time-1 - d

    while True:

        if ( h_first >= h_last ):
            break
            
        ################
        #### STEP 2 ####
        ################

        # Find the most likely changepoint (if any) between h_first and h_last.
        Gamma_max, h_max, Cs = max_Cs(h_first, h_last,
                                      residuals, num_dimensions)
        '''
        plt.plot(range(h_first, h_last+1), Cs)
        plt.axhline(critical_value, color='red')
        plt.axvline(h_max, color='gray', linestyle='--')
        plt.show()
        '''

        # If there are none between h_first and h_last, skip to step 4.
        if Gamma_max < critical_value:
            break

        # Otherwise, look for more changepoints.
        else:
            old_Gamma_max = Gamma_max
            old_h_max = h_max

            #################
            #### STEP 3a ####
            #################

            # Find the leftmost possible changepoint (i.e., leftmost point
            # with a significant test statistic). Make it the new h_first.
            while Gamma_max > critical_value:
                t_2 = h_max - 1
                Gamma_max, h_max, Cs = max_Cs(h_first, t_2,
                                              residuals, num_dimensions)
            h_first = t_2

            #################
            #### STEP 3b ####
            #################

            # Find the rightmost possible changepoint (i.e., rightmost point
            # with a significant test statistic). Make it the new h_last.
            Gamma_max = old_Gamma_max
            h_max = old_h_max
            while Gamma_max > critical_value:
                t_1 = h_max + 1
                Gamma_max, h_max, Cs = max_Cs(t_1, h_last,
                                              residuals, num_dimensions)
            h_last = t_1

            #################
            #### STEP 3c ####
            #################

            # If the time between h_first and h_last is greater than our resolution d,
            # record them and repeat steps 2 and 3 in narrower interval.
            if np.abs(h_last - h_first) > d:
                possible_changepoints.append(h_first)
                possible_changepoints.append(h_last)

                h_first = h_first + d
                h_last = h_last - d
            # Otherwise, record the most likely changepoint from before and then
            # go to step 4.
            else:
                possible_changepoints.append(old_h_max)
                break


    ################
    #### STEP 4 ####
    ################

    possible_changepoints.sort()

    # Delete possible changepoints until convergence.
    converged = False
    while not converged:
        # For every ith and (i+2)th changepoint, check if the open interval
        # between them is statistically significant. If not, drop the (i+1)th.
        for i in range(len(possible_changepoints)-2):
            Gamma_max, h_max, Cs = max_Cs(possible_changepoints[i]+1,
                                          possible_changepoints[i+2]-1,
                                          residuals, num_dimensions)

            if Gamma_max < critical_value:
                # Mark for deletion.
                possible_changepoints[i+1] = -1

        converged = True
        # Delete the marked ones.
        for i in reversed(range(len(possible_changepoints))):
            if possible_changepoints[i] == -1:
                del possible_changepoints[i]
                converged = False

    # Also delete the endpoints.
    del possible_changepoints[0]
    del possible_changepoints[-1]

    changepoints = [ point + 1 for point in possible_changepoints ]
    return tuple(changepoints)
Example #21
comboMV.columns = ["RDA", "RDB", "Total", "Repsol", "Centrica"]
model = statsm.ols(formula="RDA~RDB+Total+Repsol+Centrica", data=comboMV)
resultMV = model.fit()
resultMV.params
resultMV.summary()
statsgraph.plot_fit(resultMV, 1)
#######Partial Auto correlation function PACF & ACF
residualsMV = resultMV.resid
acfModel = acf(residualsMV)
plot_acf(residualsMV)
pacfModel = pacf(residualsMV)
plot_pacf(residualsMV)
####################
# Lag 2
model = mod.VAR(comboMV)
model.select_order(10)

varlag = 2
results = model.fit(varlag)
#results.summary()
coefs = results.coefs[varlag - 1]
residuals = results.resid

stationary_test(residuals, name='Stationarity Test on the Spread')
plt.plot(residuals)
plt.legend(
    loc='best',
    fontsize=8,
    labels=["Royal Dutch A", "Royal Dutch B", "Total", "Repsol", "Centrica"])
plt.xlabel('3 Year Time Series in Days', fontsize=10)
Example #22
def grangers_causation_matrix(data, variables, test='ssr_chi2test', maxlag=12, verbose=False):
    # Assumed enclosing definition and outer loop for this fragment: builds a matrix of
    # Granger-causality p-values; the signature and defaults are reconstructed, not original.
    X_train = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in X_train.columns:
        for r in X_train.index:
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i + 1][0][test][1], 4) for i in range(maxlag)]
            if verbose: print(f'Y= {r}, X = {c}, P-Values = {p_values}')
            min_p_value = np.min(p_values)
            X_train.loc[r, c] = min_p_value

    X_train.columns = [var + '_x' for var in variables]
    X_train.index = [var + '_y' for var in variables]
    return X_train


def cointegration_test(transform_data, alpha=0.05):
    out = coint_johansen(transform_data, -1, 5)
    d = {'0.90': 0, '0.95': 1, '0.99': 2}
    traces = out.lr1
    cvts = out.cvt[:, d[str(1 - alpha)]]

    def adjust(val, length=6): return str(val).ljust(length)

    print('Name :: Test Stat > C(95%)   =>   Signif \n', '--' * 20)
    for col, trace, cvt in zip(transform_data.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">", adjust(cvt, 8), ' =>   ', trace > cvt)


mod = smt.VAR(X_train)
res = mod.fit(maxlags=maxlag, ic='aic')
print(res.summary())


Example #23
data = data_raw.loc['1992-02':'2016-07']
nobs = data.shape[0]

var_list = data.columns

X = pd.concat([data[['l_cpi', 'u']], mb], axis=1)  # assumed reconstruction of a garbled line: join selected columns with the mb series

var_names = [price_name, rea_name, info_name, div_name, uc_name, 'mb']
shock_names = ['cost push', 'demand', 'pcom/infl.target', 'mon. pol', 'finance cost', 'mon. base']


#select lag order by criteria
#p = sm.VAR(X, dates=dates, freq='m').select_order(disp=0)['aic']
#print(p)
h = 48
recVar = sm.VAR(X, dates=X.index, freq='m').fit(maxlags=15, ic='aic')
ASigma = np.linalg.cholesky(recVar.sigma_u)
# save the lag length
p = recVar.k_ar

# gather some info about the specification for saving
sample = [str(recVar.dates.min()).split()[0], str(recVar.dates.max()).split()[0]]
sample = '_to_'.join(sample)
lags = ': '.join(['lags', str(p)])
info_string = ', '.join([sample, lags])

irfs_analysis = recVar.irf(h)
ci_lower, ci_upper = recVar.irf_errband_mc(T=48)

from plotirfs import plot_irfs
rec_irfs = plot_irfs(irfs_analysis.irfs, ci_lower, ci_upper, imps=[0, 1, 3], resps=[0, 1, 2, 3, 4, 5],
Example #24
data = data.diff().dropna()
data.head()

# Out[101]:

#                     blue      goog
#     2011-01-16  0.000358 -0.074634
#     2011-01-23  0.001575  0.000000
#     2011-01-30  0.000335  0.026329
#     2011-02-06 -0.002415  0.024824
#     2011-02-13  0.001094 -0.051153

# In[122]:

# make a VAR model
model = api.VAR(data)

# check on order of variables
model.select_order(8)

# Out[122]:

#                      VAR Order Selection
#     =====================================================
#                aic          bic          fpe         hqic
#     -----------------------------------------------------
#     0       -14.51       -14.48    4.972e-07       -14.50
#     1       -14.71       -14.61    4.080e-07       -14.67
#     2       -14.79       -14.63    3.780e-07       -14.72
#     3      -14.88*      -14.65*   3.447e-07*      -14.79*
#     4       -14.88       -14.59    3.454e-07       -14.76
Example #25
#irf.plot(response='PES10 Index')
#irf.plot_cum_effects(orth=False)
#
#results.test_causality('PES10 Index', ['PES05 Index', 'PES07 Index','PES02 Index'], kind='f')
#
#var = sm.DynamicVAR(data, lag_order=2, window_type='expanding')
#var.coefs
#var.forecast(2)
#var.plot_forecast(2)

# Analyzing the impact of the TPM (monetary policy rate) on the curve

tpmRatesData = b.getHistDataFromBloomberg([
    'CHOVCHOV Index', 'PES02 Index', 'PES05 Index', 'PES07 Index',
    'PES10 Index'
],
                                          init=dt.datetime(2006, 12, 20),
                                          end=dt.datetime(2016, 12, 20),
                                          freq='MONTHLY')

data = tpmRatesData.diff().dropna()

model = sm.VAR(data)
results = model.fit(1)
results.summary()
irf = results.irf(10)
irf.plot(impulse='CHOVCHOV Index')
irf.plot_cum_effects(orth=False)
irf.plot_cum_effects(impulse='CHOVCHOV Index')
results.test_causality('PES10 Index', ['CHOVCHOV Index'], kind='f')
Example #26
# In[34]:

for each in df_train.columns:
    res = adfuller(df_train[each].diff(1).dropna())
    if res[1] < .05:
        print("Series " + each + " is Stationary")
    else:
        print("Series " + each + " is Non-Stationary")

# ### VAR model
# Now we fit a VAR model for the variables. To choose the order of the VAR model, we loop through several orders and keep the model with the lowest AIC. Here order 2 gives the lowest AIC, so we fit the VAR with order 2 (a sketch using the built-in select_order is shown after the loop below).

# In[35]:

df_tdiff = df_train.diff(1).dropna()
var_model = smt.VAR(df_tdiff)

# In[36]:

best_aic = np.inf
order = None
fitted_model = None
for i in range(7):
    res = var_model.fit(i + 1)
    print(res.aic)
    if res.aic < best_aic:
        best_aic = res.aic
        order = i + 1
        fitted_model = res
    else:
        continue
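# Hedged alternative to the manual AIC loop above, assuming the newer statsmodels API in which
# VAR.select_order returns a LagOrderResults object with per-criterion selected lags.
order_results = var_model.select_order(maxlags=7)  # maxlags=7 mirrors the range of the loop
print(order_results.summary())
fitted_model = var_model.fit(order_results.aic)  # refit at the AIC-selected order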