Example #1
def test_blank(small_data, std_data):
    small_mod = ZeroMean(small_data, volatility=GARCH(), rescale=False)
    small_res = small_mod.fit(starting_values=np.array([1e-3, 0.05, 0.90]),
                              disp="off")
    mod = ZeroMean(std_data, volatility=GARCH(), rescale=False)
    res = mod.fit(starting_values=np.array([1, 0.05, 0.90]), disp="off")
    assert_allclose(1e3 * small_res.params[0], res.params[0], rtol=5e-3)
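# Why 1e3: scaling the data by c scales the GARCH intercept omega by c**2,
# while alpha and beta are scale-free. A standalone check of that law
# (my sketch, not part of the original test suite):
import numpy as np
from arch.univariate import GARCH, Normal, ZeroMean

rs = np.random.RandomState(0)
sim_mod = ZeroMean(None, volatility=GARCH(), distribution=Normal(random_state=rs))
y = sim_mod.simulate([0.1, 0.05, 0.90], nobs=2000).data

omega = ZeroMean(y, volatility=GARCH(), rescale=False).fit(disp="off").params["omega"]
omega_10x = ZeroMean(10 * y, volatility=GARCH(), rescale=False).fit(disp="off").params["omega"]
# omega_10x should come out near 100 * omega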
Example #2
def test_rescale_fit(small_data, std_data):
    small_mod = ZeroMean(small_data, volatility=GARCH(), rescale=True)
    small_res = small_mod.fit(disp="off")
    direct_mod = ZeroMean(10 * small_data, volatility=GARCH())
    direct_res = direct_mod.fit(disp="off")
    assert_allclose(small_res.loglikelihood, direct_res.loglikelihood)
    small_fcast = small_res.forecast(start=0)
    direct_fcast = direct_res.forecast(start=0)
    assert_allclose(small_fcast.variance, direct_fcast.variance)
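# Follow-up sketch (my addition): with rescale=True the model may multiply the
# data by a power of 10 before estimating; the multiplier is reported on the
# result, so parameters can be mapped back to the original scale.
small_res = ZeroMean(small_data, volatility=GARCH(), rescale=True).fit(disp="off")
print(small_res.scale)  # e.g. 10.0 if the data were scaled up by a factor of 10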
Example #3
    def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
        self.exog_names.append('beta')
        self.exog_names.append('theta')
        self.exog_names.append('a')
        self.exog_names.append('b')
        self.exog_names.append('c_1')
        self.exog_names.append('c_2')

        gar_0 = ConstantMean(data['spread'])
        gar_0.volatility = GARCH(p=2, q=1)
        gar_0_r = gar_0.fit()
        gar_pa_0 = np.array(gar_0_r.params)
        sigma_2 = gar_0_r.conditional_volatility
        #        sigma_2 = np.sqrt(gar_0_r.conditional_volatility)

        mean_0 = statsmodels.tsa.arima_model.ARMA(data['spread'],
                                                  exog=sigma_2,
                                                  order=(0, 1))
        mean_0_r = mean_0.fit()
        mean_pa_0 = np.array(mean_0_r.params)

        # Alternative hard-coded starting values kept from earlier runs:
        # start_params = np.concatenate([[-0.001], [0.073], [-0.157], [gar_pa_0[1]], [gar_pa_0[4]], gar_pa_0[2:4]])
        # start_params = np.array([-0.001, 0.073, -0.157, 0.00006, 0.918, 0.121, -0.043])
        start_params = np.concatenate(
            [mean_pa_0, [gar_pa_0[1]], [gar_pa_0[4]], gar_pa_0[2:4]])
        # start_params = np.array([0.201, 2.41, -0.157, 0.00006, 0.918, 0.121, -0.043])
        return super(garch_m, self).fit(start_params=start_params,
                                        maxiter=maxiter,
                                        maxfun=maxfun,
                                        **kwds)
Example #4
def small_data():
    rs = np.random.RandomState([2389280, 238901, 382908031])
    mod = ZeroMean(None,
                   volatility=GARCH(),
                   distribution=Normal(random_state=rs))
    sim = mod.simulate([1e-4, 0.05, 0.90], nobs=1000)
    return sim.data
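# A plausible companion fixture (my reconstruction): the same seed and
# dynamics as small_data with omega scaled from 1e-4 up to 1e-1, consistent
# with the factor-of-1e3 assertion on the fitted omegas in Example #1.
def std_data():
    rs = np.random.RandomState([2389280, 238901, 382908031])
    mod = ZeroMean(None,
                   volatility=GARCH(),
                   distribution=Normal(random_state=rs))
    sim = mod.simulate([1e-1, 0.05, 0.90], nobs=1000)
    return sim.data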
Example #5
def run_garch_simple(y, mean_model, vol_model, split_date, x=None, verbose=False):

    # specify mean model
    if mean_model == "CONST":
        ls = ConstantMean(y)
    elif mean_model == 'LS':
        ls = LS(y=y, x=x)
    elif mean_model == 'ARX':
        ls = ARX(y=y, lags=1)
    else:
        raise ValueError("Misspecified mean model name. Please choose between CONST, LS, ARX.")
    
    # specify volatility model
    if vol_model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif vol_model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif vol_model == "EWMA":
        ls.volatility = EWMAVariance(lam=None)
    else:
        raise ValueError("Misspecified volatility process name. Please choose between GARCH, EGARCH, EWMA.")
    
    res = ls.fit(disp='off', last_obs=split_date)
    
    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())
    
    return res
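# Minimal usage sketch (my addition): simulate a GARCH(1,1) series and fit a
# constant-mean GARCH on the first 800 observations.
import numpy as np
from arch.univariate import GARCH, Normal, ZeroMean

rs = np.random.RandomState(0)
y = ZeroMean(None, volatility=GARCH(),
             distribution=Normal(random_state=rs)).simulate([0.1, 0.05, 0.9],
                                                            nobs=1000).data
res = run_garch_simple(y, mean_model="CONST", vol_model="GARCH", split_date=800)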
Example #6
    def getvolatility(self):
        df = volatility.getyieldrate(self)
        vol = 0.0
        for i in range(1, len(df)):
            vol += df[i] * df[i] / 10000.0
        am = ConstantMean(df)
        am.volatility = GARCH(1, 0, 1)
        am.distribution = Normal()
        res = am.fit()
        print('vol = ' + str(vol))
        print(res.summary())
        return 0
Example #7
def run_garch_rolling(y, rvol, model, split_date, x=None, verbose=True, lam=None):

    # specify mean model
    ls = ConstantMean(y=y)
    
    # specify volatility model
    if model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif model == "EWMA":
        ls.volatility = EWMAVariance(lam)
    else:
        raise ValueError("Misspecified volatility process name")
    
    res = ls.fit(disp='off', last_obs=split_date)
    
    forecasts_1d = res.forecast(horizon=1)
    forecasted_vol = forecasts_1d.variance.pow(0.5).shift(1).dropna()
    
    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()

    test_MAE = np.abs(test_merged.iloc[:,0] - test_merged.iloc[:,1]).sum()
    train_MAE = np.abs(train_merged.iloc[:,0] - train_merged.iloc[:,1]).sum()
    MAE = [train_MAE, test_MAE]
    
    test_MSE = np.square(test_merged.iloc[:,0] - test_merged.iloc[:,1]).sum()
    train_MSE = np.square(train_merged.iloc[:,0] - train_merged.iloc[:,1]).sum()
    MSE = [train_MSE, test_MSE]
    
    test_HMAE = np.abs(1 - test_merged.iloc[:,1] / test_merged.iloc[:,0]).sum()
    train_HMAE = np.abs(1 - train_merged.iloc[:,1] / train_merged.iloc[:,0]).sum()
    HMAE = [train_HMAE, test_HMAE]
    
    test_HMSE = np.square(1 - test_merged.iloc[:,1] / test_merged.iloc[:,0]).sum()
    train_HMSE = np.square(1 - train_merged.iloc[:,1] / train_merged.iloc[:,0]).sum()
    HMSE = [train_HMSE, test_HMSE]

    df_results = pd.DataFrame(
        data=np.c_[MAE, MSE, HMAE, HMSE].T,
        columns=[model + ' ' + x for x in ['in-sample', 'out-of-sample']],
        index=['MAE', 'MSE', 'HMAE', 'HMSE'],
    ).T
    
    return df_results, len(train_merged), len(test_merged)
Example #8
def return_sampler_garch(
    N_train: int,
    mean_process: str = "Constant",
    lags_mean_process: int = None,
    vol_process: str = "GARCH",
    distr_noise: str = "normal",
    seed: int = None,
    seed_param: int = None,
    p_arg: list = None,
) -> Tuple[np.ndarray, pd.Series]:
    # https://stats.stackexchange.com/questions/61824/how-to-interpret-garch-parameters
    # https://arch.readthedocs.io/en/latest/univariate/introduction.html
    # https://arch.readthedocs.io/en/latest/univariate/volatility.html
    # https://github.com/bashtage/arch/blob/master/arch/univariate/volatility.py
    """
    Generates financial returns driven by mean-reverting factors.

    Parameters
    ----------
    N_train: int
        Length of the experiment

    mean_process: str
        Mean process for the returns. It can be 'Constant' or 'AR'

    lags_mean_process: int
        Order of autoregressive lag if mean_process is AR

    vol_process: str
        Volatility process for the returns. It can be 'GARCH', 'EGARCH', 'TGARCH',
        'ARCH', 'HARCH', 'FIGARCH' or 'Constant'. Note that different volatility
        processes require different parameters, which are hard-coded. If you want
        to pass them explicitly, use p_arg.

    distr_noise: str
        Distribution for the unpredictable component of the returns. It can be
        'normal', 'studt', 'skewstud' or 'ged'. Note that different distributions
        require different parameters, which are hard-coded. If you want to
        pass them explicitly, use p_arg.

    seed: int
        Seed for experiment reproducibility

    seed_param: int
        Seed for drawing randomly the parameters needed for the simulation. The
        ranges provided are obtained as average lower and upper bounds of several
        GARCH-type model fitting on real financial time-series.

    p_arg: list
        Parameters to pass explicitly. They need to be passed in the right
        order. Check the documentation of the arch python package
        (https://arch.readthedocs.io/en/latest/index.html) for more details.
    Returns
    -------
    simulations['data'].values: np.ndarray
        Simulated series of returns
    p: pd.Series
        Series of parameters used for simulation
    """
    names = []
    vals = []

    if seed_param is None:
        seed_param = seed

    rng = np.random.RandomState(seed_param)

    # choose mean process
    if mean_process == "Constant":
        model = ConstantMean(None)
        names.append("const")
        if seed_param:
            vals.append(rng.uniform(0.01, 0.09))
        else:
            vals.append(0.0)

    elif mean_process == "AR":
        model = ARX(None, lags=lags_mean_process)
        names.append("const")
        vals.append(0.0)
        if seed_param:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(rng.uniform(-0.09, 0.09))
        else:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(0.9)

    else:
        print("This mean process doesn't exist or it's not available.")
        sys.exit()

    # choose volatility process
    if vol_process == "GARCH":
        model.volatility = GARCH(p=1, q=1)
        names.extend(["omega", "alpha", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.1)
            alph = rng.uniform(0.05, 0.1)
            b = rng.uniform(0.86, 0.92)
            garch_p = np.array([om, alph, b]) / (np.array([om, alph, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            b = 0.94
            garch_p = np.array([om, alph, b])
        vals.extend(list(garch_p))

    elif vol_process == "ARCH":
        model.volatility = GARCH(p=1, q=0)

        names.extend(["omega", "alpha"])
        if seed_param:
            om = rng.uniform(1.4, 4.0)
            alph = rng.uniform(0.1, 0.6)
        else:
            om = 0.01
            alph = 0.4
        garch_p = np.array([om, alph])
        vals.extend(list(garch_p))

    elif vol_process == "HARCH":
        model.volatility = HARCH(lags=[1, 5, 22])

        names.extend(["omega", "alpha[1]", "alpha[5]", "alpha[22]"])
        if seed_param:
            om = rng.uniform(0.5, 1.2)  # bounds ordered low-to-high
            alph1 = rng.uniform(0.01, 0.1)
            alph5 = rng.uniform(0.05, 0.3)
            alph22 = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            alph1 = 0.05
            alph5 = 0.15
            alph22 = 0.5
        garch_p = np.array([om, alph1, alph5, alph22])
        vals.extend(list(garch_p))

    elif vol_process == "FIGARCH":
        model.volatility = FIGARCH(p=1, q=1)

        names.extend(["omega", "phi", "d", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.05)  # bounds ordered low-to-high
            phi = rng.uniform(0.1, 0.35)
            d = rng.uniform(0.3, 0.5)
            beta = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            phi = 0.2
            d = 0.2
            beta = 0.55
        garch_p = np.array([om, phi, d, beta])
        vals.extend(list(garch_p))

    elif vol_process == "TGARCH":
        model.volatility = GARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.02, 0.15)
            alph = rng.uniform(0.01, 0.07)
            gamma = rng.uniform(0.03, 0.1)
            b = rng.uniform(0.88, 0.94)
        else:
            om = 0.01
            alph = 0.05
            gamma = 0.04
            b = 0.90
        garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "EGARCH":
        model.volatility = EGARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.01, 0.03)
            alph = rng.uniform(0.06, 0.17)
            gamma = rng.uniform(-0.05, -0.02)
            b = rng.uniform(0.97, 0.99)
            garch_p = np.array([om, alph, gamma, b]) / (np.array(
                [om, alph, gamma, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            gamma = -0.02
            b = 0.94
            garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "Constant":
        model.volatility = ConstantVariance()
        names.append("sigma_const")
        vals.append(rng.uniform(0.02, 0.05))
    else:
        print("This volatility process doesn't exist or it's not available.")
        sys.exit()

    if distr_noise == "normal":
        model.distribution = Normal(np.random.RandomState(seed))
    elif distr_noise == "studt":
        model.distribution = StudentsT(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.randint(6, 10))
        else:
            vals.append(8.0)
    elif distr_noise == "skewstud":
        model.distribution = SkewStudent(np.random.RandomState(seed))
        names.extend(["nu", "lambda"])
        if seed_param:
            vals.extend([rng.uniform(6.0, 10.0), rng.uniform(-0.1, 0.1)])
        else:
            vals.extend([8.0, 0.05])
    elif distr_noise == "ged":
        model.distribution = GeneralizedError(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.uniform(1.05, 3.0))
        else:
            vals.append(2.0)
    else:
        print("This noise distribution doesn't exist or it's not available.")
        sys.exit()

    p = pd.Series(data=vals, index=names)
    if p_arg:
        p = p_arg
    simulations = model.simulate(p, N_train) / 100

    return simulations["data"].values, p
Example #9
def return_sampler_GP(
    N_train: int,
    sigmaf: Union[float, list, np.ndarray],
    f_param: Union[float, list, np.ndarray],
    sigma: Union[float, list, np.ndarray],
    HalfLife: Union[int, list, np.ndarray],
    rng: np.random.mtrand.RandomState = None,
    offset: int = 2,
    uncorrelated: bool = False,
    t_stud: bool = False,
    degrees: int = 8,
    vol: str = "omosk",
    dt: int = 1,
    disable_tqdm: bool = False,
) -> Tuple[Union[list, np.ndarray], Union[list, np.ndarray],
           Union[list, np.ndarray]]:
    """
    Generates financial returns driven by mean-reverting factors.

    Parameters
    ----------
    N_train : int
        Length of the experiment

    sigmaf : Union[float, list, np.ndarray]
        Volatilities of the mean-reverting factors

    f_param: Union[float, list, np.ndarray]
        Factor loadings of the mean-reverting factors

    sigma: Union[float, list, np.ndarray]
        Volatility of the asset return (additional noise beyond the intrinsic
        noise in the factors)

    HalfLife: Union[int, list, np.ndarray]
        Half-life of mean reversion, used to simulate factors with different
        speeds

    rng: np.random.mtrand.RandomState
        Random number generator for reproducibility

    offset: int = 2
        Number of additional observations to simulate

    uncorrelated: bool = False
        Whether the simulated factors are uncorrelated

    t_stud : bool = False
        Whether to draw Student's t noise

    degrees : int = 8
        Degrees of freedom for the Student's t noise

    vol: str = 'omosk'
        Choose between 'omosk' (homoskedastic) and 'heterosk' (GARCH-driven)
        noise

    dt: int = 1
        Time step used in the factor recursion

    disable_tqdm: bool = False
        Whether to silence the tqdm progress bar
    Returns
    -------
    realret: Union[list, np.ndarray]
        Simulated series of returns
    factors: Union[list, np.ndarray]
        Simulated series of factors
    f_speed: Union[list, np.ndarray]
        Speed of mean reversion computed from the HalfLife argument
    """

    # Use N_train + offset because each iteration needs one extra observation
    # (the final state representation), and because we want the index to end
    # at 1000 rather than 999 when plotting in-sample results every thousand
    # observations.

    # Generate the stochastic factor components and compute the speed of mean
    # reversion: each factor is simulated as an OU process with its own speed
    # and initialization point. It is faster to grow a Python list than a
    # NumPy array, so we accumulate in a list and convert it later.
    # https://www.jmp.com/en_us/statistics-knowledge-portal/t-test/t-distribution.html#:~:text=The%20shape%20of%20the%20t,%E2%80%9D%20than%20the%20z%2Ddistribution.

    lambdas = np.around(np.log(2) / HalfLife, 4)

    f0 = np.zeros(shape=(len(lambdas), ))

    if vol == "omosk":
        if t_stud:
            if uncorrelated:
                eps = rng.standard_t(degrees,
                                     (N_train + offset, len(HalfLife)))
            else:
                eps = rng.standard_t(degrees, (N_train + offset))
        else:
            if uncorrelated:
                eps = rng.randn(N_train + offset, len(HalfLife))
            else:
                eps = rng.randn(N_train + offset)

        f = []

        # possibility of triple noise
        for i in tqdm(
                iterable=range(N_train + offset),
                desc="Simulating Factors",
                disable=disable_tqdm,
        ):
            # np.multiply is the Hadamard (elementwise) product; to give each
            # factor its own volatility, the second term could be scaled the
            # same way
            f1 = np.multiply((1 - lambdas * dt), f0) + np.multiply(
                np.array(sigmaf) * np.sqrt(dt), eps[i])
            f.append(f1)
            f0 = f1

    elif vol == "heterosk":
        volmodel = GARCH(p=1, q=1)
        # these factors, if multiple, are uncorrelated by default because the noise is constructed one by one
        if len(sigmaf) > 1:

            eps = []
            for i in range(len(sigmaf)):
                om = sigmaf[i]**2  # same vol as original GP experiments
                alph = 0.05
                b = 1 - alph - om
                garch_p = np.array([om, alph, b])

                e = volmodel.simulate(garch_p, N_train + offset, rng.randn)[0]
                eps.append(e.reshape(-1, 1))

            eps = np.concatenate(eps, axis=1)
        else:

            om = sigmaf[0]**2  # same vol as original GP experiments
            alph = 0.05
            b = 1 - alph - om
            garch_p = np.array([om, alph, b])

            eps = volmodel.simulate(garch_p, N_train + offset, rng.randn)[0]

        f = []
        # possibility of triple noise
        for i in tqdm(
                iterable=range(N_train + offset),
                desc="Simulating Factors",
                disable=disable_tqdm,
        ):
            # np.multiply is the Hadamard (elementwise) product; to give each
            # factor its own volatility, the second term could be scaled the
            # same way
            f1 = np.multiply((1 - lambdas * dt), f0) + eps[i] * np.sqrt(dt)
            f.append(f1)
            f0 = f1
    else:
        print("Choose proper volatility setting")
        sys.exit()

    factors = np.vstack(f)
    if vol == "omosk":
        if t_stud:
            u = rng.standard_t(degrees, N_train + offset)
        else:
            u = rng.randn(N_train + offset)

        realret = np.sum(f_param * factors, axis=1) + sigma * u

    elif vol == "heterosk":
        volmodel = GARCH(p=1, q=1)
        om = sigma**2  # same vol as original GP experiments
        alph = 0.05
        b = 1 - alph - om
        garch_p = np.array([om, alph, b])

        u = volmodel.simulate(garch_p, N_train + offset, rng.randn)[0]

        realret = np.sum(f_param * factors, axis=1) + sigma * u
    else:
        print("Choose proper volatility setting")
        sys.exit()
    f_speed = lambdas

    return realret.astype(np.float32), factors.astype(np.float32), f_speed
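# Usage sketch (my addition): one mean-reverting factor with a 10-period
# half-life and homoskedastic Gaussian noise.
rng = np.random.RandomState(7)
ret, factors, f_speed = return_sampler_GP(
    N_train=1000, sigmaf=[0.2], f_param=[0.5], sigma=0.1,
    HalfLife=[10], rng=rng, uncorrelated=True, disable_tqdm=True,
)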
Example #10
print(model.summary())

#5.
cny = web.DataReader('CNY=X', 'yahoo', dt.datetime(2015, 1, 1),
                     dt.datetime(2015, 12, 31))

ret = (cny.Close - cny.Close.shift(1)) / cny.Close.shift(1)
ret = ret.dropna()

cny.Close.plot()

ret.plot()
plot_acf(ret, lags=20)
plot_pacf(ret, lags=20)

LjungBox = stattools.q_stat(stattools.acf(ret)[1:13], len(ret))
LjungBox[1][-1]

(ret**2).plot()
plot_acf(ret**2, lags=20)
plot_pacf(ret**2, lags=20)

LjungBox = stattools.q_stat(stattools.acf(ret**2)[1:13], len(ret))
LjungBox[1][-1]

from arch.univariate import ARX, GARCH
model = ARX(ret, lags=1)
model.volatility = GARCH()
res = model.fit()
print(res.summary())
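# Follow-on sketch (my addition): analytic variance forecasts from the fitted
# AR(1)-GARCH(1,1) model, one to five days ahead.
fcast = res.forecast(horizon=5)
print(fcast.variance.dropna().tail(1))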
Example #11
    return 1.0 / (q / hill_est['c'])**hill_est['xi']


def CornishFisherPpf(q, resid):
    nppf = stats.norm.ppf(q)
    s = stats.skew(resid)
    k = stats.kurtosis(resid)
    cfp = -(nppf + 0.74 * s - 0.24 * k + 0.38 * s**2)

    return cfp


# using GARCH(1,1)-t as filter

tsm = ConstantMean(returns)
garch = GARCH(p=1, q=1)
tsm.volatility = garch
tsm.distribution = StudentsT()
rst = tsm.fit()

print(rst)

sns.distplot(rst.std_resid, fit=stats.t)

sm.graphics.qqplot(rst.std_resid, line='45')

hillEst = HillEstimator(rst.std_resid, 50)
print(hillEst)

tailProb = 1 / 100.0
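# Usage sketch (my addition): CornishFisherPpf above already negates the
# lower-tail quantile, so scaling it by a one-step volatility forecast gives
# a positive loss-style VaR for the next period.
cfq = CornishFisherPpf(tailProb, rst.std_resid)
sigma_next = np.sqrt(rst.forecast(horizon=1).variance.iloc[-1, 0])
VaR_next = sigma_next * cfq - rst.params['mu']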
Example #12
import datetime as dt

import pandas_datareader.data as web

from arch import arch_model
from arch.univariate import ConstantMean, GARCH, Normal
#from arch.univariate import ZeroMean, GARCH, Normal

start = dt.datetime(2000, 1, 1)
end = dt.datetime(2014, 1, 1)
sp500 = web.DataReader('^GSPC', 'yahoo', start=start, end=end)
returns = 100 * sp500['Adj Close'].pct_change().dropna()

am = ConstantMean(returns)
am.volatility = GARCH(1, 0, 1)
am.distribution = Normal()

res = am.fit()

res.summary()

# %%

# import the packages
import numpy as np
from scipy.optimize import minimize
import scipy.stats as stats
import time

# Set up your x values
Example #13
mtss_am = arch_model(mtss_returns)
mtss_res = mtss_am.fit(update_freq=5, disp='off')
mfon_am = arch_model(mfon_returns)
mfon_res = mfon_am.fit(update_freq=5, disp='off')

mfon_res.conditional_volatility
mfon_vol = mfon_res.conditional_volatility * np.sqrt(252)
mtss_res.conditional_volatility
mtss_vol = mtss_res.conditional_volatility * np.sqrt(252)


cm = ConstantMean(mtss_returns)
res = cm.fit(update_freq=5)
f_pvalue = het_arch(res.resid)[3]

cm.volatility = GARCH(p=1, q=1)

p1 = plt.plot(mfon_vol)
p2 = plt.plot(mtss_vol)
plt.title('ASSAD')
p = plt.legend((p1[0], p2[0]), ('MFON', 'MTSS'))

from scipy import stats

pvalue = 1 - stats.chi2.cdf(0.940659, 1)

from arch import arch_model
from scipy import stats

def find_garch(values, max_p=5, max_q=5):
    def lr_test(r1, r2):
        ...  # body truncated in the source
Example #14
def idiosyncratic_forecast(x, y, p, o, q):
    ls = LS(y, x)
    ls.volatility = GARCH(p=p, o=o, q=q)
    res = ls.fit()
    forecast = res.forecast(horizon=2)
    return forecast.residual_variance[-1:]['h.2'].to_list()[0]
Example #15
def simulate_2(PARS, sample_size):
    zm = ZeroMean()
    zm.volatility = GARCH(p=1, q=1)
    sim_data = zm.simulate(PARS, sample_size)

    return sim_data['data']
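# Usage sketch (my addition): PARS is [omega, alpha, beta] for the zero-mean
# GARCH(1,1); here a persistent process with alpha + beta = 0.98.
data = simulate_2(np.array([0.02, 0.08, 0.90]), 1000)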
Example #16
def test_arx_no_lags():
    mod = ARX(SP500, volatility=GARCH())
    res = mod.fit(disp="off")
    assert res.params.shape[0] == 4
    assert "lags" not in mod._model_description(include_lags=False)
Example #17
def run_garch(y, rvol, model, split_date, x=None, verbose=True, lam=None):

    # specify mean model
    ls = ConstantMean(y=y)
    
    # specify volatility model
    if model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif model == "EWMA":
        ls.volatility = EWMAVariance(lam)
    else:
        raise ValueError("Misspecified volatility process name")
    
    res = ls.fit(disp='off', last_obs=split_date)
    
    forecasts_1d = res.forecast(horizon=1)
    forecasted_vol = forecasts_1d.variance.pow(0.5).shift(1).dropna()
    
    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()

    test_MAE = np.abs(test_merged.iloc[:,0] - test_merged.iloc[:,1]).mean()
    train_MAE = np.abs(train_merged.iloc[:,0] - train_merged.iloc[:,1]).mean()
    total_MAE = (test_MAE * len(test_merged) + train_MAE * len(train_merged)) / (len(test_merged) + len(train_merged))
    MAE = [train_MAE, test_MAE, total_MAE]
    
    test_MSE = np.square(test_merged.iloc[:,0] - test_merged.iloc[:,1]).mean()
    train_MSE = np.square(train_merged.iloc[:,0] - train_merged.iloc[:,1]).mean()
    total_MSE = (test_MSE * len(test_merged) + train_MSE * len(train_merged)) / (len(test_merged) + len(train_merged))
    MSE = [train_MSE, test_MSE, total_MSE]
    
    test_HMAE = np.abs(1 - test_merged.iloc[:,1] / test_merged.iloc[:,0]).mean()
    train_HMAE = np.abs(1 - train_merged.iloc[:,1] / train_merged.iloc[:,0]).mean()
    total_HMAE = (test_HMAE * len(test_merged) + train_HMAE * len(train_merged)) / (len(test_merged) + len(train_merged))
    HMAE = [train_HMAE, test_HMAE, total_HMAE]
    
    test_HMSE = np.square(1 - test_merged.iloc[:,1] / test_merged.iloc[:,0]).mean()
    train_HMSE = np.square(1 - train_merged.iloc[:,1] / train_merged.iloc[:,0]).mean()
    total_HMSE = (test_HMSE * len(test_merged) + train_HMSE * len(train_merged)) / (len(test_merged) + len(train_merged))
    HMSE = [train_HMSE, test_HMSE, total_HMSE]

    df_results = pd.DataFrame(
        data=np.c_[MAE, MSE, HMAE, HMSE].T,
        columns=[model + ' ' + x for x in ['in-sample', 'out-of-sample', 'total']],
        index=['MAE', 'MSE', 'HMAE', 'HMSE'],
    ).T

    if verbose:
        
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())
        
        display(Markdown('#### <br> <br> Plot forecast by model vs realized vol'))
        ax = plt.gca()
        forecasted_vol.plot(color='g', ax=ax, alpha=1, label='prediction oos')
        rvol.plot(color='blue', ax=ax, label='ground truth')
        res.conditional_volatility.plot(color='orange', ax=ax, label='prediction in-sample')
        ax.legend()
        
        display(Markdown('#### <br> <br> Results of out-of-sample forecasts with various loss functions'))
        display(df_results)
        
    return df_results
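# Usage sketch (my addition): `ret` and `realized_vol` stand in for a return
# series and a realized-volatility series sharing one index; the split date
# separates in-sample from out-of-sample.
# df_results = run_garch(ret, realized_vol, model="GARCH",
#                        split_date=dt.datetime(2019, 1, 1), verbose=False)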
Example #18
        cor_num = stats.pearsonr(cut['sp'], cut['tn'])

        cor0.loc[p, 'cor'] = cor_num[0]
    else:
        cut = returns.loc[(returns['sp'] > score_sp) &
                          (returns['tn'] > score_tn), ]

        cor_num = stats.pearsonr(cut['sp'], cut['tn'])

        cor0.loc[p, 'cor'] = cor_num[0]

cor0.plot()

tsm_sp = ZeroMean(returns['sp'])
garch = GARCH()
tsm_sp.volatility = garch
tsm_sp.distribution = StudentsT()
rst_sp = tsm_sp.fit()

filtered_sp = rst_sp.std_resid

tsm_tn = ZeroMean(returns['tn'])
garch = GARCH()
tsm_tn.volatility = garch
tsm_tn.distribution = StudentsT()
rst_tn = tsm_tn.fit()

filtered_tn = rst_tn.std_resid

filtered_returns = pd.DataFrame(dict(sp=filtered_sp, tn=filtered_tn),
Example #19
# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()
# Test of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)

# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])  # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
print(res.summary())

# Forecast
drop = len(data) - len(rates)
start = 3254 - 2 - drop
end = 3262 - 2 - drop

var = res.forecast(start=start, horizon=5,
                   method='simulation').variance[start:1 + end]
var.plot()
entry = [
    '2012:06:20',
    '2012:06:21',
Example #20
from statsmodels.tsa.arima_model import ARMA
import pandas
import numpy
import statsmodels.api as sm

prices = pandas.read_csv("prices.csv", parse_dates=['Date'], index_col=0)
tickers = prices.columns[:-2]
prices = prices.resample('W').agg(lambda x: x[-1])
prices.dropna(axis=0, how='any', inplace=True)
rf = prices['^TNX'].values[:-1]
rf /= (52 * 100)
returns = prices.iloc[:, :-1].pct_change()[1:]
rm = returns['^GSPC'].values
ri = returns.iloc[:, :-1].values
Ri = ri - rf[:, numpy.newaxis]
Rm = rm - rf
model = sm.OLS(Ri, sm.add_constant(Rm))
results = model.fit()
alpha, beta = results.params
epsilon = numpy.sqrt(Ri.var(axis=0) - beta**2 * Rm.var(axis=0))
output = pandas.DataFrame(columns=['alpha', 'beta', 'epsilon'],
                          index=tickers,
                          data=numpy.array([alpha, beta, epsilon]).T)
output.to_csv("coefficients.csv")
from arch.univariate import ARX, GARCH
arx = ARX(rm, lags=1)
arx.volatility = GARCH()
res = arx.fit(disp='off')
pandas.DataFrame(res.params).to_csv("parameters.csv")
Example #21
            mu = forecast.mean.iloc[-1, 0]
            var = forecast.variance.iloc[-1, 0]
            result.append([(test_set-mu)**2, var])
        df = pd.DataFrame(result, columns=['y_true', 'y_pred'])
        results[(p, q)] = np.sqrt(mean_squared_error(df.y_true, df.y_pred))


s = pd.Series(results)
s.index.names = ['p', 'q']
s = s.unstack().sort_index(ascending=False)

sns.heatmap(s, cmap='Blues', annot=True, fmt='.4f')
plt.title('Out-of-Sample RMSE')
plt.savefig(f'{str(iop)}Out-of-Sample RMSE.png')



''' estimate GARCH model '''
best_p, best_q = 2, 2
am = ConstantMean(nasdaq_returns.clip(lower=nasdaq_returns.quantile(.05),
                                      upper=nasdaq_returns.quantile(.95)))
am.volatility = GARCH(best_p, 0, best_q)
am.distribution = Normal()
best_model = am.fit(update_freq=5)
print(best_model.summary())

fig = best_model.plot(annualize='D')
fig.set_size_inches(12, 8)
fig.tight_layout()

plot_correlogram(best_model.resid.dropna(), lags=250, title='GARCH Residuals')
Example #22
def bruteforce_ts_model(returns, start_p, start_q, max_p, max_q):
    """ This methods bruteforce each possible combination of the ARCH family models. (e.g. ARCH(3), GARCH(3,4), EGARCH(1,3))
        Records its score and save it.

        Args: 
            returns (pandas.Series) : Contains the list of all the returns.
            start_p (int) : Integer who gives the starting point of the range of p parameter
            start_q (int) : Integer who gives the starting point of the range of q parameter
            max_p (int) : Integer who gives the ending point of the range of p parameter
            max_q (int) : Integer who gives the ending point of the range of q parameter

        Output:
            df (pandas.DataFrame) : Dataframe containing all the models and Information criteria
    """

    # We define our list of models to test
    model_types = ['ARCH', 'GARCH', 'EGARCH']

    # We define our list of distribution to test
    dist_types = ['normal', 'studentst', 'skewstudent']

    # We define our list
    AIC_score = []
    BIC_score = []
    LL_score = []
    model_list = []
    mean_model_list = []
    dist_list = []
    q_list = []
    p_list = []

    # We compute the total number of models
    max_iter = max_p * max_q * len(model_types) * len(dist_types)
    current_iter = 0

    # For each model we have
    for model in model_types:

        # For each parameter p
        for each_p in range(start_p, max_p):

            # For each parameter q
            for each_q in range(start_q, max_q):

                # For each distribution type
                for dist in dist_types:

                    # We define our mean model
                    am = ConstantMean(returns)

                    # We define our constant mean
                    mean_model_list.append('ConstantMean')

                    # Our distribution
                    if dist == 'normal':
                        am.distribution = Normal()
                    elif dist == 'studentst':
                        am.distribution = StudentsT()
                    elif dist == 'skewstudent':
                        am.distribution = SkewStudent()

                    # Our volatility process
                    if model == "ARCH":
                        am.volatility = ARCH(p=each_p)
                    elif model == "GARCH":
                        am.volatility = GARCH(p=each_p, q=each_q)
                    elif model == "EGARCH":
                        am.volatility = EGARCH(p=each_p, q=each_q)

                    # We fit our model
                    res = am.fit(update_freq=5, disp='off')

                    # We record our model and distribution
                    model_list.append(model)
                    dist_list.append(dist)

                    # We record the scores
                    AIC_score.append(res.aic)
                    BIC_score.append(res.bic)
                    LL_score.append(res.loglikelihood)

                    # We record the parameters
                    q_list.append(each_q)
                    p_list.append(each_p)

                    # We log the information about each computed model
                    print(
                        f"it: {current_iter}/{max_iter}\tmodel:{model}\tdist:{dist[:6]}\tp:{each_p}\tq:{each_q}\tAIC_score:{round(res.aic,2)}\tBIC_score:{round(res.bic,2)}\tLog Likelihood:{round(res.loglikelihood,2)}"
                    )

                    # If a model has been added then we add one to the iterator
                    current_iter += 1

        # For each computed model
        print("=" * 20, f"{model} finished", "=" * 20)

    # We combine everything to a dataframe
    df = pd.DataFrame({
        'volatility_model': model_list,
        'mean_model': mean_model_list,
        'dist': dist_list,
        'p': p_list,
        'q': q_list,
        'AIC_score': AIC_score,
        'BIC_score': BIC_score,
        'LL_score': LL_score
    })
    return df
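# Usage sketch (my addition): search p, q in [1, 3) for each model family and
# rank the fits by AIC; `returns` is the return series from the docstring.
df_models = bruteforce_ts_model(returns, start_p=1, start_q=1, max_p=3, max_q=3)
print(df_models.sort_values('AIC_score').head())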
Example #23
eqCurves['Buy and Hold'].plot()
plt.legend()
plt.show()

# # From Arch website

# In[273]:

from arch.univariate import ARX
ar = ARX(Y, lags=30)
print(ar.fit().summary())

# In[270]:

from arch.univariate import ARCH, GARCH
ar.volatility = GARCH(p=3, o=0, q=3)
res = ar.fit(update_freq=0, disp='off')
p(res.summary())

# In[265]:

from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
p(res.summary())

# In[266]:

arf = ar.forecast(horizon=forecast_steps,
                  start=Y.index[-1],
                  params=res.params,
Example #24
from itertools import product

from arch.data import sp500
from arch.univariate import (
    APARCH,
    ARX,
    ConstantMean,
    ConstantVariance,
    EGARCH,
    EWMAVariance,
    FIGARCH,
    GARCH,
    HARCH,
    HARX,
    MIDASHyperbolic,
    RiskMetrics2006,
    ZeroMean,
    arch_model,
)
from arch.univariate.mean import _ar_forecast, _ar_to_impulse

SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()

MEAN_MODELS = [
    HARX(SP500, lags=[1, 5]),
    ARX(SP500, lags=2),
    ConstantMean(SP500),
    ZeroMean(SP500),
]

VOLATILITIES = [
    ConstantVariance(),
    GARCH(),
    FIGARCH(),
    EWMAVariance(lam=0.94),
    MIDASHyperbolic(),
    HARCH(lags=[1, 5, 22]),
    RiskMetrics2006(),
    APARCH(),
    EGARCH(),
]

MODEL_SPECS = list(product(MEAN_MODELS, VOLATILITIES))
IDS = [
    f"{str(mean).split('(')[0]}-{str(vol).split('(')[0]}"
    for mean, vol in MODEL_SPECS
]
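# Sketch (my addition): fit every mean/volatility combination in MODEL_SPECS,
# reusing the mean-model instances in place, and collect the BICs by ID.
bics = {}
for (mean, vol), model_id in zip(MODEL_SPECS, IDS):
    mean.volatility = vol
    bics[model_id] = mean.fit(disp="off").bic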
Example #25
def test_blank(small_data, std_data):
    small_mod = ZeroMean(small_data, volatility=GARCH(), rescale=False)
    small_res = small_mod.fit(disp="off")
    mod = ZeroMean(std_data, volatility=GARCH(), rescale=False)
    res = mod.fit(disp="off")
    assert_allclose(1e3 * small_res.params[0], res.params[0], rtol=5e-3)
Example #26
    print(model.params)
    # --------------------------------------------------------
    # Test the return residuals for autocorrelation -----------
    resid = model.resid
    print(sm.stats.durbin_watson(resid.values))
    # Test the residuals for ARCH effects ---------------------
    *_, fpvalue = diagnostic.het_arch(resid)
    if fpvalue < 0.05:
        print('Heteroskedasticity is significant', fpvalue)
    else:
        print('Heteroskedasticity is not significant', fpvalue)
    # Build the ARCH model --------------------------------------
    # Model prediction
    model = sm.tsa.ARMA(df2, (0, 1)).fit()
    arch_mod = ConstantMean(df2)
    arch_mod.volatility = GARCH(1, 0, 1)
    arch_mod.distribution = StudentsT()
    res = arch_mod.fit(update_freq=5, disp='off')
    mu = model.params[0]
    theta = model.params[1]

    omega = res.params[1]
    alpha = res.params[2]
    beta = res.params[3]
    sigma_t = res.conditional_volatility.iloc[-1]
    #print(res.conditional_volatility)
    sigma_predict = np.sqrt(omega + alpha * res.resid.iloc[-1]**2 +
                            beta * sigma_t**2)
    epsilon_t = sigma_t * np.random.standard_normal()
    epsilon_predict = sigma_predict * np.random.standard_normal()
    return_predict = mu + epsilon_predict + theta * epsilon_t
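    # Cross-check sketch (my addition): the built-in forecaster's analytic
    # one-step variance should match omega + alpha * e_t**2 + beta * sigma_t**2.
    sigma2_next = res.forecast(horizon=1).variance.iloc[-1, 0]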
Example #27
        table = tabulate(d_p, headers=H1, floatfmt=".4f")
        return table


tab_5 = table_5(data, 0)
print(tab_5.table_comp_a())

# %% table 6 a

model_garch_cr = garch_m(data_crsp[(data_crsp['year'] >= 1953)
                                   & (data_crsp['year'] <= 1984)]['spread'])
results_g_cr = model_garch_cr.fit()
results_g_cr.summary()

# %%

from arch.univariate import ConstantMean, GARCH
gar_0 = ConstantMean(data['spread'])
gar_0.volatility = GARCH(p=2, q=1)
gar_0_r = gar_0.fit()
gar_pa_0 = np.array(gar_0_r.params)
# %%
sigma_2 = gar_0_r.conditional_volatility
X = sm.add_constant(sigma_2)
#mean_0 = sm.tsa.ARMA(data['spread'], order=(0,1))
mean_0 = statsmodels.tsa.arima_model.ARMA(data['spread'],
                                          exog=sigma_2,
                                          order=(0, 1))
mean_0_r = mean_0.fit()
mean_pa_0 = np.array(mean_0_r.params)