def test_blank(small_data, std_data):
    """Check that the first fitted parameter scales with the data when rescaling is off.

    Both series are fitted with a zero-mean GARCH model, ``rescale=False`` and
    explicit starting values. The first parameter of the small-scale fit,
    multiplied by ``1e3``, must match the first parameter of the
    standard-scale fit to a relative tolerance of ``5e-3``.
    """
    small_start = np.array([1e-3, 0.05, 0.90])
    small_mod = ZeroMean(small_data, volatility=GARCH(), rescale=False)
    small_res = small_mod.fit(starting_values=small_start, disp="off")

    std_start = np.array([1, 0.05, 0.90])
    mod = ZeroMean(std_data, volatility=GARCH(), rescale=False)
    res = mod.fit(starting_values=std_start, disp="off")

    # ``params`` is indexed by parameter name, so the first entry is taken
    # with ``.iloc``; integer keys on a label-indexed Series are deprecated.
    assert_allclose(
        1e3 * small_res.params.iloc[0], res.params.iloc[0], rtol=5e-3
    )
def test_rescale_fit(small_data, std_data):
    """Automatic rescaling must reproduce a fit on manually scaled data.

    A model fitted to ``small_data`` with ``rescale=True`` is compared with a
    model fitted to ``10 * small_data``: the log-likelihoods and the variance
    forecasts produced from ``start=0`` have to agree.
    """
    rescaled_res = ZeroMean(
        small_data, volatility=GARCH(), rescale=True
    ).fit(disp="off")
    manual_res = ZeroMean(10 * small_data, volatility=GARCH()).fit(disp="off")

    assert_allclose(rescaled_res.loglikelihood, manual_res.loglikelihood)

    rescaled_variance = rescaled_res.forecast(start=0).variance
    manual_variance = manual_res.forecast(start=0).variance
    assert_allclose(rescaled_variance, manual_variance)
def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
    """Fit the model, deriving default starting values from preliminary fits.

    Parameters
    ----------
    start_params : array_like, optional
        Starting values handed to the parent ``fit``. When omitted they are
        assembled from a constant-mean GARCH(2, 1) fit and an ARMA(0, 1) fit
        (with the GARCH conditional volatility as regressor), both run on
        the module-level ``data['spread']`` series.
    maxiter, maxfun : int
        Forwarded unchanged to the parent ``fit``.
    **kwds
        Additional keyword arguments forwarded to the parent ``fit``.

    Returns
    -------
    The object returned by the parent class ``fit``.
    """
    # Register the parameter labels only once, so that calling fit() a
    # second time does not append duplicate names.
    for label in ('beta', 'theta', 'a', 'b', 'c_1', 'c_2'):
        if label not in self.exog_names:
            self.exog_names.append(label)

    # Honour starting values supplied by the caller; the preliminary fits are
    # only needed to build the defaults.
    if start_params is None:
        gar_0 = ConstantMean(data['spread'])
        gar_0.volatility = GARCH(p=2, q=1)
        gar_0_r = gar_0.fit()
        gar_pa_0 = np.array(gar_0_r.params)

        # The fitted conditional volatility is used as the regressor of the
        # preliminary mean equation.
        sigma_2 = gar_0_r.conditional_volatility
        # NOTE(review): statsmodels.tsa.arima_model.ARMA is not available in
        # recent statsmodels releases - confirm the pinned version.
        mean_0 = statsmodels.tsa.arima_model.ARMA(
            data['spread'], exog=sigma_2, order=(0, 1))
        mean_0_r = mean_0.fit()
        mean_pa_0 = np.array(mean_0_r.params)

        # Mean parameters first, then GARCH entries 1, 4 and 2:4
        # (presumably omega, beta[1] and the two alphas - verify against the
        # parameter order reported by the GARCH fit).
        start_params = np.concatenate(
            [mean_pa_0, [gar_pa_0[1]], [gar_pa_0[4]], gar_pa_0[2:4]])

    return super(garch_m, self).fit(start_params=start_params,
                                    maxiter=maxiter,
                                    maxfun=maxfun,
                                    **kwds)
def small_data():
    """Simulate 1000 observations from a zero-mean GARCH model.

    The simulation uses parameters ``[1e-4, 0.05, 0.90]`` and normal
    innovations driven by a fixed-seed ``RandomState``; the ``data``
    column of the simulation output is returned.
    """
    seed = [2389280, 238901, 382908031]
    innovations = Normal(random_state=np.random.RandomState(seed))
    process = ZeroMean(None, volatility=GARCH(), distribution=innovations)
    simulated = process.simulate([1e-4, 0.05, 0.90], nobs=1000)
    return simulated.data
def run_garch_simple(y, mean_model, vol_model, split_date, x=None, verbose=False):
    """Fit a mean model with a GARCH-type volatility process.

    Parameters
    ----------
    y : series-like
        Dependent variable passed to the mean model.
    mean_model : str
        ``"CONST"`` (constant mean), ``"LS"`` (least squares on ``x``) or
        ``"ARX"`` (autoregression with one lag).
    vol_model : str
        ``"GARCH"`` (p=1, q=1), ``"EGARCH"`` (p=1, o=1, q=1) or ``"EWMA"``
        (``EWMAVariance(lam=None)``).
    split_date
        Passed to ``fit`` as ``last_obs``.
    x : optional
        Regressors, only used by the ``"LS"`` mean model.
    verbose : bool
        When true, display a heading and print the fit summary.

    Returns
    -------
    The fitted result object.

    Raises
    ------
    ValueError
        If ``mean_model`` or ``vol_model`` is not one of the supported names.
        Previously the function only printed a message and then either
        crashed on an unbound variable (mean model) or silently fitted the
        default volatility process (volatility model).
    """
    # Validate both names before any model is built.
    if mean_model not in ("CONST", "LS", "ARX"):
        raise ValueError(
            "Misspecified mean model name. Please choose between CONST, LS, ARX.")
    if vol_model not in ("GARCH", "EGARCH", "EWMA"):
        raise ValueError(
            "Misspecified volatility process name. "
            "Please choose between GARCH, EGARCH, EWMA.")

    # specify mean model
    if mean_model == "CONST":
        ls = ConstantMean(y)
    elif mean_model == 'LS':
        ls = LS(y=y, x=x)
    else:
        ls = ARX(y=y, lags=1)

    # specify volatility model
    if vol_model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif vol_model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    else:
        ls.volatility = EWMAVariance(lam=None)

    res = ls.fit(disp='off', last_obs=split_date)

    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())

    return res
def getvolatility(self):
    """Print a sum-of-squares volatility figure and a GARCH(1, 0, 1) fit summary.

    The series comes from ``volatility.getyieldrate(self)``. Two things are
    printed: the sum of the squared entries from index 1 onwards, each
    divided by ``10000.0``, and the summary of a constant-mean GARCH(1, 0, 1)
    model with normal innovations fitted to the whole series.

    Returns
    -------
    int
        Always ``0``.
    """
    df = volatility.getyieldrate(self)

    # Index 0 is deliberately skipped, as in the original loop.
    vol = sum((df[i] * df[i] / 10000.0 for i in range(1, len(df))), 0.0)

    am = ConstantMean(df)
    am.volatility = GARCH(1, 0, 1)
    am.distribution = Normal()
    res = am.fit()

    print('vol =' + str(vol))
    # ``summary`` is a method: call it so the table is printed instead of the
    # bound-method representation.
    print(res.summary())
    return 0
def _rolling_loss_sums(merged):
    """Return summed MAE, MSE, HMAE and HMSE terms for the first two columns of ``merged``."""
    actual = merged.iloc[:, 0]
    predicted = merged.iloc[:, 1]
    return [
        np.abs(actual - predicted).sum(),
        np.square(actual - predicted).sum(),
        np.abs(1 - predicted / actual).sum(),
        np.square(1 - predicted / actual).sum(),
    ]


def run_garch_rolling(y, rvol, model, split_date, x=None, verbose=True, lam=None):
    """Fit a constant-mean volatility model and return summed loss terms.

    The losses are sums, not means, and the sample sizes are returned
    alongside them. Presumably this lets the caller pool several rolling
    windows before averaging - confirm against the caller.

    Parameters
    ----------
    y : series-like
        Series passed to ``ConstantMean``.
    rvol : DataFrame
        Reference volatility; it is joined with the model output and its
        first column is treated as the ground truth.
    model : str
        ``"GARCH"`` (p=1, q=1), ``"EGARCH"`` (p=1, o=1, q=1) or ``"EWMA"``.
    split_date
        Passed to ``fit`` as ``last_obs``.
    x, verbose
        Accepted for signature compatibility; not used by this function.
    lam : optional
        Smoothing parameter handed to ``EWMAVariance``.

    Returns
    -------
    tuple
        ``(df_results, n_train, n_test)``. ``df_results`` has one row for
        the in-sample and one for the out-of-sample losses, with columns
        ``MAE``, ``MSE``, ``HMAE`` and ``HMSE``. The two integers are the
        numbers of rows that entered each loss.

    Raises
    ------
    ValueError
        If ``model`` is not a supported name. Previously a message was
        printed and the default volatility process was fitted silently.
    """
    if model not in ("GARCH", "EGARCH", "EWMA"):
        raise ValueError("Misspecified volatility process name")

    # specify mean model
    ls = ConstantMean(y=y)

    # specify volatility model
    if model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    else:
        ls.volatility = EWMAVariance(lam)

    res = ls.fit(disp='off', last_obs=split_date)

    # One-step variance forecasts are converted to volatility and shifted by
    # one row so each forecast lines up with the observation it predicts.
    forecasts_1d = res.forecast(horizon=1)
    forecasted_vol = forecasts_1d.variance.pow(0.5).shift(1).dropna()

    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()

    df_results = pd.DataFrame(
        data=np.array([_rolling_loss_sums(train_merged),
                       _rolling_loss_sums(test_merged)]),
        index=[model + ' ' + sample for sample in ['in-sample', 'out-of-sample']],
        columns=['MAE', 'MSE', 'HMAE', 'HMSE'],
    )
    return df_results, len(train_merged), len(test_merged)
def return_sampler_garch(
    N_train: int,
    mean_process: str = "Constant",
    lags_mean_process: int = None,
    vol_process: str = "GARCH",
    distr_noise: str = "normal",
    seed: int = None,
    seed_param: int = None,
    p_arg: list = None,
) -> Tuple[np.ndarray, pd.Series]:
    """
    Generates financial returns from a mean model with GARCH-type volatility.

    Parameters
    ----------
    N_train: int
        Length of the experiment

    mean_process: str
        Mean process for the returns. It can be 'Constant' or 'AR'

    lags_mean_process: int
        Order of autoregressive lag if mean_process is AR. ``None`` is
        treated as no lags.

    vol_process: str
        Volatility process for the returns. It can be 'GARCH', 'EGARCH', 'TGARCH',
        'ARCH', 'HARCH', 'FIGARCH' or 'Constant'. Note that different volatility
        processes require different parameters, which are hard coded. If you want to
        pass them explicitly, use p_arg.

    distr_noise: str
        Distribution for the unpredictable component of the returns. It can be
        'normal', 'studt', 'skewstud' or 'ged'. Note that different distributions
        require different parameters, which are hard coded. If you want to
        pass them explicitly, use p_arg.

    seed: int
        Seed for experiment reproducibility

    seed_param: int
        Seed for drawing randomly the parameters needed for the simulation. The
        ranges provided are obtained as average lower and upper bounds of several
        GARCH-type model fitting on real financial time-series. Defaults to
        ``seed``. When no seed is available at all, fixed default parameters
        are used instead of random draws.

    p_arg: pd.Series
        Pandas series (or other sequence) of parameters that you want to pass
        explicitly. They need to be passed in the right order. Check documentation
        of the arch python package
        (https://arch.readthedocs.io/en/latest/index.html) for more details.

    Returns
    -------
    simulations['data'].values: np.ndarray
        Simulated series of returns (the simulated data divided by 100)
    p: pd.Series
        Series of parameters used for simulation

    Raises
    ------
    SystemExit
        If the mean process, volatility process or noise distribution name is
        not recognised (a message is printed first).
    """
    # Background reading:
    # https://stats.stackexchange.com/questions/61824/how-to-interpret-garch-parameters
    # https://arch.readthedocs.io/en/latest/univariate/introduction.html
    # https://arch.readthedocs.io/en/latest/univariate/volatility.html
    # https://github.com/bashtage/arch/blob/master/arch/univariate/volatility.py

    names = []
    vals = []

    if seed_param is None:
        seed_param = seed
    # Draw parameters at random whenever a seed is available. Testing against
    # None (not truthiness) keeps 0 usable as a seed.
    draw_params = seed_param is not None

    rng = np.random.RandomState(seed_param)

    # choose mean process
    if mean_process == "Constant":
        model = ConstantMean(None)
        names.append("const")
        vals.append(rng.uniform(0.01, 0.09) if draw_params else 0.0)

    elif mean_process == "AR":
        model = ARX(None, lags=lags_mean_process)
        names.append("const")
        vals.append(0.0)
        for i in range(lags_mean_process or 0):
            names.append("lag{}".format(i))
            vals.append(rng.uniform(-0.09, 0.09) if draw_params else 0.9)

    else:
        # Abort like the other validation branches instead of returning None.
        print("This mean process doesn't exist or it's not available.")
        sys.exit()

    # choose volatility process
    if vol_process == "GARCH":
        model.volatility = GARCH(p=1, q=1)
        names.extend(["omega", "alpha", "beta"])
        if draw_params:
            om = rng.uniform(0.03, 0.1)
            alph = rng.uniform(0.05, 0.1)
            b = rng.uniform(0.86, 0.92)
            # Normalise so the three parameters sum to one.
            garch_p = np.array([om, alph, b]) / (np.array([om, alph, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            b = 0.94
            garch_p = np.array([om, alph, b])
        vals.extend(list(garch_p))

    elif vol_process == "ARCH":
        model.volatility = GARCH(p=1, q=0)
        names.extend(["omega", "alpha"])
        if draw_params:
            om = rng.uniform(1.4, 4.0)
            alph = rng.uniform(0.1, 0.6)
        else:
            om = 0.01
            alph = 0.4
        garch_p = np.array([om, alph])
        vals.extend(list(garch_p))

    elif vol_process == "HARCH":
        model.volatility = HARCH(lags=[1, 5, 22])
        names.extend(["omega", "alpha[1]", "alpha[5]", "alpha[22]"])
        if draw_params:
            # Bounds are given high-to-low as in the original code; they are
            # kept so that seeded draws stay unchanged.
            om = rng.uniform(1.2, 0.5)
            alph1 = rng.uniform(0.01, 0.1)
            alph5 = rng.uniform(0.05, 0.3)
            alph22 = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            alph1 = 0.05
            alph5 = 0.15
            alph22 = 0.5
        garch_p = np.array([om, alph1, alph5, alph22])
        vals.extend(list(garch_p))

    elif vol_process == "FIGARCH":
        model.volatility = FIGARCH(p=1, q=1)
        names.extend(["omega", "phi", "d", "beta"])
        if draw_params:
            # Bounds kept high-to-low as in the original code (see HARCH).
            om = rng.uniform(0.05, 0.03)
            phi = rng.uniform(0.1, 0.35)
            d = rng.uniform(0.3, 0.5)
            beta = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            phi = 0.2
            d = 0.2
            beta = 0.55
        garch_p = np.array([om, phi, d, beta])
        vals.extend(list(garch_p))

    elif vol_process == "TGARCH":
        model.volatility = GARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if draw_params:
            om = rng.uniform(0.02, 0.15)
            alph = rng.uniform(0.01, 0.07)
            gamma = rng.uniform(0.03, 0.1)
            b = rng.uniform(0.88, 0.94)
        else:
            om = 0.01
            alph = 0.05
            gamma = 0.04
            b = 0.90
        garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "EGARCH":
        model.volatility = EGARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if draw_params:
            om = rng.uniform(0.01, 0.03)
            alph = rng.uniform(0.06, 0.17)
            gamma = rng.uniform(-0.05, -0.02)
            b = rng.uniform(0.97, 0.99)
            garch_p = np.array([om, alph, gamma, b]) / (np.array(
                [om, alph, gamma, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            gamma = -0.02
            b = 0.94
            garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "Constant":
        model.volatility = ConstantVariance()
        names.append("sigma_const")
        # Always drawn from ``rng``, whether or not a seed was supplied.
        vals.append(rng.uniform(0.02, 0.05))

    else:
        print("This volatility process doesn't exist or it's not available.")
        sys.exit()

    # choose the innovation distribution (seeded with ``seed``)
    if distr_noise == "normal":
        model.distribution = Normal(np.random.RandomState(seed))

    elif distr_noise == "studt":
        model.distribution = StudentsT(np.random.RandomState(seed))
        names.append("nu")
        # Integer bounds: randint is an integer sampler, so it should not be
        # given float bounds.
        vals.append(rng.randint(6, 10) if draw_params else 8.0)

    elif distr_noise == "skewstud":
        model.distribution = SkewStudent(np.random.RandomState(seed))
        names.extend(["nu", "lambda"])
        if draw_params:
            vals.extend([rng.uniform(6.0, 10.0), rng.uniform(-0.1, 0.1)])
        else:
            vals.extend([8.0, 0.05])

    elif distr_noise == "ged":
        model.distribution = GeneralizedError(np.random.RandomState(seed))
        names.append("nu")
        vals.append(rng.uniform(1.05, 3.0) if draw_params else 2.0)

    else:
        print("This noise distribution doesn't exist or it's not available.")
        sys.exit()

    p = pd.Series(data=vals, index=names)
    # ``if p_arg:`` would raise for a Series or array, so test explicitly.
    # An empty sequence still falls back to the generated parameters.
    if p_arg is not None and len(p_arg) > 0:
        p = p_arg

    simulations = model.simulate(p, N_train) / 100

    return simulations["data"].values, p
def return_sampler_GP(
    N_train: int,
    sigmaf: Union[float, list, np.ndarray],
    f_param: Union[float, list, np.ndarray],
    sigma: Union[float, list, np.ndarray],
    HalfLife: Union[int, list, np.ndarray],
    rng: np.random.mtrand.RandomState = None,
    offset: int = 2,
    uncorrelated: bool = False,
    t_stud: bool = False,
    degrees: int = 8,
    vol: str = "omosk",
    dt: int = 1,
    disable_tqdm: bool = False,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Generates financial returns driven by mean-reverting factors.

    Parameters
    ----------
    N_train : int
        Length of the experiment

    sigmaf : Union[float, list, np.ndarray]
        Volatilities of the mean reverting factors

    f_param: Union[float, list, np.ndarray]
        Factor loadings of the mean reverting factors

    sigma: Union[float, list, np.ndarray]
        volatility of the asset return (additional noise other than the intrinsic
        noise in the factors)

    HalfLife: Union[int, list, np.ndarray]
        HalfLife of mean reversion to simulate factors with different speeds.
        A scalar is treated as a single factor.

    rng: np.random.mtrand.RandomState
        Random number generator for reproducibility. When omitted an unseeded
        ``RandomState`` is created.

    offset: int = 2
        Amount of additional observations to simulate

    uncorrelated: bool = False
        Boolean to regulate if the simulated factors are correlated or not
        (only used when vol is 'omosk')

    t_stud : bool = False
        Bool to regulate if Student's t noises are needed
        (only used when vol is 'omosk')

    degrees : int = 8
        Degrees of freedom for Student's t noises

    vol: str = 'omosk'
        Choose between 'omosk' (i.i.d. noise) and 'heterosk' (noise simulated
        from a GARCH(1, 1) process)

    dt: int = 1
        Time step used in the factor recursion

    disable_tqdm: bool = False
        Disable the progress bar of the factor simulation loop

    Returns
    -------
    realret: np.ndarray
        Simulated series of returns (float32)

    factors: np.ndarray
        Simulated series of factors (float32)

    f_speed: np.ndarray
        Speed of mean reversion computed from the HalfLife argument

    Raises
    ------
    SystemExit
        If ``vol`` is neither 'omosk' nor 'heterosk' (a message is printed
        first).
    """
    # Validate once up front; the original repeated this check in two places,
    # the second of which could never be reached.
    if vol not in ("omosk", "heterosk"):
        print("Choose proper volatility setting")
        sys.exit()

    if rng is None:
        rng = np.random.RandomState()

    # N_train + offset observations are simulated: iterating the algorithm
    # needs one observation more than the sample size (the last state
    # representation), and in-sample plots taken every thousand observations
    # should end at 1000 rather than 999.
    n_obs = N_train + offset

    # Speed of mean reversion implied by the half lives; atleast_1d lets a
    # scalar HalfLife describe a single factor.
    half_lives = np.atleast_1d(HalfLife)
    lambdas = np.around(np.log(2) / half_lives, 4)
    n_factors = len(lambdas)
    f0 = np.zeros(shape=(n_factors, ))

    if vol == "omosk":
        # One noise column per factor when uncorrelated, otherwise a single
        # shock shared by all factors.
        eps_shape = (n_obs, n_factors) if uncorrelated else (n_obs, )
        if t_stud:
            eps = rng.standard_t(degrees, eps_shape)
        else:
            eps = rng.randn(*eps_shape)
        noise_scale = np.array(sigmaf) * np.sqrt(dt)
    else:
        volmodel = GARCH(p=1, q=1)
        # these factors, if multiple, are uncorrelated by default because the
        # noise is constructed one by one
        factor_vols = np.atleast_1d(sigmaf)
        shocks = []
        for factor_vol in factor_vols:
            om = factor_vol**2  # same vol as original GP experiments
            alph = 0.05
            b = 1 - alph - om
            garch_p = np.array([om, alph, b])
            shocks.append(volmodel.simulate(garch_p, n_obs, rng.randn)[0])
        if len(factor_vols) > 1:
            eps = np.concatenate([e.reshape(-1, 1) for e in shocks], axis=1)
        else:
            eps = shocks[0]
        noise_scale = np.sqrt(dt)

    # Simulate the factors with an Euler step of the OU recursion. A Python
    # list is grown and stacked afterwards because appending to a list is
    # cheaper than growing a numpy array.
    decay = 1 - lambdas * dt
    f = []
    for i in tqdm(
            iterable=range(n_obs),
            desc="Simulating Factors",
            disable=disable_tqdm,
    ):
        # np.multiply is the componentwise (Hadamard) product
        f1 = np.multiply(decay, f0) + np.multiply(noise_scale, eps[i])
        f.append(f1)
        f0 = f1

    factors = np.vstack(f)

    if vol == "omosk":
        if t_stud:
            u = rng.standard_t(degrees, n_obs)
        else:
            u = rng.randn(n_obs)
    else:
        volmodel = GARCH(p=1, q=1)
        om = sigma**2  # same vol as original GP experiments
        alph = 0.05
        b = 1 - alph - om
        garch_p = np.array([om, alph, b])
        u = volmodel.simulate(garch_p, n_obs, rng.randn)[0]

    realret = np.sum(f_param * factors, axis=1) + sigma * u
    f_speed = lambdas
    return realret.astype(np.float32), factors.astype(np.float32), f_speed
print(model.summary()) #5. cny = web.DataReader('CNY=X', 'yahoo', dt.datetime(2015, 1, 1), dt.datetime(2015, 12, 31)) ret = (cny.Close - cny.Close.shift(1)) / cny.Close.shift(1) ret = ret.dropna() cny.Close.plot() ret.plot() plot_acf(ret, lags=20) plot_pacf(ret, lags=20) LjungBox = stattools.q_stat(stattools.acf(ret)[1:13], len(ret)) LjungBox[1][-1] (ret**2).plot() plot_acf(ret**2, lags=20) plot_pacf(ret**2, lags=20) LjungBox = stattools.q_stat(stattools.acf(ret**2)[1:13], len(ret)) LjungBox[1][-1] from arch.univariate import ARX, GARCH model = ARX(ret, lags=1) model.volatility = GARCH() res = model.fit() print(res.summary())
return 1.0 / (q / hill_est['c'])**hill_est['xi'] def CornishFisherPpf(q, resid): nppf = stats.norm.ppf(q) s = stats.skew(resid) k = stats.kurtosis(resid) cfp = -(nppf + 0.74 * s - 0.24 * k + 0.38 * s**2) return cfp # using GARCH(1,1)-t as filter tsm = ConstantMean(returns) garch = GARCH(p=1, q=1) tsm.volatility = garch tsm.distribution = StudentsT() rst = tsm.fit() print(rst) sns.distplot(rst.std_resid, fit=stats.t) sm.graphics.qqplot(rst.std_resid, line='45') hillEst = HillEstimator(rst.std_resid, 50) print(hillEst) tailProb = 1 / 100.0
import datetime as dt
import pandas_datareader.data as web
from arch import arch_model
from arch.univariate import ConstantMean, GARCH, Normal
#from arch.univariate import ZeroMean, GARCH, Normal

# Download ^GSPC prices for 2000-01-01 to 2014-01-01 and build percentage
# returns from the adjusted close.
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2014, 1, 1)
sp500 = web.DataReader('^GSPC', 'yahoo', start=start, end=end)
returns = 100 * sp500['Adj Close'].pct_change().dropna()
# Constant-mean model with GARCH(1, 0, 1) volatility and normal innovations.
am = ConstantMean(returns)
am.volatility = GARCH(1, 0, 1)
am.distribution = Normal()
res = am.fit()
# Bare expression: the summary is only shown when run as a notebook cell.
res.summary()
# %%
# import the packages
import numpy as np
from scipy.optimize import minimize
import scipy.stats as stats
import time
# Set up your x values
mtss_am = arch_model(mtss_returns) mtss_res = mtss_am.fit(update_freq=5, disp = 'off') mfon_am = arch_model(mfon_returns) mfon_res = mfon_am.fit(update_freq=5, disp = 'off') mfon_res.conditional_volatility mfon_vol = mfon_res.conditional_volatility * np.sqrt(252) mtss_res.conditional_volatility mtss_vol = mtss_res.conditional_volatility * np.sqrt(252) cm = ConstantMean(mtss_returns) res = cm.fit(update_freq=5) f_pvalue = het_arch(res.resid)[3] cm.volatility = GARCH(p=1, q=1) p = plt.plot(title='ASSAD') p1 = plt.plot(mfon_vol) p2 = plt.plot(mtss_vol) p = plt.legend((p1[0], p2[0]), ('MFON', 'MTSS')) from scipy import stats pvalue = 1 - stats.chi2.cdf(0.940659, 1) from arch import arch_model from scipy import stats def find_garch(values, max_p=5, max_q=5): def lr_test(r1, r2):
def idiosyncratic_forecast(x, y, p, o, q):
    """Return the two-step-ahead residual variance forecast of an LS-GARCH model.

    ``y`` is regressed on ``x`` with a GARCH(p, o, q) volatility process. The
    returned value is the ``'h.2'`` entry in the last row of the
    residual-variance forecast with ``horizon=2``.
    """
    regression = LS(y, x)
    regression.volatility = GARCH(p=p, o=o, q=q)
    fitted = regression.fit()

    residual_variance = fitted.forecast(horizon=2).residual_variance
    last_row = residual_variance[-1:]
    return last_row['h.2'].to_list()[0]
def simulate_2(PARS, sample_size):
    """Simulate ``sample_size`` observations from a zero-mean GARCH(1, 1) model.

    ``PARS`` is passed to ``simulate`` as the parameter vector; the ``'data'``
    column of the simulation output is returned.
    """
    process = ZeroMean()
    process.volatility = GARCH(p=1, q=1)
    return process.simulate(PARS, sample_size)['data']
def test_arx_no_lags():
    """An ARX model built without lags fits four parameters and omits lags from its description."""
    mod = ARX(SP500, volatility=GARCH())
    res = mod.fit(disp="off")

    n_params = res.params.shape[0]
    assert n_params == 4

    description = mod._model_description(include_lags=False)
    assert "lags" not in description
def _mean_losses(merged):
    """Return mean MAE, MSE, HMAE and HMSE for the first two columns of ``merged``."""
    actual = merged.iloc[:, 0]
    predicted = merged.iloc[:, 1]
    return [
        np.abs(actual - predicted).mean(),
        np.square(actual - predicted).mean(),
        np.abs(1 - predicted / actual).mean(),
        np.square(1 - predicted / actual).mean(),
    ]


def _pooled_losses(train_losses, n_train, test_losses, n_test):
    """Return the size-weighted average of the train and test losses."""
    return [
        (test_loss * n_test + train_loss * n_train) / (n_test + n_train)
        for train_loss, test_loss in zip(train_losses, test_losses)
    ]


def run_garch(y, rvol, model, split_date, x=None, verbose=True, lam=None):
    """Fit a constant-mean volatility model and score it against ``rvol``.

    Parameters
    ----------
    y : series-like
        Series passed to ``ConstantMean``.
    rvol : DataFrame
        Reference volatility; it is joined with the model output and its
        first column is treated as the ground truth.
    model : str
        ``"GARCH"`` (p=1, q=1), ``"EGARCH"`` (p=1, o=1, q=1) or ``"EWMA"``.
    split_date
        Passed to ``fit`` as ``last_obs``.
    x
        Accepted for signature compatibility; not used by this function.
    verbose : bool
        When true, display the fit summary, a plot of forecast versus
        reference volatility, and the loss table.
    lam : optional
        Smoothing parameter handed to ``EWMAVariance``.

    Returns
    -------
    DataFrame
        Rows ``in-sample``, ``out-of-sample`` and ``total`` (each prefixed
        with the model name); columns ``MAE``, ``MSE``, ``HMAE`` and
        ``HMSE``. ``total`` is the average of the two samples weighted by
        their number of rows.

    Raises
    ------
    ValueError
        If ``model`` is not a supported name. Previously a message was
        printed and the default volatility process was fitted silently.
    """
    if model not in ("GARCH", "EGARCH", "EWMA"):
        raise ValueError("Misspecified volatility process name")

    # specify mean model
    ls = ConstantMean(y=y)

    # specify volatility model
    if model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    else:
        ls.volatility = EWMAVariance(lam)

    res = ls.fit(disp='off', last_obs=split_date)

    # One-step variance forecasts are converted to volatility and shifted by
    # one row so each forecast lines up with the observation it predicts.
    forecasts_1d = res.forecast(horizon=1)
    forecasted_vol = forecasts_1d.variance.pow(0.5).shift(1).dropna()

    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()

    train_losses = _mean_losses(train_merged)
    test_losses = _mean_losses(test_merged)
    total_losses = _pooled_losses(train_losses, len(train_merged),
                                  test_losses, len(test_merged))

    df_results = pd.DataFrame(
        data=np.array([train_losses, test_losses, total_losses]),
        index=[model + ' ' + sample
               for sample in ['in-sample', 'out-of-sample', 'total']],
        columns=['MAE', 'MSE', 'HMAE', 'HMSE'],
    )

    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())

        display(Markdown('#### <br> <br> Plot forecast by model vs realized vol'))
        ax = plt.gca()
        forecasted_vol.plot(color='g', ax=ax, alpha=1, label='prediction oos')
        rvol.plot(color='blue', ax=ax, label='ground truth')
        res.conditional_volatility.plot(color='orange', ax=ax,
                                        label='prediction in-sample')
        ax.legend()

        display(Markdown('#### <br> <br> Results of out-of-sample forecasts with various loss functions'))
        display(df_results)

    return df_results
cor_num = stats.pearsonr(cut['sp'], cut['tn']) cor0.loc[p, 'cor'] = cor_num[0] else: cut = returns.loc[(returns['sp'] > score_sp) & (returns['tn'] > score_tn), ] cor_num = stats.pearsonr(cut['sp'], cut['tn']) cor0.loc[p, 'cor'] = cor_num[0] cor0.plot() tsm_sp = ZeroMean(returns['sp']) garch = GARCH() tsm_sp.volatility = garch tsm_sp.distribution = StudentsT() rst_sp = tsm_sp.fit() filtered_sp = rst_sp.std_resid tsm_tn = ZeroMean(returns['tn']) garch = GARCH() tsm_tn.volatility = garch tsm_tn.distribution = StudentsT() rst_tn = tsm_tn.fit() filtered_tn = rst_tn.std_resid filtered_returns = pd.DataFrame(dict(sp=filtered_sp, tn=filtered_tn),
# ARCH effect ar_res = ar_select_order(rates, 5).model.fit() # Test of no serial correlation and homoskedasticity print(ar_res.diagnostic_summary()) print(ar_res.summary()) plt.figure() plt.plot(ar_res.resid) # a = ar_res.resid # a_res = ar_select_order(a, 5).model.fit() # print(a_res.diagnostic_summary()) # Fit with GARCH(p, q) ar = ARX(rates, lags=[1, 2]) # Mean model ar.volatility = GARCH(p=1, q=1) # Volatility model res = ar.fit() res.plot() print(res.summary()) # Forecast drop = len(data) - len(rates) start = 3254 - 2 - drop end = 3262 - 2 - drop var = res.forecast(start=start, horizon=5, method='simulation').variance[start:1 + end] var.plot() entry = [ '2012:06:20', '2012:06:21',
# NOTE(review): ARMA is imported but not used in this part of the script, and
# statsmodels.tsa.arima_model is not available in recent statsmodels
# releases - confirm the pinned version.
from statsmodels.tsa.arima_model import ARMA
import pandas
import numpy
import statsmodels.api as sm

# Load prices, keep the last observation of every week and drop incomplete rows.
prices = pandas.read_csv("prices.csv", parse_dates=['Date'], index_col=0)
# All columns except the last two are treated as tickers (presumably the last
# two are '^GSPC' and '^TNX' - verify against the CSV layout).
tickers = prices.columns[:-2]
prices = prices.resample('W').agg(lambda x: x[-1])
prices.dropna(axis=0, how='any', inplace=True)
# Rate series from '^TNX', divided by 52 * 100 (presumably annual percent to
# a weekly decimal rate - TODO confirm). The division is in place on the
# array returned by ``.values``.
rf = prices['^TNX'].values[:-1]
rf /= (52 * 100)
# Returns of every column but the last; the first (NaN) row is dropped.
returns = prices.iloc[:, :-1].pct_change()[1:]
rm = returns['^GSPC'].values
ri = returns.iloc[:, :-1].values
# Excess returns over the rate series.
Ri = ri - rf[:, numpy.newaxis]
Rm = rm - rf
# Regress all excess-return columns on a constant and Rm in one OLS call; the
# two parameter rows are unpacked as alpha and beta.
model = sm.OLS(Ri, sm.add_constant(Rm))
results = model.fit()
alpha, beta = results.params
# Square root of the variance of Ri not explained by beta * Rm.
epsilon = numpy.sqrt(Ri.var(axis=0) - beta**2 * Rm.var(axis=0))
output = pandas.DataFrame(columns=['alpha', 'beta', 'epsilon'], index=tickers, data=numpy.array([alpha, beta, epsilon]).T)
output.to_csv("coefficients.csv")
from arch.univariate import ARX, GARCH
# AR(1) mean with default GARCH() volatility on the '^GSPC' returns; the
# fitted parameters are written to CSV.
arx = ARX(rm, lags=1)
arx.volatility = GARCH()
res = arx.fit(disp='off')
pandas.DataFrame(res.params).to_csv("parameters.csv")
mu = forecast.mean.iloc[-1, 0] var = forecast.variance.iloc[-1, 0] result.append([(test_set-mu)**2, var]) df = pd.DataFrame(result, columns=['y_true', 'y_pred']) results[(p, q)] = np.sqrt(mean_squared_error(df.y_true, df.y_pred)) s = pd.Series(results) s.index.names = ['p', 'q'] s = s.unstack().sort_index(ascending=False) sns.heatmap(s, cmap='Blues', annot=True, fmt='.4f') plt.title('Out-of-Sample RMSE') plt.savefig(f'{str(iop)}Out-of-Sample RMSE.png') ''' estimate GARCH model ''' best_p, best_q = 2, 2, am = ConstantMean(nasdaq_returns.clip(lower=nasdaq_returns.quantile(.05), upper=nasdaq_returns.quantile(.95))) am.volatility = GARCH(best_p, 0, best_q) am.distribution = Normal() best_model = am.fit(update_freq=5) print(best_model.summary()) fig = best_model.plot(annualize='D') fig.set_size_inches(12, 8) fig.tight_layout() plot_correlogram(best_model.resid.dropna(), lags=250, title='GARCH Residuals')
def bruteforce_ts_model(returns, start_p, start_q, max_p, max_q):
    """
    Fit every combination of ARCH-family model, order and distribution.

    For each volatility model (ARCH, GARCH, EGARCH), each ``p`` in
    ``range(start_p, max_p)``, each ``q`` in ``range(start_q, max_q)`` and
    each distribution (normal, Student's t, skewed Student's t), a
    constant-mean model is fitted and its information criteria are recorded.
    The ARCH model only uses ``p`` and is therefore refitted for every ``q``.

    Args:
        returns (pandas.Series) : Series of returns to fit.
        start_p (int) : First value of p (inclusive).
        start_q (int) : First value of q (inclusive).
        max_p (int) : End of the p range (exclusive).
        max_q (int) : End of the q range (exclusive).

    Output:
        df (pandas.DataFrame) : One row per fitted model with the columns
            volatility_model, mean_model, dist, p, q, AIC_score, BIC_score
            and LL_score.
    """
    model_types = ['ARCH', 'GARCH', 'EGARCH']
    dist_types = ['normal', 'studentst', 'skewstudent']
    columns = [
        'volatility_model', 'mean_model', 'dist', 'p', 'q',
        'AIC_score', 'BIC_score', 'LL_score',
    ]

    p_values = range(start_p, max_p)
    q_values = range(start_q, max_q)

    # Total number of fits, counted from the ranges actually iterated (the
    # original used max_p * max_q and ignored the start values).
    max_iter = (len(p_values) * len(q_values) * len(model_types) *
                len(dist_types))
    current_iter = 0
    records = []

    for model in model_types:
        for each_p in p_values:
            for each_q in q_values:
                for dist in dist_types:
                    am = ConstantMean(returns)

                    # Strings are compared with ``==``; ``is`` tests object
                    # identity and only worked through interning.
                    if dist == 'normal':
                        am.distribution = Normal()
                    elif dist == 'studentst':
                        am.distribution = StudentsT()
                    elif dist == 'skewstudent':
                        am.distribution = SkewStudent()

                    if model == "ARCH":
                        am.volatility = ARCH(p=each_p)
                    elif model == "GARCH":
                        am.volatility = GARCH(p=each_p, q=each_q)
                    elif model == "EGARCH":
                        am.volatility = EGARCH(p=each_p, q=each_q)

                    res = am.fit(update_freq=5, disp='off')

                    records.append({
                        'volatility_model': model,
                        'mean_model': 'ConstantMean',
                        'dist': dist,
                        'p': each_p,
                        'q': each_q,
                        'AIC_score': res.aic,
                        'BIC_score': res.bic,
                        'LL_score': res.loglikelihood,
                    })

                    # Progress line for the model just fitted.
                    print(
                        f"it: {current_iter}/{max_iter}\tmodel:{model}"
                        f"\tdist:{dist[:6]}\tp:{each_p}\tq:{each_q}"
                        f"\tAIC_score:{round(res.aic,2)}"
                        f"\tBIC_score:{round(res.bic,2)}"
                        f"\tLog Likelihood:{round(res.loglikelihood,2)}"
                    )
                    current_iter += 1

        print("=" * 20, f"{model} finished", "=" * 20)

    return pd.DataFrame(records, columns=columns)
eqCurves['Buy and Hold'].plot() plt.legend() plt.show() # # From Arch website # In[273]: from arch.univariate import ARX ar = ARX(Y, lags=30) print(ar.fit().summary()) # In[270]: from arch.univariate import ARCH, GARCH ar.volatility = GARCH(p=3, o=0, q=3) res = ar.fit(update_freq=0, disp='off') p(res.summary()) # In[265]: from arch.univariate import StudentsT ar.distribution = StudentsT() res = ar.fit(update_freq=0, disp='off') p(res.summary()) # In[266]: arf = ar.forecast(horizon=forecast_steps, start=Y.index[-1], params=res.params,
arch_model, ) from arch.univariate.mean import _ar_forecast, _ar_to_impulse SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna() MEAN_MODELS = [ HARX(SP500, lags=[1, 5]), ARX(SP500, lags=2), ConstantMean(SP500), ZeroMean(SP500), ] VOLATILITIES = [ ConstantVariance(), GARCH(), FIGARCH(), EWMAVariance(lam=0.94), MIDASHyperbolic(), HARCH(lags=[1, 5, 22]), RiskMetrics2006(), APARCH(), EGARCH(), ] MODEL_SPECS = list(product(MEAN_MODELS, VOLATILITIES)) IDS = [ f"{str(mean).split('(')[0]}-{str(vol).split('(')[0]}" for mean, vol in MODEL_SPECS ]
def test_blank(small_data, std_data):
    """Check that the first fitted parameter scales with the data when rescaling is off.

    Both series are fitted with a zero-mean GARCH model and ``rescale=False``
    using the default starting values. The first parameter of the
    small-scale fit, multiplied by ``1e3``, must match the first parameter
    of the standard-scale fit to a relative tolerance of ``5e-3``.
    """
    small_mod = ZeroMean(small_data, volatility=GARCH(), rescale=False)
    small_res = small_mod.fit(disp="off")

    mod = ZeroMean(std_data, volatility=GARCH(), rescale=False)
    res = mod.fit(disp="off")

    # ``params`` is indexed by parameter name, so the first entry is taken
    # with ``.iloc``; integer keys on a label-indexed Series are deprecated.
    assert_allclose(
        1e3 * small_res.params.iloc[0], res.params.iloc[0], rtol=5e-3
    )
print(model.params)
# --------------------------------------------------------
# Autocorrelation check on the return residuals (Durbin-Watson)
resid = model.resid
print(sm.stats.durbin_watson(resid.values))
# Test the residuals for ARCH effects; only the last value returned by
# het_arch is used, as the p-value compared with 0.05.
*_, fpvalue = diagnostic.het_arch(resid)
if fpvalue < 0.05:
    print('异方差性显著', fpvalue)
else:
    print('异方差性不显著', fpvalue)
# Build the ARCH model
# Model prediction
# NOTE(review): sm.tsa.ARMA is not available in recent statsmodels releases -
# confirm the pinned version.
model = sm.tsa.ARMA(df2, (0, 1)).fit()
arch_mod = ConstantMean(df2)
arch_mod.volatility = GARCH(1, 0, 1)
arch_mod.distribution = StudentsT()
res = arch_mod.fit(update_freq=5, disp='off')
# Mean-equation parameters come from the ARMA(0, 1) fit and variance-equation
# parameters from the GARCH fit (entry 0 of res.params is skipped).
mu = model.params[0]
theta = model.params[1]
omega = res.params[1]
alpha = res.params[2]
beta = res.params[3]
# Last observation by position; ``.ix`` has been removed from pandas, so
# ``.iloc`` is used instead.
sigma_t = res.conditional_volatility.iloc[-1]
#print(res.conditional_volatility)
# One-step-ahead GARCH(1, 1) volatility from the last residual and volatility.
sigma_predict = np.sqrt(omega + alpha * res.resid.iloc[-1]**2 + beta * sigma_t**2)
# Draw the current and next shocks from a standard normal and combine them
# through the MA(1) mean equation.
epsilon_t = sigma_t * np.random.standard_normal()
epsilon_predict = sigma_predict * np.random.standard_normal()
return_predict = mu + epsilon_predict + theta * epsilon_t
table = tabulate(d_p, headers=H1, floatfmt=".4f") return table tab_5 = table_5(data, 0) print(tab_5.table_comp_a()) # %% table 6 a model_garch_cr = garch_m(data_crsp[(data_crsp['year'] >= 1953) & (data_crsp['year'] <= 1984)]['spread']) results_g_cr = model_garch_cr.fit() results_g_cr.summary() # %% from arch.univariate import ConstantMean, GARCH gar_0 = ConstantMean(data['spread']) gar_0.volatility = GARCH(p=2, q=1) gar_0_r = gar_0.fit() gar_pa_0 = np.array(gar_0_r.params) # %% sigma_2 = gar_0_r.conditional_volatility X = sm.add_constant(sigma_2) #mean_0 = sm.tsa.ARMA(data['spread'], order=(0,1)) mean_0 = statsmodels.tsa.arima_model.ARMA(data['spread'], exog=sigma_2, order=(0, 1)) mean_0_r = mean_0.fit() mean_pa_0 = np.array(mean_0_r.params)