def run_garch_simple(y, mean_model, vol_model, split_date, x=None, verbose=False):
    # Specify the mean model.
    if mean_model == "CONST":
        ls = ConstantMean(y)
    elif mean_model == 'LS':
        ls = LS(y=y, x=x)
    elif mean_model == 'ARX':
        ls = ARX(y=y, lags=1)
    else:
        raise ValueError("Misspecified mean model name. Please choose between CONST, LS, ARX.")

    # Specify the volatility model.
    if vol_model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif vol_model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif vol_model == "EWMA":
        ls.volatility = EWMAVariance(lam=None)
    else:
        raise ValueError("Misspecified volatility process name. Please choose between GARCH, EGARCH, EWMA.")

    res = ls.fit(disp='off', last_obs=split_date)
    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())
    return res
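# Usage sketch (an assumption, not from the original source): fit a constant-mean
# GARCH(1,1) on simulated data. Assumes the arch.univariate classes used inside
# run_garch_simple (ConstantMean, GARCH, EGARCH, EWMAVariance, LS, ARX) are imported.
import numpy as np
import pandas as pd

idx = pd.date_range("2015-01-01", periods=1000, freq="B")
demo_returns = pd.Series(np.random.default_rng(0).standard_normal(1000), index=idx)
res = run_garch_simple(demo_returns, mean_model="CONST", vol_model="GARCH",
                       split_date=idx[800])
print(res.params)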
def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
    self.exog_names.append('beta')
    self.exog_names.append('theta')
    self.exog_names.append('a')
    self.exog_names.append('b')
    self.exog_names.append('c_1')
    self.exog_names.append('c_2')
    # Initialize the variance parameters from a plain GARCH(2,1) fit.
    gar_0 = ConstantMean(data['spread'])
    gar_0.volatility = GARCH(p=2, q=1)
    gar_0_r = gar_0.fit()
    gar_pa_0 = np.array(gar_0_r.params)
    sigma_2 = gar_0_r.conditional_volatility
    # sigma_2 = np.sqrt(gar_0_r.conditional_volatility)
    # Initialize the mean parameters from an MA(1) with the conditional volatility
    # as an exogenous regressor (GARCH-in-mean starting values).
    mean_0 = statsmodels.tsa.arima_model.ARMA(data['spread'], exog=sigma_2, order=(0, 1))
    mean_0_r = mean_0.fit()
    mean_pa_0 = np.array(mean_0_r.params)
    # start_params = np.concatenate([[-0.001], [0.073], [-0.157], [gar_pa_0[1]], [gar_pa_0[4]], gar_pa_0[2:4]])
    # start_params = np.array([-0.001, 0.073, -0.157, 0.00006, 0.918, 0.121, -0.043])
    start_params = np.concatenate(
        [mean_pa_0, [gar_pa_0[1]], [gar_pa_0[4]], gar_pa_0[2:4]])
    # start_params = np.array([0.201, 2.41, -0.157, 0.00006, 0.918, 0.121, -0.043])
    return super(garch_m, self).fit(start_params=start_params,
                                    maxiter=maxiter, maxfun=maxfun, **kwds)
def run_garch_rolling(y, rvol, model, split_date, x=None, verbose=True, lam=None):
    # Specify the mean model.
    ls = ConstantMean(y=y)

    # Specify the volatility model.
    if model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif model == "EWMA":
        ls.volatility = EWMAVariance(lam)
    else:
        raise ValueError("Misspecified volatility process name")

    res = ls.fit(disp='off', last_obs=split_date)
    forecasts_1d = res.forecast(horizon=1)
    forecasted_vol = forecasts_1d.variance.pow(0.5).shift(1).dropna()
    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()

    # Summed (not averaged) losses, in and out of sample.
    test_MAE = np.abs(test_merged.iloc[:, 0] - test_merged.iloc[:, 1]).sum()
    train_MAE = np.abs(train_merged.iloc[:, 0] - train_merged.iloc[:, 1]).sum()
    MAE = [train_MAE, test_MAE]
    test_MSE = np.square(test_merged.iloc[:, 0] - test_merged.iloc[:, 1]).sum()
    train_MSE = np.square(train_merged.iloc[:, 0] - train_merged.iloc[:, 1]).sum()
    MSE = [train_MSE, test_MSE]
    test_HMAE = np.abs(1 - test_merged.iloc[:, 1] / test_merged.iloc[:, 0]).sum()
    train_HMAE = np.abs(1 - train_merged.iloc[:, 1] / train_merged.iloc[:, 0]).sum()
    HMAE = [train_HMAE, test_HMAE]
    test_HMSE = np.square(1 - test_merged.iloc[:, 1] / test_merged.iloc[:, 0]).sum()
    train_HMSE = np.square(1 - train_merged.iloc[:, 1] / train_merged.iloc[:, 0]).sum()
    HMSE = [train_HMSE, test_HMSE]

    df_results = pd.DataFrame(data=np.c_[MAE, MSE, HMAE, HMSE].T,
                              columns=[model + ' ' + s for s in ['in-sample', 'out-of-sample']],
                              index=['MAE', 'MSE', 'HMAE', 'HMSE']).T
    return df_results, len(train_merged), len(test_merged)
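# Usage sketch (an assumption, not from the source): unlike the run_garch variant later
# in this section, this one returns summed losses plus the train/test sample sizes, so
# the result unpacks as a triple. `r` is a pd.Series of returns and `rv` a one-column
# DataFrame of realized volatility; both names are illustrative only.
# df_scores, n_train, n_test = run_garch_rolling(r, rv, model="EWMA",
#                                                split_date="2018-01-01", lam=0.94)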
def getvolatility(self):
    df = volatility.getyieldrate(self)
    # Realized-variance proxy: sum of squared (percent) yield changes.
    vol = 0.0
    for i in range(1, len(df)):
        vol = vol + df[i] * df[i] / 10000.0
    am = ConstantMean(df)
    am.volatility = GARCH(1, 0, 1)
    am.distribution = Normal()
    res = am.fit()
    print('vol = ' + str(vol))
    print(res.summary())
    return 0
print(model.params)
# --------------------------------------------------------
# Test the return residuals for autocorrelation ----------
resid = model.resid
print(sm.stats.durbin_watson(resid.values))
# Test the residuals for ARCH effects --------------------
*_, fpvalue = diagnostic.het_arch(resid)
if fpvalue < 0.05:
    print('Heteroskedasticity is significant', fpvalue)
else:
    print('Heteroskedasticity is not significant', fpvalue)
# Build the ARCH model ------------------------------------
# Model forecast
model = sm.tsa.ARMA(df2, (0, 1)).fit()
arch_mod = ConstantMean(df2)
arch_mod.volatility = GARCH(1, 0, 1)
arch_mod.distribution = StudentsT()
res = arch_mod.fit(update_freq=5, disp='off')
mu = model.params[0]
theta = model.params[1]
omega = res.params[1]
alpha = res.params[2]
beta = res.params[3]
sigma_t = res.conditional_volatility.iloc[-1]
# print(res.conditional_volatility)
# One-step GARCH(1,1) variance recursion: omega + alpha*eps_t^2 + beta*sigma_t^2.
sigma_predict = np.sqrt(omega + alpha * res.resid.iloc[-1]**2 + beta * sigma_t**2)
epsilon_t = sigma_t * np.random.standard_normal()
epsilon_predict = sigma_predict * np.random.standard_normal()
return_predict = mu + epsilon_predict + theta * epsilon_t
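# Cross-check sketch (an assumption, not from the original): the variance part of the
# manual one-step recursion above can be reproduced with arch's built-in forecaster.
# fcast = res.forecast(horizon=1)
# sigma_predict_builtin = np.sqrt(fcast.variance.iloc[-1, 0])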
def _get_ARCH_model(self, returns: LogReturnsSeries, vol_process: VolatilityProcess):
    am = ConstantMean(returns)
    am.volatility = vol_process
    am.distribution = Normal()
    return am
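# Usage sketch (illustrative assumption): any arch.univariate volatility process can be
# injected while the helper fixes the ConstantMean and Normal errors, e.g.
# am = self._get_ARCH_model(log_returns, EGARCH(p=1, o=1, q=1))
# res = am.fit(disp='off')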
def run_garch(y, rvol, model, split_date, x=None, verbose=True, lam=None):
    # Specify the mean model.
    ls = ConstantMean(y=y)

    # Specify the volatility model.
    if model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif model == "EWMA":
        ls.volatility = EWMAVariance(lam)
    else:
        raise ValueError("Misspecified volatility process name")

    res = ls.fit(disp='off', last_obs=split_date)
    forecasts_1d = res.forecast(horizon=1)
    forecasted_vol = forecasts_1d.variance.pow(0.5).shift(1).dropna()
    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()

    n_test, n_train = len(test_merged), len(train_merged)

    # Mean losses in and out of sample, plus an observation-weighted total.
    test_MAE = np.abs(test_merged.iloc[:, 0] - test_merged.iloc[:, 1]).mean()
    train_MAE = np.abs(train_merged.iloc[:, 0] - train_merged.iloc[:, 1]).mean()
    total_MAE = (test_MAE * n_test + train_MAE * n_train) / (n_test + n_train)
    MAE = [train_MAE, test_MAE, total_MAE]

    test_MSE = np.square(test_merged.iloc[:, 0] - test_merged.iloc[:, 1]).mean()
    train_MSE = np.square(train_merged.iloc[:, 0] - train_merged.iloc[:, 1]).mean()
    total_MSE = (test_MSE * n_test + train_MSE * n_train) / (n_test + n_train)
    MSE = [train_MSE, test_MSE, total_MSE]

    test_HMAE = np.abs(1 - test_merged.iloc[:, 1] / test_merged.iloc[:, 0]).mean()
    train_HMAE = np.abs(1 - train_merged.iloc[:, 1] / train_merged.iloc[:, 0]).mean()
    total_HMAE = (test_HMAE * n_test + train_HMAE * n_train) / (n_test + n_train)
    HMAE = [train_HMAE, test_HMAE, total_HMAE]

    test_HMSE = np.square(1 - test_merged.iloc[:, 1] / test_merged.iloc[:, 0]).mean()
    train_HMSE = np.square(1 - train_merged.iloc[:, 1] / train_merged.iloc[:, 0]).mean()
    total_HMSE = (test_HMSE * n_test + train_HMSE * n_train) / (n_test + n_train)
    HMSE = [train_HMSE, test_HMSE, total_HMSE]

    df_results = pd.DataFrame(data=np.c_[MAE, MSE, HMAE, HMSE].T,
                              columns=[model + ' ' + s for s in ['in-sample', 'out-of-sample', 'total']],
                              index=['MAE', 'MSE', 'HMAE', 'HMSE']).T

    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())
        display(Markdown('#### <br> <br> Plot forecast by model vs realized vol'))
        ax = plt.gca()
        forecasted_vol.plot(color='g', ax=ax, alpha=1, label='prediction oos')
        rvol.plot(color='blue', ax=ax, label='ground truth')
        res.conditional_volatility.plot(color='orange', ax=ax, label='prediction in-sample')
        ax.legend()
        display(Markdown('#### <br> <br> Results of out-of-sample forecasts with various loss functions'))
        display(df_results)

    return df_results
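# Usage sketch (an assumption, not from the source): evaluate a GARCH(1,1) forecast
# against a crude realized-volatility proxy. `rvol` must be a one-column DataFrame so
# the .join() calls inside run_garch align with the forecast columns.
import numpy as np
import pandas as pd

idx = pd.date_range("2015-01-01", periods=1500, freq="B")
r = pd.Series(np.random.default_rng(1).standard_normal(1500), index=idx, name="ret")
rv = r.abs().rolling(21).mean().dropna().to_frame("rvol")  # illustrative proxy only
scores = run_garch(r, rv, model="GARCH", split_date=idx[1200], verbose=False)
print(scores)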
def return_sampler_garch(
    N_train: int,
    mean_process: str = "Constant",
    lags_mean_process: int = None,
    vol_process: str = "GARCH",
    distr_noise: str = "normal",
    seed: int = None,
    seed_param: int = None,
    p_arg: list = None,
) -> Tuple[np.ndarray, pd.Series]:
    # https://stats.stackexchange.com/questions/61824/how-to-interpret-garch-parameters
    # https://arch.readthedocs.io/en/latest/univariate/introduction.html
    # https://arch.readthedocs.io/en/latest/univariate/volatility.html
    # https://github.com/bashtage/arch/blob/master/arch/univariate/volatility.py
    """
    Generate financial returns driven by mean-reverting factors.

    Parameters
    ----------
    N_train: int
        Length of the experiment.
    mean_process: str
        Mean process for the returns. It can be 'Constant' or 'AR'.
    lags_mean_process: int
        Order of the autoregressive lag if mean_process is 'AR'.
    vol_process: str
        Volatility process for the returns. It can be 'GARCH', 'EGARCH', 'TGARCH',
        'ARCH', 'HARCH', 'FIGARCH' or 'Constant'. Note that different volatility
        processes require different parameters, which are hard-coded. If you want
        to pass them explicitly, use p_arg.
    distr_noise: str
        Distribution for the unpredictable component of the returns. It can be
        'normal', 'studt', 'skewstud' or 'ged'. Note that different distributions
        require different parameters, which are hard-coded. If you want to pass
        them explicitly, use p_arg.
    seed: int
        Seed for experiment reproducibility.
    seed_param: int
        Seed for randomly drawing the parameters needed for the simulation. The
        ranges provided are obtained as average lower and upper bounds of several
        GARCH-type model fits on real financial time series.
    p_arg: pd.Series
        Pandas Series of parameters that you want to pass explicitly. They need to
        be passed in the right order. Check the documentation of the arch Python
        package (https://arch.readthedocs.io/en/latest/index.html) for more details.

    Returns
    -------
    simulations['data'].values: np.ndarray
        Simulated series of returns.
    p: pd.Series
        Series of parameters used for the simulation.
    """
    names = []
    vals = []

    if seed_param is None:
        seed_param = seed
    rng = np.random.RandomState(seed_param)

    # Choose the mean process.
    if mean_process == "Constant":
        model = ConstantMean(None)
        names.append("const")
        if seed_param:
            vals.append(rng.uniform(0.01, 0.09))
        else:
            vals.append(0.0)
    elif mean_process == "AR":
        model = ARX(None, lags=lags_mean_process)
        names.append("const")
        vals.append(0.0)
        if seed_param:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(rng.uniform(-0.09, 0.09))
        else:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(0.9)
    else:
        print("This mean process doesn't exist or is not available.")
        sys.exit()

    # Choose the volatility process.
    if vol_process == "GARCH":
        model.volatility = GARCH(p=1, q=1)
        names.extend(["omega", "alpha", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.1)
            alph = rng.uniform(0.05, 0.1)
            b = rng.uniform(0.86, 0.92)
            garch_p = np.array([om, alph, b]) / np.array([om, alph, b]).sum()
        else:
            om = 0.01
            alph = 0.05
            b = 0.94
            garch_p = np.array([om, alph, b])
        vals.extend(list(garch_p))
    elif vol_process == "ARCH":
        model.volatility = GARCH(p=1, q=0)
        names.extend(["omega", "alpha"])
        if seed_param:
            om = rng.uniform(1.4, 4.0)
            alph = rng.uniform(0.1, 0.6)
        else:
            om = 0.01
            alph = 0.4
        garch_p = np.array([om, alph])
        vals.extend(list(garch_p))
    elif vol_process == "HARCH":
        model.volatility = HARCH(lags=[1, 5, 22])
        names.extend(["omega", "alpha[1]", "alpha[5]", "alpha[22]"])
        if seed_param:
            om = rng.uniform(0.5, 1.2)  # bounds reordered; the original had (1.2, 0.5)
            alph1 = rng.uniform(0.01, 0.1)
            alph5 = rng.uniform(0.05, 0.3)
            alph22 = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            alph1 = 0.05
            alph5 = 0.15
            alph22 = 0.5
        garch_p = np.array([om, alph1, alph5, alph22])
        vals.extend(list(garch_p))
    elif vol_process == "FIGARCH":
        model.volatility = FIGARCH(p=1, q=1)
        names.extend(["omega", "phi", "d", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.05)  # bounds reordered; the original had (0.05, 0.03)
            phi = rng.uniform(0.1, 0.35)
            d = rng.uniform(0.3, 0.5)
            beta = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            phi = 0.2
            d = 0.2
            beta = 0.55
        garch_p = np.array([om, phi, d, beta])
        vals.extend(list(garch_p))
    elif vol_process == "TGARCH":
        model.volatility = GARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.02, 0.15)
            alph = rng.uniform(0.01, 0.07)
            gamma = rng.uniform(0.03, 0.1)
            b = rng.uniform(0.88, 0.94)
        else:
            om = 0.01
            alph = 0.05
            gamma = 0.04
            b = 0.90
        garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))
    elif vol_process == "EGARCH":
        model.volatility = EGARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.01, 0.03)
            alph = rng.uniform(0.06, 0.17)
            gamma = rng.uniform(-0.05, -0.02)
            b = rng.uniform(0.97, 0.99)
            garch_p = np.array([om, alph, gamma, b]) / np.array([om, alph, gamma, b]).sum()
        else:
            om = 0.01
            alph = 0.05
            gamma = -0.02
            b = 0.94
            garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))
    elif vol_process == "Constant":
        model.volatility = ConstantVariance()
        names.append("sigma_const")
        vals.append(rng.uniform(0.02, 0.05))
    else:
        print("This volatility process doesn't exist or is not available.")
        sys.exit()

    # Choose the noise distribution.
    if distr_noise == "normal":
        model.distribution = Normal(np.random.RandomState(seed))
    elif distr_noise == "studt":
        model.distribution = StudentsT(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.randint(6, 10))
        else:
            vals.append(8.0)
    elif distr_noise == "skewstud":
        model.distribution = SkewStudent(np.random.RandomState(seed))
        names.extend(["nu", "lambda"])
        if seed_param:
            vals.extend([rng.uniform(6.0, 10.0), rng.uniform(-0.1, 0.1)])
        else:
            vals.extend([8.0, 0.05])
    elif distr_noise == "ged":
        model.distribution = GeneralizedError(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.uniform(1.05, 3.0))
        else:
            vals.append(2.0)
    else:
        print("This noise distribution doesn't exist or is not available.")
        sys.exit()

    p = pd.Series(data=vals, index=names)
    if p_arg is not None:
        p = p_arg

    simulations = model.simulate(p, N_train) / 100
    return simulations["data"].values, p
returns = spClose.apply(np.log) - spClose.shift(1).apply(np.log)
returns *= scale
returns.dropna(inplace=True)
returns.plot()

omega = 0.000005 * scale**2
alpha = 0.07
beta = 0.85
theta = 0.5

# Using NGARCH11.
tsm = ConstantMean(returns)
ngarch = NGARCH11(np.array([omega, alpha, beta, theta]))
tsm.volatility = ngarch
rst = tsm.fit()
print(rst)
rst.plot(annualize='D')
sns.distplot(rst.std_resid, fit=stats.norm)
print(ngarch.is_valid(rst.params['alpha'], rst.params['beta'], rst.params['theta']))
sm.graphics.qqplot(rst.std_resid, line='45')

# Using FixedNGARCH11.
import datetime as dt

import pandas_datareader.data as web

from arch import arch_model
from arch.univariate import ConstantMean, GARCH, Normal
# from arch.univariate import ZeroMean, GARCH, Normal

start = dt.datetime(2000, 1, 1)
end = dt.datetime(2014, 1, 1)
sp500 = web.DataReader('^GSPC', 'yahoo', start=start, end=end)
returns = 100 * sp500['Adj Close'].pct_change().dropna()

am = ConstantMean(returns)
am.volatility = GARCH(1, 0, 1)
am.distribution = Normal()
res = am.fit()
res.summary()

# %%
# Import the packages.
import numpy as np
from scipy.optimize import minimize
import scipy.stats as stats
import time

# Set up your x values
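# Equivalent shortcut for the ConstantMean + GARCH(1,0,1) + Normal fit above (a sketch,
# assuming the same `returns` series): arch_model builds the identical specification
# in a single call.
am2 = arch_model(returns, mean='Constant', vol='Garch', p=1, o=0, q=1, dist='normal')
res2 = am2.fit(disp='off')
print(res2.summary())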
def bruteforce_ts_model(returns, start_p, start_q, max_p, max_q):
    """
    Brute-force each possible combination of the ARCH-family models
    (e.g. ARCH(3), GARCH(3,4), EGARCH(1,3)), record its score and save it.

    Args:
        returns (pandas.Series): Contains the list of all the returns.
        start_p (int): Starting point of the range of the p parameter.
        start_q (int): Starting point of the range of the q parameter.
        max_p (int): Ending point (exclusive) of the range of the p parameter.
        max_q (int): Ending point (exclusive) of the range of the q parameter.

    Output:
        df (pandas.DataFrame): DataFrame containing all the models and
            information criteria.
    """
    # We define our list of models to test.
    model_types = ['ARCH', 'GARCH', 'EGARCH']
    # We define our list of distributions to test.
    dist_types = ['normal', 'studentst', 'skewstudent']
    # We define our result lists.
    AIC_score = []
    BIC_score = []
    LL_score = []
    model_list = []
    mean_model_list = []
    dist_list = []
    q_list = []
    p_list = []
    # We compute the total number of models (the original used max_p * max_q,
    # which overcounts when the ranges do not start at zero).
    max_iter = (max_p - start_p) * (max_q - start_q) * len(model_types) * len(dist_types)
    current_iter = 0

    # For each model we have
    for model in model_types:
        # For each parameter p
        for each_p in range(start_p, max_p):
            # For each parameter q
            for each_q in range(start_q, max_q):
                # For each distribution type
                for dist in dist_types:
                    # We define our constant mean model.
                    am = ConstantMean(returns)
                    mean_model_list.append('ConstantMean')
                    # Our distribution.
                    if dist == 'normal':
                        am.distribution = Normal()
                    elif dist == 'studentst':
                        am.distribution = StudentsT()
                    elif dist == 'skewstudent':
                        am.distribution = SkewStudent()
                    # Our volatility process.
                    if model == "ARCH":
                        am.volatility = ARCH(p=each_p)
                    elif model == "GARCH":
                        am.volatility = GARCH(p=each_p, q=each_q)
                    elif model == "EGARCH":
                        am.volatility = EGARCH(p=each_p, q=each_q)
                    # We fit our model.
                    res = am.fit(update_freq=5, disp='off')
                    # We record our model and distribution.
                    model_list.append(model)
                    dist_list.append(dist)
                    # We record the scores.
                    AIC_score.append(res.aic)
                    BIC_score.append(res.bic)
                    LL_score.append(res.loglikelihood)
                    # We record the parameters.
                    q_list.append(each_q)
                    p_list.append(each_p)
                    # We log the information about each computed model.
                    print(
                        f"it: {current_iter}/{max_iter}\tmodel:{model}\tdist:{dist[:6]}\t"
                        f"p:{each_p}\tq:{each_q}\tAIC_score:{round(res.aic, 2)}\t"
                        f"BIC_score:{round(res.bic, 2)}\tLog Likelihood:{round(res.loglikelihood, 2)}"
                    )
                    # A model has been added, so we advance the iterator.
                    current_iter += 1
        # For each computed model
        print("=" * 20, f"{model} finished", "=" * 20)

    # We combine everything into a DataFrame.
    df = pd.DataFrame({
        'volatility_model': model_list,
        'mean_model': mean_model_list,
        'dist': dist_list,
        'p': p_list,
        'q': q_list,
        'AIC_score': AIC_score,
        'BIC_score': BIC_score,
        'LL_score': LL_score,
    })
    return df
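# Usage sketch (an assumption, not from the source): search a small grid on synthetic
# returns and rank candidates by BIC. Assumes pandas, numpy and the arch.univariate
# classes used inside the function (ConstantMean, ARCH, GARCH, EGARCH, Normal,
# StudentsT, SkewStudent) are imported.
import numpy as np
import pandas as pd

demo = pd.Series(np.random.default_rng(3).standard_normal(750))
grid = bruteforce_ts_model(demo, start_p=1, start_q=1, max_p=3, max_q=3)
print(grid.sort_values('BIC_score').head())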
        mu = forecast.mean.iloc[-1, 0]
        var = forecast.variance.iloc[-1, 0]
        result.append([(test_set - mu)**2, var])
    df = pd.DataFrame(result, columns=['y_true', 'y_pred'])
    results[(p, q)] = np.sqrt(mean_squared_error(df.y_true, df.y_pred))

s = pd.Series(results)
s.index.names = ['p', 'q']
s = s.unstack().sort_index(ascending=False)
sns.heatmap(s, cmap='Blues', annot=True, fmt='.4f')
plt.title('Out-of-Sample RMSE')
plt.savefig(f'{str(iop)}Out-of-Sample RMSE.png')

'''
estimate GARCH model
'''
best_p, best_q = 2, 2
am = ConstantMean(nasdaq_returns.clip(lower=nasdaq_returns.quantile(.05),
                                      upper=nasdaq_returns.quantile(.95)))
am.volatility = GARCH(best_p, 0, best_q)
am.distribution = Normal()
best_model = am.fit(update_freq=5)
print(best_model.summary())

fig = best_model.plot(annualize='D')
fig.set_size_inches(12, 8)
fig.tight_layout()

plot_correlogram(best_model.resid.dropna(), lags=250, title='GARCH Residuals')
mtss_am = arch_model(mtss_returns)
mtss_res = mtss_am.fit(update_freq=5, disp='off')
mfon_am = arch_model(mfon_returns)
mfon_res = mfon_am.fit(update_freq=5, disp='off')

# Annualize the daily conditional volatilities.
mfon_vol = mfon_res.conditional_volatility * np.sqrt(252)
mtss_vol = mtss_res.conditional_volatility * np.sqrt(252)

cm = ConstantMean(mtss_returns)
res = cm.fit(update_freq=5)
f_pvalue = het_arch(res.resid)[3]
cm.volatility = GARCH(p=1, q=1)

plt.title('ASSAD')  # the original called plt.plot(title=...), which is not a valid kwarg
p1 = plt.plot(mfon_vol)
p2 = plt.plot(mtss_vol)
plt.legend((p1[0], p2[0]), ('MFON', 'MTSS'))

from scipy import stats

pvalue = 1 - stats.chi2.cdf(0.940659, 1)

from arch import arch_model
from scipy import stats


def find_garch(values, max_p=5, max_q=5):

    def lr_test(r1, r2):
    table = tabulate(d_p, headers=H1, floatfmt=".4f")
    return table

tab_5 = table_5(data, 0)
print(tab_5.table_comp_a())

# %% table 6 a
model_garch_cr = garch_m(data_crsp[(data_crsp['year'] >= 1953)
                                   & (data_crsp['year'] <= 1984)]['spread'])
results_g_cr = model_garch_cr.fit()
results_g_cr.summary()

# %%
from arch.univariate import ConstantMean, GARCH

gar_0 = ConstantMean(data['spread'])
gar_0.volatility = GARCH(p=2, q=1)
gar_0_r = gar_0.fit()
gar_pa_0 = np.array(gar_0_r.params)

# %%
sigma_2 = gar_0_r.conditional_volatility
X = sm.add_constant(sigma_2)
# mean_0 = sm.tsa.ARMA(data['spread'], order=(0, 1))
mean_0 = statsmodels.tsa.arima_model.ARMA(data['spread'], exog=sigma_2, order=(0, 1))
mean_0_r = mean_0.fit()
mean_pa_0 = np.array(mean_0_r.params)