def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
    """Fit the model, deriving starting values when none are supplied.

    The six extra parameter names ('beta', 'theta', 'a', 'b', 'c_1', 'c_2')
    are registered on ``self.exog_names`` once; calling ``fit`` again does
    not append them a second time.

    When ``start_params`` is None, starting values are built from two
    auxiliary fits on the module-level ``data['spread']`` series:

    1. a constant-mean GARCH(2, 1) model, and
    2. an ARMA(0, 1) model that takes the GARCH conditional volatility as
       an exogenous regressor.

    Args:
        start_params: Optional starting values. When given they are passed
            through unchanged and the auxiliary fits are skipped.
        maxiter: Forwarded to the parent ``fit``.
        maxfun: Forwarded to the parent ``fit``.
        **kwds: Forwarded to the parent ``fit``.

    Returns:
        Whatever the parent class's ``fit`` returns.
    """
    extra_names = ['beta', 'theta', 'a', 'b', 'c_1', 'c_2']
    # Only register the names if the tail of exog_names is not already
    # exactly this list, so repeated fits keep the name list stable.
    if self.exog_names[-len(extra_names):] != extra_names:
        self.exog_names.extend(extra_names)

    if start_params is None:
        # Auxiliary GARCH(2, 1) fit on the spread series.
        gar_0 = ConstantMean(data['spread'])
        gar_0.volatility = GARCH(p=2, q=1)
        gar_0_r = gar_0.fit()
        gar_pa_0 = np.array(gar_0_r.params)

        # Auxiliary ARMA(0, 1) fit with the conditional volatility as exog.
        sigma_2 = gar_0_r.conditional_volatility
        mean_0 = statsmodels.tsa.arima_model.ARMA(
            data['spread'], exog=sigma_2, order=(0, 1))
        mean_0_r = mean_0.fit()
        mean_pa_0 = np.array(mean_0_r.params)

        # Layout: all ARMA parameters, then GARCH entries 1, 4, 2 and 3.
        # NOTE(review): presumably arch orders these as
        # mu, omega, alpha[1], alpha[2], beta[1] - confirm.
        start_params = np.concatenate(
            [mean_pa_0, [gar_pa_0[1]], [gar_pa_0[4]], gar_pa_0[2:4]])

    return super(garch_m, self).fit(start_params=start_params,
                                    maxiter=maxiter, maxfun=maxfun, **kwds)
def run_garch_simple(y, mean_model, vol_model, split_date, x=None, verbose=False):
    """Fit a mean model with a chosen volatility process and return the result.

    Args:
        y: Dependent series handed to the mean model.
        mean_model: One of "CONST" (ConstantMean), "LS" (LS with regressors
            ``x``) or "ARX" (ARX with one lag).
        vol_model: One of "GARCH" (p=1, q=1), "EGARCH" (p=1, o=1, q=1) or
            "EWMA" (EWMAVariance with ``lam=None``). Any other name only
            prints a message; ``volatility`` is then never assigned and the
            mean model is fitted as constructed.
        split_date: Passed to ``fit`` as ``last_obs``.
        x: Regressors, used only by the "LS" mean model.
        verbose: When True, display a heading and print the fit summary.

    Returns:
        The fitted result object returned by ``fit``.

    Raises:
        ValueError: If ``mean_model`` is not one of the supported names.
    """
    # specify mean model
    if mean_model == "CONST":
        ls = ConstantMean(y)
    elif mean_model == 'LS':
        ls = LS(y=y, x=x)
    elif mean_model == 'ARX':
        ls = ARX(y=y, lags=1)
    else:
        # Without a mean model nothing can be fitted; fail here with a clear
        # message instead of hitting an unbound `ls` further down.
        raise ValueError(
            "Misspecified mean model name. Please choose between CONST, LS, ARX.")

    # specify volatility model
    if vol_model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif vol_model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif vol_model == "EWMA":
        ls.volatility = EWMAVariance(lam=None)
    else:
        print("Misspecified volatility process name. Please choose between GARCH, EGARCH, EWMA.")

    res = ls.fit(disp='off', last_obs=split_date)

    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())

    return res
def getvolatility(self):
    """Fit a constant-mean GARCH(1, 0, 1) model with normal errors and print it.

    Also prints ``vol``, the sum of ``df[i] ** 2 / 10000`` over the series
    returned by ``volatility.getyieldrate(self)``, starting at index 1
    (index 0 is skipped).

    Returns:
        Always 0; the results are only printed.
    """
    df = volatility.getyieldrate(self)

    # Accumulate squared values scaled by 1/10000, skipping the first entry.
    # NOTE(review): presumably entry 0 is skipped because the first yield
    # rate is undefined - confirm against getyieldrate.
    vol = 0.0
    for i in range(1, len(df)):
        vol = vol + df[i] * df[i] / 10000.0

    am = ConstantMean(df)
    am.volatility = GARCH(1, 0, 1)
    am.distribution = Normal()
    res = am.fit()

    print('vol =' + str(vol))
    # Call summary(): printing the attribute alone only shows the bound method.
    print(res.summary())
    return 0
def run_garch_rolling(y, rvol, model, split_date, x=None, verbose=True, lam=None):
    """Fit one constant-mean volatility model and return summed loss figures.

    Args:
        y: Series handed to ``ConstantMean``.
        rvol: Frame joined with the model's volatility; its first column is
            treated as the reference and the joined column as the prediction.
        model: "GARCH", "EGARCH" or "EWMA"; anything else only prints a
            message and leaves the volatility process untouched.
        split_date: Passed to ``fit`` as ``last_obs``.
        x: Not used by this function.
        verbose: Not used by this function.
        lam: Passed to ``EWMAVariance`` when ``model == "EWMA"``.

    Returns:
        Tuple ``(df_results, n_train, n_test)``. ``df_results`` has rows
        "<model> in-sample" / "<model> out-of-sample" and columns MAE, MSE,
        HMAE, HMSE. The losses are sums (not means) over the joined rows,
        and the two counts are the lengths of those joined frames.
    """
    mean_spec = ConstantMean(y=y)

    # Build the volatility process lazily so only the selected one is created.
    vol_builders = {
        "GARCH": lambda: GARCH(p=1, q=1),
        "EGARCH": lambda: EGARCH(p=1, o=1, q=1),
        "EWMA": lambda: EWMAVariance(lam),
    }
    builder = vol_builders.get(model)
    if builder is None:
        print("Misspecified volatility process name")
    else:
        mean_spec.volatility = builder()

    fitted = mean_spec.fit(disp='off', last_obs=split_date)

    # One-step-ahead variance -> volatility, shifted by one row before joining.
    one_step = fitted.forecast(horizon=1)
    forecasted_vol = one_step.variance.pow(0.5).shift(1).dropna()

    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(fitted.conditional_volatility).dropna()

    def _summed_losses(frame):
        # Column 0 is the reference series, column 1 the model's volatility.
        reference = frame.iloc[:, 0]
        predicted = frame.iloc[:, 1]
        return [
            np.abs(reference - predicted).sum(),
            np.square(reference - predicted).sum(),
            np.abs(1 - predicted / reference).sum(),
            np.square(1 - predicted / reference).sum(),
        ]

    train_losses = _summed_losses(train_merged)
    test_losses = _summed_losses(test_merged)

    # Rows: MAE, MSE, HMAE, HMSE; columns: in-sample, out-of-sample.
    loss_table = np.c_[train_losses, test_losses]
    df_results = pd.DataFrame(
        data=loss_table,
        columns=[model + ' ' + label for label in ['in-sample', 'out-of-sample']],
        index=['MAE', 'MSE', 'HMAE', 'HMSE'],
    ).T

    return df_results, len(train_merged), len(test_merged)
def egarch_class(series: pd.Series, max_lag: int):
    """Pick the EGARCH(p, o, q) order with the lowest BIC.

    Every combination of p, o, q in ``0..max_lag`` is fitted on a constant
    mean, except those where both p and o are zero.

    Args:
        series: Time series to analyse.
        max_lag: Largest lag considered for each of p, o and q.

    Returns:
        selected_model: dict with "order" (the (p, o, q) tuple) and "bic"
            (the BIC of that fit).
    """
    bic_by_order = {}
    for p, o, q in product(range(max_lag + 1), repeat=3):
        if p == 0 and o == 0:
            continue

        # Constant mean combined with the candidate EGARCH volatility.
        candidate = ConstantMean(series, volatility=EGARCH(p=p, o=o, q=q))
        bic_by_order[(p, o, q)] = candidate.fit(disp="off").bic

    # First entry with the smallest BIC wins, in insertion order.
    best_order, best_bic = min(bic_by_order.items(), key=lambda item: item[1])
    return {"order": best_order, "bic": best_bic}
def garch_volatility(rets: pd.DataFrame, out=None):
    """Select a GARCH-family model and return its fitted conditional volatility.

    Args:
        rets: Returns handed to ``garch_select`` and to the chosen model.
        out: Forwarded to ``garch_select`` as its ``outlier`` argument
            (default None).

    Returns:
        vol: Conditional volatility of the fitted model.
        model: The dict returned by ``garch_select``; its 'model' entry must
            be 'tarch', 'gjr' or 'egarch', and 'order' holds (p, o, q).

    Raises:
        NameError: If the selected model type is none of the three above.
    """
    model = garch_select(rets, outlier=out)

    kind = model['model']
    if kind not in ('tarch', 'gjr', 'egarch'):
        raise NameError('model type not defined')

    order = model['order']
    lags = {'p': order[0], 'o': order[1], 'q': order[2]}

    if kind == 'egarch':
        spec = ConstantMean(rets, volatility=EGARCH(**lags))
    elif kind == 'tarch':
        # Same call as the 'gjr' branch apart from power=1.
        spec = arch_model(rets, power=1, **lags)
    else:
        spec = arch_model(rets, **lags)

    vol = spec.fit(disp="off").conditional_volatility
    return vol, model
# Autocorrelation check on the return residuals (Durbin-Watson statistic)
resid = model.resid
print(sm.stats.durbin_watson(resid.values))

# Test the residuals for ARCH effects; only the last value returned by
# het_arch is kept and compared against the 5% level.
*_, fpvalue = diagnostic.het_arch(resid)
if fpvalue < 0.05:
    print('异方差性显著', fpvalue)
else:
    print('异方差性不显著', fpvalue)

# Build the ARCH model
# Model forecast: MA(1) for the mean, GARCH(1, 0, 1) with Student's t errors
# for the variance.
model = sm.tsa.ARMA(df2, (0, 1)).fit()
arch_mod = ConstantMean(df2)
arch_mod.volatility = GARCH(1, 0, 1)
arch_mod.distribution = StudentsT()
res = arch_mod.fit(update_freq=5, disp='off')

# Parameters are picked by position.
# NOTE(review): presumably model.params is (mu, theta) and res.params is
# (mu, omega, alpha, beta, nu) - confirm against the fitted summaries.
mu = model.params[0]
theta = model.params[1]
omega = res.params[1]
alpha = res.params[2]
beta = res.params[3]

# Last fitted conditional volatility. `.ix` was removed from pandas, so the
# positional accessor `.iloc` is used to take the final observation.
sigma_t = res.conditional_volatility.iloc[-1]
# print(res.conditional_volatility)

# One-step-ahead volatility from the GARCH(1, 1) recursion.
sigma_predict = np.sqrt(omega + alpha * res.resid.iloc[-1]**2 + beta * sigma_t**2)

# Shocks are drawn from a standard normal, so every run prints a different value.
epsilon_t = sigma_t * np.random.standard_normal()
epsilon_predict = sigma_predict * np.random.standard_normal()
return_predict = mu + epsilon_predict + theta * epsilon_t
print(return_predict)

# Test on the 2018 data
def run_garch(y, rvol, model, split_date, x=None, verbose=True, lam=None):
    """Fit one constant-mean volatility model and report mean loss figures.

    Args:
        y: Series handed to ``ConstantMean``.
        rvol: Frame joined with the model's volatility; its first column is
            treated as the reference and the joined column as the prediction.
        model: "GARCH", "EGARCH" or "EWMA"; anything else only prints a
            message and leaves the volatility process untouched.
        split_date: Passed to ``fit`` as ``last_obs``.
        x: Not used by this function.
        verbose: When True, show the fit summary, a plot of predictions
            against ``rvol`` and the loss table.
        lam: Passed to ``EWMAVariance`` when ``model == "EWMA"``.

    Returns:
        DataFrame with rows "<model> in-sample", "<model> out-of-sample" and
        "<model> total", and columns MAE, MSE, HMAE, HMSE. "total" is the
        average of the two losses weighted by their row counts.
    """
    mean_spec = ConstantMean(y=y)

    # Build the volatility process lazily so only the selected one is created.
    vol_builders = {
        "GARCH": lambda: GARCH(p=1, q=1),
        "EGARCH": lambda: EGARCH(p=1, o=1, q=1),
        "EWMA": lambda: EWMAVariance(lam),
    }
    builder = vol_builders.get(model)
    if builder is None:
        print("Misspecified volatility process name")
    else:
        mean_spec.volatility = builder()

    res = mean_spec.fit(disp='off', last_obs=split_date)

    # One-step-ahead variance -> volatility, shifted by one row before joining.
    one_step = res.forecast(horizon=1)
    forecasted_vol = one_step.variance.pow(0.5).shift(1).dropna()

    test_merged = rvol.join(forecasted_vol).dropna()
    train_merged = rvol.join(res.conditional_volatility).dropna()
    n_test = len(test_merged)
    n_train = len(train_merged)

    def _mean_losses(frame):
        # Column 0 is the reference series, column 1 the model's volatility.
        reference = frame.iloc[:, 0]
        predicted = frame.iloc[:, 1]
        return [
            np.abs(reference - predicted).mean(),
            np.square(reference - predicted).mean(),
            np.abs(1 - predicted / reference).mean(),
            np.square(1 - predicted / reference).mean(),
        ]

    train_losses = _mean_losses(train_merged)
    test_losses = _mean_losses(test_merged)
    # Pool both samples by weighting each mean with its number of rows.
    total_losses = [
        (test_loss * n_test + train_loss * n_train) / (n_test + n_train)
        for train_loss, test_loss in zip(train_losses, test_losses)
    ]

    # Rows: MAE, MSE, HMAE, HMSE; columns: in-sample, out-of-sample, total.
    loss_table = np.c_[train_losses, test_losses, total_losses]
    df_results = pd.DataFrame(
        data=loss_table,
        columns=[model + ' ' + label
                 for label in ['in-sample', 'out-of-sample', 'total']],
        index=['MAE', 'MSE', 'HMAE', 'HMSE'],
    ).T

    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())

        display(Markdown('#### <br> <br> Plot forecast by model vs realized vol'))
        ax = plt.gca()
        forecasted_vol.plot(color='g', ax=ax, alpha=1, label='prediction oos')
        rvol.plot(color='blue', ax=ax, label='ground truth')
        res.conditional_volatility.plot(color='orange', ax=ax,
                                        label='prediction in-sample')
        ax.legend()

        display(Markdown('#### <br> <br> Results of out-of-sample forecasts with various loss functions'))
        display(df_results)

    return df_results
# Log returns of spClose (difference of consecutive log prices), multiplied
# by `scale`; the leading NaN produced by the shift is dropped before plotting.
returns = spClose.apply(np.log) - spClose.shift(1).apply(np.log)
returns *= scale
returns.dropna(inplace=True)
returns.plot()

# Values handed to the NGARCH11 constructor below.
# NOTE(review): presumably these are starting values, with omega multiplied
# by scale**2 to match the scaled returns - confirm against NGARCH11.
omega = 0.000005 * scale**2
alpha = 0.07
beta = 0.85
theta = 0.5

# using NGARCH11
tsm = ConstantMean(returns)
ngarch = NGARCH11(np.array([omega, alpha, beta, theta]))
tsm.volatility = ngarch
rst = tsm.fit()
print(rst)
rst.plot(annualize='D')
# Histogram of the standardized residuals with a fitted normal overlaid.
sns.distplot(rst.std_resid, fit=stats.norm)
# Check the estimated alpha/beta/theta with the model's own is_valid().
print(
    ngarch.is_valid(rst.params['alpha'], rst.params['beta'], rst.params['theta']))
# QQ plot of the standardized residuals against the 45-degree line.
sm.graphics.qqplot(rst.std_resid, line='45')

# using FixedNGARCH11
tsm = ConstantMean(returns)
import pandas_datareader.data as web
from arch import arch_model
from arch.univariate import ConstantMean, GARCH, Normal
#from arch.univariate import ZeroMean, GARCH, Normal

# Download '^GSPC' from the 'yahoo' source for 2000-01-01 .. 2014-01-01.
# NOTE(review): `dt` is not imported in the visible lines - presumably
# `import datetime as dt` appears earlier in the file; confirm.
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2014, 1, 1)
sp500 = web.DataReader('^GSPC', 'yahoo', start=start, end=end)

# Percentage returns (x100) of the adjusted close; the first NaN is dropped.
returns = 100 * sp500['Adj Close'].pct_change().dropna()

# Constant mean + GARCH(1, 0, 1) volatility + normal errors.
am = ConstantMean(returns)
am.volatility = GARCH(1, 0, 1)
am.distribution = Normal()
res = am.fit()
# The summary is not printed or assigned here; it only shows up when this is
# the last expression of an interactive cell.
res.summary()

# %%
# import the packages
import numpy as np
from scipy.optimize import minimize
import scipy.stats as stats
import time

# Set up your x values
x = np.linspace(0, 100, num=100)

# Set up your observed y values with a known slope (2.4), intercept (5), and sd (4)
def bruteforce_ts_model(returns, start_p, start_q, max_p, max_q):
    """Fit every ARCH-family / distribution combination and record its scores.

    For each volatility model (ARCH, GARCH, EGARCH), each p in
    ``range(start_p, max_p)``, each q in ``range(start_q, max_q)`` and each
    error distribution (normal, Student's t, skewed Student's t), a
    constant-mean model is fitted and its AIC, BIC and log-likelihood are
    stored. ARCH models ignore q but are still fitted and recorded once per
    q value. Progress is printed after every fit.

    Args:
        returns (pandas.Series): The return series to model.
        start_p (int): First value of p (inclusive).
        start_q (int): First value of q (inclusive).
        max_p (int): End of the p range (exclusive, as in ``range``).
        max_q (int): End of the q range (exclusive, as in ``range``).

    Returns:
        pandas.DataFrame: One row per fitted model with the columns
            volatility_model, mean_model, dist, p, q, AIC_score, BIC_score
            and LL_score. Empty (but with these columns) when either range
            is empty.
    """
    model_types = ['ARCH', 'GARCH', 'EGARCH']
    dist_types = ['normal', 'studentst', 'skewstudent']
    columns = ['volatility_model', 'mean_model', 'dist', 'p', 'q',
               'AIC_score', 'BIC_score', 'LL_score']

    p_values = range(start_p, max_p)
    q_values = range(start_q, max_q)

    # Total number of fits, counted from the ranges actually iterated so the
    # progress display is right when start_p / start_q are not zero.
    max_iter = (len(p_values) * len(q_values)
                * len(model_types) * len(dist_types))
    current_iter = 0

    rows = []
    for model in model_types:
        for each_p in p_values:
            for each_q in q_values:
                for dist in dist_types:
                    am = ConstantMean(returns)

                    # Strings are compared by value (==), never by identity.
                    if dist == 'normal':
                        am.distribution = Normal()
                    elif dist == 'studentst':
                        am.distribution = StudentsT()
                    elif dist == 'skewstudent':
                        am.distribution = SkewStudent()

                    if model == "ARCH":
                        am.volatility = ARCH(p=each_p)
                    elif model == "GARCH":
                        am.volatility = GARCH(p=each_p, q=each_q)
                    elif model == "EGARCH":
                        am.volatility = EGARCH(p=each_p, q=each_q)

                    res = am.fit(update_freq=5, disp='off')

                    rows.append({
                        'volatility_model': model,
                        'mean_model': 'ConstantMean',
                        'dist': dist,
                        'p': each_p,
                        'q': each_q,
                        'AIC_score': res.aic,
                        'BIC_score': res.bic,
                        'LL_score': res.loglikelihood,
                    })

                    print(
                        f"it: {current_iter}/{max_iter}\tmodel:{model}\tdist:{dist[:6]}\tp:{each_p}\tq:{each_q}\tAIC_score:{round(res.aic,2)}\tBIC_score:{round(res.bic,2)}\tLog Likelihood:{round(res.loglikelihood,2)}"
                    )
                    current_iter += 1

        print("=" * 20, f"{model} finished", "=" * 20)

    # Passing `columns` keeps the column order and yields the same columns
    # even when no model was fitted.
    df = pd.DataFrame(rows, columns=columns)
    return df
# Starting values for the two fits below; the array passed to `fit` is
# [0.0, omega, alpha, beta].
omega = 0.000005 * scale**2
alpha = 0.1
beta = 0.85

# Method 1: the arch_model convenience constructor with its defaults.
garch = arch_model(returns)
rst = garch.fit(starting_values=np.array([0.0, omega, alpha, beta]))
print(rst)
rst.plot(annualize='D')

# Method 2
# The same kind of model assembled by hand: constant mean + GARCH(1, 1).
tsm = ConstantMean(returns)
garch = GARCH(p=1, q=1)
tsm.volatility = garch
rst = tsm.fit(starting_values=np.array([0.0, omega, alpha, beta]))
print(rst)
rst.plot(annualize='D')
# Histogram of the residuals with a fitted normal overlaid.
sb.distplot(rst.resid, fit=stats.norm)

# Exercise 2
# `read_csv(..., squeeze=True)` was removed in pandas 2.0; squeezing the
# resulting frame along its columns gives the same single-column Series.
spClose = pd.read_csv('data/Chapter4_Data1.csv',
                      parse_dates=True,
                      index_col='Date').squeeze('columns')
spClose.plot()
# Log returns: difference of consecutive log prices.
returns = spClose.apply(np.log) - spClose.shift(1).apply(np.log)
# NOTE(review): the statements down to `results[(p, q)] = ...` look like the
# tail of a loop over (p, q) whose header lies outside this chunk; their
# original nesting cannot be recovered from here, so they are left flat.
# Last row, first column of the forecast mean and variance.
mu = forecast.mean.iloc[-1, 0]
var = forecast.variance.iloc[-1, 0]
# Pair the squared forecast error with the forecast variance.
result.append([(test_set-mu)**2, var])
df = pd.DataFrame(result, columns=['y_true', 'y_pred'])
# RMSE between the squared errors and the forecast variances for this (p, q).
results[(p, q)] = np.sqrt(mean_squared_error(df.y_true, df.y_pred))

# Reshape the (p, q) -> RMSE mapping into a p-by-q grid and draw it as a heatmap.
s = pd.Series(results)
s.index.names = ['p', 'q']
s = s.unstack().sort_index(ascending=False)
sns.heatmap(s, cmap='Blues', annot=True, fmt='.4f')
plt.title('Out-of-Sample RMSE')
plt.savefig(f'{str(iop)}Out-of-Sample RMSE.png')

''' estimate GARCH model '''
# The trailing comma still yields a 2-tuple, so both names are bound to 2.
best_p, best_q = 2, 2,
# Returns are clipped to their 5%-95% quantile range before fitting.
am = ConstantMean(nasdaq_returns.clip(lower=nasdaq_returns.quantile(.05),
                                      upper=nasdaq_returns.quantile(.95)))
am.volatility = GARCH(best_p, 0, best_q)
am.distribution = Normal()
best_model = am.fit(update_freq=5)
print(best_model.summary())

# Plot of the fitted model, resized to 12x8 inches.
fig = best_model.plot(annualize='D')
fig.set_size_inches(12, 8)
fig.tight_layout()

# Correlogram of the model residuals (NaNs dropped) out to 250 lags.
plot_correlogram(best_model.resid.dropna(), lags=250, title='GARCH Residuals')
print(adf.summary().as_text())

from arch import arch_model

# Fit arch_model with its defaults to each return series.
mtss_am = arch_model(mtss_returns)
mtss_res = mtss_am.fit(update_freq=5, disp = 'off')
mfon_am = arch_model(mfon_returns)
mfon_res = mfon_am.fit(update_freq=5, disp = 'off')

# Conditional volatility multiplied by sqrt(252); the bare expressions are
# only shown when they are the last expression of an interactive cell.
mfon_res.conditional_volatility
mfon_vol = mfon_res.conditional_volatility * np.sqrt(252)
mtss_res.conditional_volatility
mtss_vol = mtss_res.conditional_volatility * np.sqrt(252)

# Constant-mean fit on MTSS returns, then het_arch on its residuals; element
# 3 of the returned tuple is kept.
cm = ConstantMean(mtss_returns)
res = cm.fit(update_freq=5)
f_pvalue = het_arch(res.resid)[3]
# NOTE(review): the volatility process is assigned after the fit above and
# `cm` is not fitted again in the visible lines, so `res` does not use it.
cm.volatility = GARCH(p=1, q=1)

# Plot both scaled volatility series with a legend.
p = plt.plot(title='ASSAD')
p1 = plt.plot(mfon_vol)
p2 = plt.plot(mtss_vol)
p = plt.legend((p1[0], p2[0]), ('MFON', 'MTSS'))

from scipy import stats

# Upper-tail chi-square(1) probability of the hard-coded statistic 0.940659.
pvalue = 1 - stats.chi2.cdf(0.940659, 1)

from arch import arch_model
from scipy import stats
# NOTE(review): the next two statements are the tail of a function whose
# header lies outside this chunk; they belong inside that function's body
# and are left flat here because its nesting cannot be recovered.
table = tabulate(d_p, headers=H1, floatfmt=".4f")
return table

tab_5 = table_5(data, 0)
print(tab_5.table_comp_a())

# %% table 6 a
# Fit garch_m on the CRSP 'spread' column for the years 1953-1984.
model_garch_cr = garch_m(data_crsp[(data_crsp['year'] >= 1953) &
                                   (data_crsp['year'] <= 1984)]['spread'])
results_g_cr = model_garch_cr.fit()
results_g_cr.summary()

# %%
from arch.univariate import ConstantMean, GARCH

# Constant-mean GARCH(2, 1) fit on the spread; parameters kept as an array.
gar_0 = ConstantMean(data['spread'])
gar_0.volatility = GARCH(p=2, q=1)
gar_0_r = gar_0.fit()
gar_pa_0 = np.array(gar_0_r.params)

# %%
# ARMA(0, 1) on the spread with the GARCH conditional volatility as exog.
sigma_2 = gar_0_r.conditional_volatility
# X is built here but not used in the visible lines.
X = sm.add_constant(sigma_2)
#mean_0 = sm.tsa.ARMA(data['spread'], order=(0,1))
mean_0 = statsmodels.tsa.arima_model.ARMA(data['spread'], exog=sigma_2, order=(0, 1))
mean_0_r = mean_0.fit()
mean_pa_0 = np.array(mean_0_r.params)