import arch
import pandas as pd
import statsmodels.api as sm
from scipy.signal import detrend
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import Helpers as hlp

for cur in dl.valid_currencies():
    print('Evaluating {}'.format(cur))
    c_train, c_test = dl.load_test_train(cur)
    train_volatility = hlp.rolling_standard_dev(c_train.values)
    test_volatility = hlp.rolling_standard_dev(c_test.values)
    garch_model = arch.arch_model(train_volatility).fit()
    params = (garch_model.params['omega'],
              garch_model.params['alpha[1]'],
              garch_model.params['beta[1]'])
    forecaster = hlp.GarchForecaster(params)
    garch_predict = pd.Series(test_volatility, dtype=float)\
        .apply(lambda x: forecaster.forecast(x))\
        .shift(1)
    dFrame = pd.DataFrame(test_volatility, columns=['Actual'])
    dFrame['Predict'] = garch_predict
    dFrame.plot(title=cur)
def garch_forecast(self):
    """Return GARCH(1,1) volatility forecast as a tensor of shape [V],
    where V is vectors/day.
    """
    # short circuit for speeding up testing
    # return numpy.array([0] * len(self._past_returns[0]))
    try:
        return self._garch_forecast
    except AttributeError:
        logging.info("running garch forecast")

    variances = []
    for values in numpy.transpose(self._past_returns):
        garch = arch.arch_model(values)
        results = garch.fit(disp="off")
        omega = results.params["omega"]
        alpha1 = results.params["alpha[1]"]
        beta1 = results.params["beta[1]"]
        # one-step-ahead GARCH(1,1) variance:
        # sigma^2_{t+1} = omega + alpha_1 * eps_t^2 + beta_1 * sigma_t^2
        forecast = omega \
            + alpha1 * results.resid[-1] ** 2 \
            + beta1 * results.conditional_volatility[-1] ** 2
        if numpy.isnan(forecast):
            forecast = 0
        forecast = max(forecast, 0)     # ignore negative variance
        forecast = min(forecast, 0.04)  # limit to trading halt trigger
        variances.append(forecast)
    self._garch_forecast = numpy.sqrt(variances) * numpy.sqrt(252)
    return self._garch_forecast
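# Cross-check (a minimal sketch, not part of the original snippet): the
# hand-rolled one-step recursion above should match the package's own
# forecaster. The series `r` below is simulated purely for illustration.
import numpy as np
from arch import arch_model

np.random.seed(0)
r = np.random.normal(scale=1.0, size=500)  # hypothetical returns, in percent

res = arch_model(r).fit(disp="off")
manual = (res.params["omega"]
          + res.params["alpha[1]"] * res.resid[-1] ** 2
          + res.params["beta[1]"] * res.conditional_volatility[-1] ** 2)
packaged = res.forecast(horizon=1).variance.iloc[-1, 0]
print(manual, packaged)  # the two variance forecasts should agree closely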
def get_cv_from_file(f):
    with open(os.path.join(datadir, f), 'r', encoding='latin1') as inf:
        fulls = inf.read()
    # strip attributes from table tags so BeautifulSoup parses them uniformly
    for tag in ['table', 'td', 'th', 'tr']:
        fulls = sub(r'<{0} .*?>'.format(tag), '<{0}>'.format(tag), fulls)
    bs = BeautifulSoup(fulls, 'lxml')
    rows = bs.find_all('tr')
    dates = []
    vals = []
    # skip the last row because it shows a level instead of a percent gain
    for j, row in enumerate(rows[1:-1]):
        td = row.find_all('td')
        fl = parseFloat(td[2].text)
        if fl is not None and fl > 0.0:
            dates.append(parseDate(td[0].text))
            vals.append(fl)
    dates.reverse()
    vals.reverse()
    df = pd.DataFrame(list(zip(dates, vals)), columns=['Dates', 'Prices'])
    df['Returns'] = df.Prices.pct_change() * 100
    df = df[df['Returns'].abs() < 25]  # crude rule of thumb to clean measurement errors
    df.set_index('Dates', inplace=True)
    am = arch_model(df.Returns.dropna().tolist(), p=1, o=0, q=1)
    res = am.fit(update_freq=1, disp='off')
    df['cv'] = res.conditional_volatility
    d = df.resample("6M").mean()
    d.index = d.index.map(lambda t: t.replace(year=t.year,
                                              month=((t.month - 1) // 6 + 1) * 6,
                                              day=1))
    return d
def GARCH(share):
    r = returns(share)
    am = arch.arch_model(r)
    res = am.fit(update_freq=5)
    var = res.conditional_volatility[len(r) - 251]
    # print(res.summary())
    params = res.params.tolist()
    params.append(var)
    return params
def load_external():
    # http://www.policyuncertainty.com/europe_monthly.html
    fedea = 'FEEA.PURE064A.M.ES'  # 'FEEA.SMOOTH064A.M.ES'
    ipri = 'ESE.425000259D.M.ES'
    # government bonds:
    gb = ['EU.IRT_H_CGBY_M.M.DE', 'BE.IE_2_6_502A1.M.DE',
          'EU.IRT_H_CGBY_M.M.ES', 'BE.BE_26_25_10294.M.ES',
          'ESE.854200259D.M.ES']
    qbuilder = inquisitor.Inquisitor(token)
    df = qbuilder.series(ticker=[fedea, ipri] + gb)
    returns = df['ESE.854200259D.M.ES'].pct_change().dropna() * 100
    returns = returns.sub(returns.mean())['19890101':]
    am = arch_model(returns, p=1, o=0, q=1)
    res = am.fit(update_freq=1, disp='off')
    df['cv'] = res.conditional_volatility
    return df
def arch_test():
    r = numpy.array([0.945532630498276, 0.614772790142383, 0.834417758890680,
                     0.862344782601800, 0.555858715401929, 0.641058419842652,
                     0.720118656981704, 0.643948007732270, 0.138790608092353,
                     0.279264178231250, 0.993836948076485, 0.531967023876420,
                     0.964455754192395, 0.873171802181126, 0.937828816793698])
    garch11 = arch_model(r, p=1, q=1)
    res = garch11.fit(update_freq=10)
    print("arch test >>>", res.summary())
def estimate_univ(self):
    """Estimate univariate volatility models."""
    vol = []
    forecast = []
    theta = []
    data = self.data.ret.copy()
    for col in data:
        model = arch_model(data[col], p=1, q=1,
                           mean='Zero', vol='GARCH', dist='Normal')
        res = model.fit(disp='off')
        theta.append(res.params)
        vol.append(res.conditional_volatility)
        forecast.append(garch_forecast(res).iloc[-1, 0])
    theta = pd.concat(theta, axis=1)
    theta.columns = data.columns
    self.data.univ_vol = pd.concat(vol, axis=1)
    self.data.univ_vol.columns = data.columns
    self.param.univ = theta
    self.data.univ_forecast = np.array(forecast)
def _addOne(self, _data_struct: DataStruct):
    index = _data_struct.index()[0]
    price = _data_struct[self.use_key][0]
    if self.last_price is not None:
        rate = math.log(price / self.last_price)
        self.rate_buf.append(rate)
        self.fit_count += 1
        if self.fit_count > self.fit_period and \
                len(self.rate_buf) >= self.fit_begin:
            # retrain model and reset sigma2
            rate_arr = np.array(self.rate_buf)
            am = arch_model(rate_arr, mean='Zero')
            res = am.fit(disp='off', show_warning=False)
            # input(res.summary())
            self.param = res.params.values
            self.sigma2 = res.conditional_volatility[-1] ** 2
            self.fit_count = 0
        if self.param is not None:
            estimate = math.sqrt(self.sigma2) * self.factor
            # GARCH(1,1) update: sigma2 <- omega + alpha * r^2 + beta * sigma2
            self.sigma2 = self.param[0] + \
                self.param[1] * rate * rate + \
                self.param[2] * self.sigma2
            predict = math.sqrt(self.sigma2)
            predict *= self.factor
            if self.smooth_period > 1 and len(self.data):
                # smooth
                last_value = self.data[self.ret_key[1]][-1]
                predict = (predict - last_value) / \
                    self.smooth_period + last_value
            self.data.addDict({
                self.idx_key: index,
                self.ret_key[0]: estimate,
                self.ret_key[1]: predict,
            })
    self.last_price = price
'''
model selection
'''
trainsize = 10 * 252  # 10 years
data = nasdaq_returns.clip(lower=nasdaq_returns.quantile(.05),
                           upper=nasdaq_returns.quantile(.95))
T = len(nasdaq_returns)
results = {}
for p in range(1, 5):
    for q in range(1, 5):
        print(f'{p} | {q}')
        result = []
        for s, t in enumerate(range(trainsize, T - 1)):
            train_set = data.iloc[s:t]
            test_set = data.iloc[t + 1]  # 1-step ahead forecast
            model = arch_model(y=train_set, p=p, q=q).fit(disp='off')
            forecast = model.forecast(horizon=1)
            mu = forecast.mean.iloc[-1, 0]
            var = forecast.variance.iloc[-1, 0]
            result.append([(test_set - mu) ** 2, var])
        df = pd.DataFrame(result, columns=['y_true', 'y_pred'])
        results[(p, q)] = np.sqrt(mean_squared_error(df.y_true, df.y_pred))

s = pd.Series(results)
s.index.names = ['p', 'q']
s = s.unstack().sort_index(ascending=False)
sns.heatmap(s, cmap='Blues', annot=True, fmt='.4f')
plt.title('Out-of-Sample RMSE')
plt.savefig(f'{str(iop)}Out-of-Sample RMSE.png')
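# Follow-up sketch (assumes the `s` DataFrame built above): the grid search
# only plots the RMSE surface; the minimizing order can be read off directly.
best_p, best_q = s.stack().idxmin()  # (p, q) pair with the lowest out-of-sample RMSE
print(f'best order: GARCH({best_p}, {best_q})')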
def get_coefficient(ind):
    # read the data
    characteristic = []
    df = data[ind]
    df.index = pd.to_datetime(df.index)  # convert the string index to a DatetimeIndex
    ts = df[index[ind]]  # build a pd.Series
    t = sm.tsa.stattools.adfuller(ts)  # ADF test
    # stationary or not?
    if t[1] <= 0.05:
        characteristic.append('yes')
    else:
        characteristic.append('no')
    # determine the AR lag order
    lagnum = sm.tsa.pacf(ts, nlags=20, method='ywunbiased', alpha=None)
    n = len(lagnum)
    lagsatis = []
    aa = 1
    for i in range(n):
        if aa == 1:
            if abs(lagnum[i]) > 0.05:
                lagsatis.append(i)
            else:
                aa = aa * (-1)  # keep only the leading run of lags with |PACF| > 0.05
        else:
            break
    # fit the AR model, i.e. the mean equation
    lagnumber = lagsatis[-1]  # AR order
    order = (lagnumber, 0)
    model = sm.tsa.ARMA(ts, order).fit()
    # residuals and squared residuals
    at = ts - model.fittedvalues
    at2 = np.square(at)
    # test the first 25 autocorrelations
    m = 25
    acf, q, p = sm.tsa.acf(at2, nlags=m, qstat=True)  # autocorrelations and p-values
    out = np.c_[range(1, 26), acf[1:], q, p]
    output = pd.DataFrame(out, columns=['lag', "AC", "Q", "P-value"])
    output = output.set_index('lag')
    b = [x[3] for x in out]  # extract the p-values
    # is the squared-residual series autocorrelated, i.e. is there an ARCH effect?
    s = 0
    for i in range(5):
        if b[i] > 0.05:
            s = s + 1
    if s == 0:
        characteristic.append('yes')
        # determine the ARCH lag order
        lagnum1 = sm.tsa.pacf(at2, nlags=20, method='ywunbiased', alpha=None)
        n = len(lagnum1)
        lagsatis1 = []
        aa = 1
        for i in range(n):
            if aa == 1:
                if abs(lagnum1[i]) > 0.05:
                    lagsatis1.append(i)
                else:
                    aa = aa * (-1)  # keep only the leading run of lags with |PACF| > 0.05
            else:
                break
        pnumber = lagsatis1[-1]  # ARCH order
        train = ts[:-10]
        # fit the ARCH model
        am = arch.arch_model(train, mean='AR', lags=lagnumber, vol='ARCH',
                             p=pnumber)
        res = am.fit()
        res.summary()  # regression fit
        arch_coefficient = res.params  # extract the coefficients
        arch_tvalue = res.tvalues  # extract the t-values
        arch_final = pd.DataFrame({
            'coefficient': arch_coefficient,
            'tvalue': arch_tvalue
        })
        # fit a GARCH model
        am = arch.arch_model(train, mean='AR', lags=lagnumber, vol='GARCH')
        res1 = am.fit()
        res1.summary()
        garch_coefficient = res1.params
        garch_tvalue = res1.tvalues  # extract the t-values
        garch_final = pd.DataFrame({
            'coefficient': garch_coefficient,
            'tvalue': garch_tvalue
        })
        # fit an EGARCH model
        am = arch.arch_model(train, mean='AR', lags=lagnumber, vol='EGARCH',
                             p=1, o=1, q=1, power=1.0)
        res2 = am.fit()
        res2.summary()
        egarch_coefficient = res2.params
        egarch_tvalue = res2.tvalues  # extract the t-values
        egarch_final = pd.DataFrame({
            'coefficient': egarch_coefficient,
            'tvalue': egarch_tvalue
        })
    else:
        characteristic.append('no')
        arch_final = pd.DataFrame()
        garch_final = pd.DataFrame()
        egarch_final = pd.DataFrame()
    return characteristic, arch_final, garch_final, egarch_final
plt.subplot(212)
plt.plot(at2, label='at^2')
plt.legend(loc=0)
plt.show()

print("=== Testing residual autocorrelation and homoskedasticity")
print("Ljung-Box Test: H0 --- the error terms are mutually independent")
m = 10  # test the first 10 autocorrelations
acf, q, p = sm.tsa.acf(at2, nlags=m, qstat=True)  # autocorrelations and p-values
out = np.c_[range(1, 11), acf[1:], q, p]
output = pd.DataFrame(out, columns=['lag', "AC", "Q", "P-value"])
output = output.set_index('lag')
print(output)
print("p-values at all lags are below 0.05")
print("Reject the null: the squared residuals are autocorrelated, i.e. an ARCH effect is present")

print("=== Determining the ARCH model order")
fig = plt.figure(figsize=(20, 5))
ax1 = fig.add_subplot(111)
fig = sm.graphics.tsa.plot_pacf(at2, lags=15, ax=ax1)
plt.show()
print("=== The lag-1 PACF clearly leaves the confidence band, so an ARCH(1) model is chosen")

print("=== Building the GARCH model")
# training set
train = data[:-10]
# test set
test = data[-10:]
am = arch.arch_model(train, mean='AR', lags=1, vol='GARCH')
res = am.fit()
import warnings

import matplotlib
from matplotlib.pyplot import figure
import numpy as np
import seaborn as sns
from arch import arch_model
import arch.data.sp500

warnings.simplefilter("ignore")
sns.set_style("whitegrid")
sns.mpl.rcParams["figure.figsize"] = (12, 3)

data = arch.data.sp500.load()
market = data["Adj Close"]
returns = 100 * market.pct_change().dropna()

am = arch_model(returns)
res = am.fit(update_freq=5)

prop = matplotlib.font_manager.FontProperties("Roboto")


def _set_tight_x(axis, index):
    try:
        axis.set_xlim(index[0], index[-1])
    except ValueError:
        pass


fig = figure()
ax = fig.add_subplot(1, 1, 1)
vol = res.conditional_volatility
def apply_model(array):
    model = arch_model(array, rescale=False)
    return model
import numpy as np
from arch.unitroot import ADF
from arch.univariate import ConstantMean, GARCH
from statsmodels.stats.diagnostic import het_arch

adf = ADF(mfon_df['<CLOSE>'])
print(adf.summary().as_text())

mtss_returns = 100 * mtss_df['<CLOSE>'].pct_change().dropna()
mfon_returns = 100 * mfon_df['<CLOSE>'].pct_change().dropna()
mtss_rplt = mtss_returns.plot(title='MTSS daily returns')
mfon_rplt = mfon_returns.plot(title='MFON daily returns')

adf = ADF(mtss_returns)
print(adf.summary().as_text())
adf = ADF(mfon_returns)
print(adf.summary().as_text())

from arch import arch_model

mtss_am = arch_model(mtss_returns)
mtss_res = mtss_am.fit(update_freq=5, disp='off')
mfon_am = arch_model(mfon_returns)
mfon_res = mfon_am.fit(update_freq=5, disp='off')

# annualize the daily conditional volatilities
mfon_vol = mfon_res.conditional_volatility * np.sqrt(252)
mtss_vol = mtss_res.conditional_volatility * np.sqrt(252)

cm = ConstantMean(mtss_returns)
res = cm.fit(update_freq=5)
f_pvalue = het_arch(res.resid)[3]
cm.volatility = GARCH(p=1, q=1)
from arch import arch_model

# !!! The arch package switches the meaning of p and q of the GARCH models
# compared to the notation on Wikipedia !!!

np.mean(np.square(residuals))
# the mean of the squared residuals is very close to zero, so the mean equation
# of the ARCH model can be omitted

# If we already have the residuals we can fit the ARCH only on that part by
# setting the mean equation to be a constant zero; let's see an ARCH(q=1) on
# the squared residuals
# (If we wanted to work with absolute residuals, power=1 should be used)
am1 = arch_model(residuals, mean="Zero", vol="ARCH", p=1, dist="Normal",
                 power=2.0)
res1 = am1.fit()
res1.plot()
res1.summary()
pylab.plot(np.square(residuals))

# previously we found from the PACF of the squared (and absolute) residuals
# that the lag should be 8
am2 = arch_model(residuals, mean="Zero", vol="ARCH", p=8, dist="Normal",
                 power=2.0)
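# Sketch of the p/q caveat above (simulated, purely illustrative): in the arch
# package an ARCH(k) is requested with p=k, even though classic notation calls
# the ARCH order q. Simulating an ARCH(1) with known, made-up parameters and
# refitting should roughly recover omega and alpha[1].
from arch import arch_model

sim_mod = arch_model(None, mean="Zero", vol="ARCH", p=1)
sim_data = sim_mod.simulate([0.5, 0.4], nobs=2000)  # omega=0.5, alpha=0.4 (made up)
refit = arch_model(sim_data["data"], mean="Zero", vol="ARCH", p=1).fit(disp="off")
print(refit.params)  # omega and alpha[1] should be near 0.5 and 0.4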
# -*- coding: utf-8 -*-
# https://pypi.python.org/pypi/arch
# conda install -c https://conda.binstar.org/bashtage arch
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from arch import arch_model

r = np.array([
    0.945532630498276, 0.614772790142383, 0.834417758890680,
    0.862344782601800, 0.555858715401929, 0.641058419842652,
    0.720118656981704, 0.643948007732270, 0.138790608092353,
    0.279264178231250, 0.993836948076485, 0.531967023876420,
    0.964455754192395, 0.873171802181126, 0.937828816793698
])
garch11 = arch_model(r, p=1, q=1)
res = garch11.fit(update_freq=10)
print(res.summary())
import pandas as pd
import pandas_datareader.data as web
from arch import arch_model

start = '1971-01-04'
# data['Adj Close'].head().pct_change().dropna()
jpy = web.DataReader('DEXJPUS', 'fred', start=start)
print(jpy['DEXJPUS'].head().pct_change().dropna())
print("*" * 60)
ret = jpy['DEXJPUS'].pct_change().dropna()

## GARCH(1, 1)
am = arch_model(ret)
res = am.fit(update_freq=5)
print(res.summary())
print("*" * 60)

## GJR-GARCH
start = '2010-01-01'
jpy = web.DataReader('DEXJPUS', 'fred', start=start)
ret = jpy['DEXJPUS'].pct_change().dropna()
gjr_am = arch_model(ret, p=1, o=1, q=1)
res = gjr_am.fit(update_freq=5, disp='off')
print(res.summary())
print("*" * 60)
         linestyle='--')
plt.show()

##############################
### GARCH MODELING PORTION ###
##############################

# GARCH FIXED ROLLING WINDOW (21d)
start_loc = 0
window_size = 21
end_loc = window_size
steps = 30
forecasts = {}
model = arch_model(spy_raw_data['adj_close_1vol'], vol='GARCH', p=1, q=1,
                   rescale=False)
model_fit = model.fit()
for i in range(len(spy_raw_data) - window_size):
    model_result = model.fit(first_obs=i + start_loc,
                             last_obs=i + end_loc,
                             disp='off')
    temp_result = model_result.forecast(horizon=1).variance
    fcast = temp_result.iloc[i + end_loc]
    forecasts[fcast.name] = fcast
forecast_var = pd.DataFrame(forecasts).T
forecast_vol = np.sqrt(forecast_var)

plt.plot(forecast_vol, color='red', label='Forecast', alpha=0.5)
plt.plot(spy_raw_data['adj_close_1vol'][window_size:], color='green',
# 2) Create tests for the other assumptions, to choose what goes into the distribution.
def stationary_test(data):
    fuller = adf.adfuller(data, autolag='AIC')
    kpss = adf.kpss(data, regression='ct', nlags='auto')
    # stationary only if ADF rejects a unit root and KPSS fails to reject stationarity
    if (fuller[0] < fuller[4]['5%']) and (kpss[0] < kpss[3]['5%']):
        return 1
    else:
        return 0


stationarity_res = stationary_test(x)

arch_m = arch_model(x, vol='GARCH', p=1, q=1, dist='Normal')
garch = arch_m.fit(disp='off')
# in-sample one-step-ahead volatility: sqrt(omega + alpha * eps^2 + beta * sigma^2)
garch_volatility = np.sqrt(garch.params['omega'] +
                           garch.params['alpha[1]'] * garch.resid**2 +
                           garch.conditional_volatility**2 * garch.params['beta[1]'])
# unconditional (long-run) volatility: sqrt(omega / (1 - alpha - beta))
longterm_volty = np.sqrt(
    garch.params['omega'] /
    (1 - garch.params['alpha[1]'] - garch.params['beta[1]']))

volty_df = yz_volatility
volty_df['garch_volty'] = garch_volatility
volty_df['longterm_volty'] = longterm_volty
volty_df.plot()

plt.figure(figsize=(16, 6))
ljb_archb = sm.stats.acorr_ljungbox(bonds_resid**2, lags=4, return_df=True)
result_lm = pd.DataFrame(
    [[lmtest_s, lmtest_b],
     [1 - pval_slm[0], 1 - pval_slm[1]],
     [cv_lm, cv_lm],
     [ljb_archs.iloc[-1, 0], ljb_archb.iloc[-1, 0]],
     [ljb_archs.iloc[-1, 1], ljb_archb.iloc[-1, 1]]],
    index=['tstat', 'pval', 'cv_lm', 'LJB 4 lags', 'pval'],
    columns=['Stock', 'Bonds'])
result_lm.to_excel('result_lm.xlsx')

# Q3d estimate parameters of the GARCH process
am_s = arch_model(stock_resid, mean='Zero', vol='GARCH', p=1, q=1,
                  dist='normal', power=2, rescale=False)
garch_s = am_s.fit()
print(garch_s.summary())
garch_s_forecast = garch_s.forecast(horizon=252)

am_b = arch_model(bonds_resid, mean="Zero", vol='GARCH', p=1, q=1,
                  dist='normal', power=2, rescale=False)
def get_best_model(logRtSeries, pLimit, oLimit, qLimit, predictDays):
    best_bic = np.inf
    best_order = None
    best_mdl = None
    best_numParams = np.inf
    isZeroMean = False

    for pValue in range(pLimit + 1):
        for oValue in range(oLimit + 1):
            for qValue in range(qLimit + 1):
                isZeroMean = False
                try:
                    tmp_mdl = arch_model(y=logRtSeries,
                                         p=pValue, o=oValue, q=qValue,
                                         dist='Normal')
                    tmp_res = tmp_mdl.fit(update_freq=5, disp='off')
                    # Remove the mean if it's not significant
                    if tmp_res.pvalues['mu'] > 0.05:
                        isZeroMean = True
                        tmp_mdl = arch_model(y=logRtSeries, mean='Zero',
                                             p=pValue, o=oValue, q=qValue,
                                             dist='Normal')
                        tmp_res = tmp_mdl.fit(update_freq=5, disp='off')
                    tmp_bic = tmp_res.bic
                    tmp_numParams = tmp_res.num_params
                    tmp_wn_test = tmp_res.resid / tmp_res.conditional_volatility
                    [lbvalue, pvalue] = acorr_ljungbox(tmp_wn_test, lags=20)
                    # Make sure the model passes the Ljung-Box test and fits the time series
                    if pvalue[19] >= 0.05:
                        if best_bic / tmp_bic > 1.05:
                            best_bic = tmp_bic
                            best_order = [pValue, oValue, qValue]
                            best_mdl = tmp_res
                        # Choose the simpler model
                        elif tmp_bic <= best_bic and tmp_numParams <= best_numParams:
                            best_bic = tmp_bic
                            best_order = [pValue, oValue, qValue]
                            best_mdl = tmp_res
                except Exception:
                    continue

    # Handle the situation where no model passes the Ljung-Box test
    if best_mdl is None:
        tmp_mdl = arch_model(y=logRtSeries, p=1, o=1, q=1, dist='Normal')
        best_mdl = tmp_mdl.fit(update_freq=5, disp='off')
        # Remove the mean if it's not significant
        if best_mdl.pvalues['mu'] > 0.05:
            isZeroMean = True
            tmp_mdl = arch_model(y=logRtSeries, mean='Zero', p=1, o=1, q=1,
                                 dist='Normal')
            best_mdl = tmp_mdl.fit(update_freq=5, disp='off')
        best_bic = best_mdl.bic
        best_order = [1, 1, 1]

    # Test the first 20 lags
    wn_test = best_mdl.resid / best_mdl.conditional_volatility
    [lbvalue, pvalue] = acorr_ljungbox(wn_test, lags=20)

    output = {}
    output['Zero Mean Model'] = isZeroMean
    output['Best BIC'] = best_bic
    output['Best Order'] = best_order
    output['Best Model'] = best_mdl
    volForecasts = best_mdl.forecast(horizon=predictDays)
    output['Vol Predictions'] = np.sqrt(volForecasts.residual_variance.iloc[-1].values)
    output['Ljung-Box Test Statistics'] = lbvalue[19]
    output['Ljung-Box Test pvalue'] = pvalue[19]
    return output
import time

import numpy as np
from TorchTSA.model import IGARCHModel, ARMAGARCHModel
from arch import arch_model

from ParadoxTrading.Chart import Wizard
from ParadoxTrading.Fetch.ChineseFutures import FetchDominantIndex
from ParadoxTrading.Indicator import LogReturn
from ParadoxTrading.Indicator.TSA import GARCH

fetcher = FetchDominantIndex()
market = fetcher.fetchDayData('20100701', '20180101', 'cu')
returns = LogReturn().addMany(market).getAllData()
return_arr = np.array(returns['logreturn'])

am = arch_model(return_arr, mean='Zero')
start_time = time.time()
res = am.fit(disp='off', show_warning=False)
print('fitting time:', time.time() - start_time)
print(res.params)

igarch_model = IGARCHModel(_use_mu=False)
start_time = time.time()
igarch_model.fit(return_arr)
print('fitting time:', time.time() - start_time)
print(
    igarch_model.getAlphas(),
    igarch_model.getBetas(),
    igarch_model.getConst(),
)
import datetime as dt
import sys

import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from arch import arch_model

start = dt.datetime(2015, 1, 1)
end = dt.datetime(2018, 1, 1)
sp500 = web.DataReader('SPY', 'iex', start=start, end=end)
returns = 100 * sp500['close'].pct_change().dropna()
returns.plot()
plt.show()

model = arch_model(returns, vol='Garch', p=1, o=0, q=1, dist='Normal')
results = model.fit()
print(results.summary())

forecasts = results.forecast(horizon=30, method='simulation', simulations=1000)
sims = forecasts.simulations
print(np.percentile(sims.values[-1, :, -1].T, 5))
plt.hist(sims.values[-1, :, -1], bins=50)
plt.title('Distribution of Returns')
plt.show()
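# Note (sketch, reuses `sims` from above): the 5th percentile of the simulated
# 30-step-ahead returns is, in effect, a simulation-based tail-risk estimate.
# The same quantile can be pulled for every horizon at once:
var_path = np.percentile(sims.values[-1], 5, axis=0)  # 5% quantile per horizon h=1..30
print(var_path[:5])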
def GARCH_predictioninterval(endog_train,
                             endog_val,
                             forecast_horizon,
                             periodicity=1,
                             mean_forecast=None,
                             p=1,
                             q=1,
                             alpha=1.96,
                             limit_steps=False):
    """
    Calculate the prediction interval for the given forecasts using the GARCH
    method, https://github.com/bashtage/arch

    Parameters
    ----------
    endog_train : pandas.DataFrame
        Training set of the target variable
    endog_val : pandas.DataFrame
        Validation set of the target variable
    forecast_horizon : int
        Number of future steps to be forecasted
    periodicity : int or list of int
        Either a scalar integer value indicating lag length or a list of
        integers specifying lag locations.
    mean_forecast : numpy.ndarray, default = None
        Previously forecasted expected values (e.g. the SARIMAX mean forecast).
        If None, the mean values of the GARCH forecast are used instead.
    p : int
        Lag order of the symmetric innovation
    q : int
        Lag order of lagged volatility or equivalent
    alpha : float, default = 1.96
        Width of the confidence interval; the default of 1.96 corresponds to a
        95% confidence level.
    limit_steps : int, default = False
        Limits the number of simulations/predictions into the future. If
        False, the number of steps equals the length of the validation set.

    Returns
    -------
    pandas.Series
        The forecasted expected values
    pandas.Series
        The upper interval for the given forecasts
    pandas.Series
        The lower interval for the given forecasts
    """
    print('Train a Generalized Autoregressive Conditional Heteroskedasticity (GARCH) model...')
    test_period = range(endog_val.shape[0])
    num_cores = max(multiprocessing.cpu_count() - 2, 1)

    if limit_steps:
        test_period = range(limit_steps)

    model_garch = arch_model(endog_train, vol='GARCH', mean='LS',
                             lags=periodicity, p=p, q=q)
    res = model_garch.fit(update_freq=5)

    # do stepwise iteration and prolongation of the train data again,
    # as a forecast can only work in-sample
    def garch_PI_predict(i):
        # extend the train series with observed values as we move forward in
        # the prediction horizon, to achieve a receding-window prediction
        y_train_i = pd.concat([endog_train, endog_val.iloc[0:i]])
        # need to refit, since forecast cannot do out-of-sample forecasts
        model_garch = arch_model(y_train_i, vol='GARCH', mean='LS',
                                 lags=periodicity, p=p, q=q)
        res = model_garch.fit(update_freq=20)
        forecast = model_garch.forecast(res.params, horizon=forecast_horizon)
        # TODO: check that mean[0] might be NaN because it starts at start - 2
        # TODO: could try to use the previously calculated SARIMAX mean forecast instead...
        if isinstance(mean_forecast, np.ndarray):
            expected_value = pd.Series(mean_forecast[i])
        else:
            expected_value = pd.Series(forecast.mean.iloc[-1])
        sigma = pd.Series([math.sqrt(number)
                           for number in forecast.residual_variance.iloc[-1]])
        # this can be an issue if we reach the end of endog_val with i
        expected_value.index = endog_val.index[i:i + forecast_horizon]
        sigma.index = endog_val.index[i:i + forecast_horizon]
        sigma_hn = sum(sigma) / len(sigma)

        fc_u = expected_value + alpha * sigma
        fc_l = expected_value - alpha * sigma
        return expected_value, fc_u, fc_l

    expected_value, fc_u, fc_l = zip(
        *Parallel(n_jobs=min(num_cores, len(test_period)),
                  mmap_mode='c',
                  temp_folder='/tmp')(
            delayed(garch_PI_predict)(i)
            for i in test_period
            if i + forecast_horizon <= endog_val.shape[0]))
    print('Training and validating GARCH model completed.')
    return np.asarray(expected_value), np.asarray(fc_u), np.asarray(fc_l)
# Fit with ARMA-GARCH model
# ARMA
from statsmodels.tsa.arima_model import ARIMA

model_arma = ARIMA(ts_data, order=(1, 0, 1))
results_arma = model_arma.fit(disp=-1)
residule_arma = results_arma.resid
print(results_arma.summary())

# GARCH
from arch import arch_model

arch = arch_model(residule_arma, p=1, q=1)
res_arch = arch.fit(update_freq=5)
print(res_arch.summary())

# change to normal innovations
np.random.seed(1)
n_samples = 10000
# Use student t
z = np.random.standard_t(20, size=n_samples)
x = np.ones((n_samples,))
x[0] = 0
s = np.ones((n_samples,))
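# The snippet stops after allocating z, x and s; a minimal sketch of the
# GARCH(1,1) simulation loop they appear to set up (parameter values are made
# up for illustration, not taken from res_arch):
omega, alpha, beta = 0.1, 0.1, 0.8
for t in range(1, n_samples):
    s[t] = omega + alpha * x[t - 1] ** 2 + beta * s[t - 1]  # conditional variance
    x[t] = np.sqrt(s[t]) * z[t]                             # simulated return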
def __implement_GARCH_1_1(self):
    self.__model = arch_model(self.__returns, vol='Garch', p=1, q=1,
                              dist='Normal')
print('ADF Statistic: %f' % adfuller[0])
print('p-value: %f' % adfuller[1])
print('Critical Values:')
for key, value in adfuller[4].items():
    print('\t%s: %.3f' % (key, value))
# p-value of 0 (lol), so reject H0: unit root, i.e. log returns are stationary.
# Can therefore fit a GARCH model.

# Split data into train/test for X-value
horizon = 7
train, test = diff_log_returns[:-horizon], diff_log_returns[-horizon:]

# Fit a GARCH(1,1) model to the data (adding in p,q=17 as motivated by the ACF
# actually leads to a higher AIC, so use the more parsimonious model). Assume
# the differenced log returns follow a normal distribution.
garch_model_one = arch_model(train, mean='Constant', vol="Garch", p=1, o=0,
                             q=1, dist="Normal")
output_one = garch_model_one.fit()
print(output_one.summary())
"""
We note here that the output has the following interpretation:
omega: baseline variance
alpha: MA term for yesterday's squared error, i.e. weighted white noise
beta: effect of yesterday's vol on today's vol
mu: expected return
"""
# Now, forecast variance for the final week of the dataset.
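# The forecasting step announced above is not shown; a minimal sketch of what
# it could look like with the arch API (reuses output_one and horizon from
# above, and assumes numpy is imported as np):
fc = output_one.forecast(horizon=horizon)
pred_vol = np.sqrt(fc.variance.iloc[-1])  # h.1 ... h.7 volatility path
print(pred_vol)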
sgt.plot_acf(df3['sqd_returns'][1:], lags=40, zero=False)
plt.xlabel('Lags')
plt.ylabel('ACF')
plt.title("ACF Squared Returns")

# In[77]:

from arch import arch_model

# In[79]:

model_arch_1 = arch_model(df3['returns'][1:])
results_arch_1 = model_arch_1.fit()
results_arch_1.summary()

# In[80]:

# Mean Model = Constant --> the mean is constant rather than moving, which is
#   a property of stationary data
# Vol Model = GARCH --> the variance is modelled with a GARCH model
# Dist Model --> four parameters are estimated
# Mean Model:
#   coefficient of the mean in the equation;
#   a higher t-value and p < 0.05 indicate a significant coefficient
# Volatility Model:
#   omega is alpha 0
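# Sketch (assumes results_arch_1 from above): the coefficient block of the
# summary can also be pulled out programmatically for the checks described above.
import pandas as pd

coef_table = pd.DataFrame({
    "coef": results_arch_1.params,
    "t": results_arch_1.tvalues,
    "p": results_arch_1.pvalues,
})
print(coef_table)  # mu, omega, alpha[1], beta[1]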
def get_inst_vol(y,
                 annualize,
                 x=None,
                 mean='Constant',
                 vol='Garch',
                 dist='normal',
                 data='prices',
                 freq='d',
                 ):
    """Fn: to calculate conditional volatility of an array using Garch.

    params
    --------------
    y : {numpy array, series, None}
        endogenous array of returns
    x : {numpy array, series, None}
        exogenous
    mean : str, optional
        Name of the mean model. Currently supported options are:
        'Constant', 'Zero', 'ARX' and 'HARX'
    vol : str, optional
        model, currently supported: 'GARCH' (default), 'EGARCH', 'ARCH' and 'HARCH'
    dist : str, optional
        'normal' (default), 't', 'ged'

    returns
    ----------
    series of conditional volatility.
    """
    if (data == 'prices') or (data == 'price'):
        y = get_rets(y, kind='arth', freq=freq)

    if isinstance(y, pd.core.series.Series):
        # remove nan
        y = y.dropna()
    else:
        raise TypeError('Data should be time series with index as DateTime')

    # provide a model; pass the requested mean, vol and dist specs through
    model = arch.arch_model(y * 100, mean=mean, vol=vol, dist=dist)

    # fit the model
    res = model.fit(update_freq=5)

    # get the parameters. Here [1] means number of lags. This is only Garch(1,1)
    omega = res.params['omega']
    alpha = res.params['alpha[1]']
    beta = res.params['beta[1]']

    inst_vol = res.conditional_volatility * np.sqrt(252)
    if isinstance(inst_vol, pd.core.series.Series):
        inst_vol.name = y.name

    # more interested in conditional vol
    if annualize.lower() == 'd':
        ann_cond_vol = res.conditional_volatility * np.sqrt(252)
    elif annualize.lower() == 'm':
        ann_cond_vol = res.conditional_volatility * np.sqrt(12)
    elif annualize.lower() == 'w':
        ann_cond_vol = res.conditional_volatility * np.sqrt(52)
    # undo the *100 scaling applied before fitting
    return ann_cond_vol * 0.01
def model_predict(trend_arima_fit, residual_arima_fit, trend_garch_order,
                  residual_garch_order, trend, residual, seasonal,
                  trend_diff_counts, residual_diff_counts, if_pred, start,
                  end, period):
    """
    trend_arima_fit: ARIMA model after fitting the trend.
    residual_arima_fit: ARIMA model after fitting the residual.
    trend_garch_order: best parameters for the GARCH model fit on trend_arima_fit.resid.
    residual_garch_order: best parameters for the GARCH model fit on residual_arima_fit.resid.
    trend: time series of the trend.
    residual: time series of the residual.
    seasonal: time series of the seasonal component.
    trend_diff_counts: int indicating the number of diffs for the trend.
    residual_diff_counts: int indicating the number of diffs for the residual.
    if_pred: boolean indicating whether to predict (True) or fit (False).
    start: string indicating the start date.
    end: string indicating the end date.
    period: int indicating the period of the seasonal component.
    return: predicted sequence.
    """
    if if_pred:
        # get the first date after the last date in train.
        date_after_train = str(trend.index.tolist()[-1] + relativedelta(days=1))
        # get the trend predicted sequence up to end;
        # the dynamic keyword affects in-sample prediction.
        trend_pred_seq = np.array(
            trend_arima_fit.predict(start=date_after_train, end=end,
                                    dynamic=True))
        # get the residual predicted sequence up to end
        residual_pred_seq = np.array(
            residual_arima_fit.predict(start=date_after_train, end=end,
                                       dynamic=True))
        # find the corresponding seasonal sequence.
        pred_period = (datetime.datetime.strptime(end, '%Y-%m-%d %H:%M:%S') -
                       datetime.datetime.strptime(date_after_train,
                                                  '%Y-%m-%d %H:%M:%S')).days + 1
        trend_pred_variance = np.zeros(pred_period)
        residual_pred_variance = np.zeros(pred_period)
        current_trend_resid = trend_arima_fit.resid
        current_residual_resid = residual_arima_fit.resid
        for i in range(pred_period):
            trend_model = arch_model(current_trend_resid, mean="Constant",
                                     p=trend_garch_order[0],
                                     q=trend_garch_order[1], vol='GARCH')
            trend_model_fit = trend_model.fit(disp="off", update_freq=0,
                                              show_warning=False)
            trend_pred_variance[i] = np.sqrt(
                trend_model_fit.forecast(horizon=1).variance.values[-1, :][0]
            ) + trend_model_fit.forecast(horizon=1).mean.values[-1, :][0]
            # append returns a new object, so the result must be reassigned
            current_trend_resid = current_trend_resid.append(
                pd.DataFrame.from_dict(
                    {current_trend_resid.index.tolist()[-1] +
                     relativedelta(days=1): trend_pred_variance[i]},
                    orient="index"))
            residual_model = arch_model(current_residual_resid,
                                        mean="Constant",
                                        p=residual_garch_order[0],
                                        q=residual_garch_order[1],
                                        vol='GARCH')
            residual_model_fit = residual_model.fit(disp="off", update_freq=0,
                                                    show_warning=False)
            residual_pred_variance[i] = np.sqrt(
                residual_model_fit.forecast(horizon=1).variance.values[-1, :][0]
            ) + residual_model_fit.forecast(horizon=1).mean.values[-1, :][0]
            current_residual_resid = current_residual_resid.append(
                pd.DataFrame.from_dict(
                    {current_residual_resid.index.tolist()[-1] +
                     relativedelta(days=1): residual_pred_variance[i]},
                    orient="index"))
        trend_pred_seq = trend_pred_seq + trend_pred_variance
        residual_pred_seq = residual_pred_seq + residual_pred_variance
        trend_pred_seq = np.array(
            np.concatenate((np.array(trend.diff(trend_diff_counts).fillna(0)),
                            trend_pred_seq)))
        residual_pred_seq = np.array(
            np.concatenate((np.array(residual.diff(residual_diff_counts).fillna(0)),
                            residual_pred_seq)))
        seasonal_pred_seq = list(seasonal[len(seasonal) - period:]) * (
            round(pred_period / period) + 1)
        seasonal_pred_seq = np.array(seasonal_pred_seq[0:pred_period])
    else:
        trend_pred_seq = np.array(trend_arima_fit.fittedvalues)
        residual_pred_seq = np.array(residual_arima_fit.fittedvalues)
        seasonal_pred_seq = np.array(seasonal)
    while trend_diff_counts > 0 or residual_diff_counts > 0:
        if trend_diff_counts > 0:
            # cumsum returns a new array, so reassign to undo the differencing
            trend_pred_seq = trend_pred_seq.cumsum()
            trend_diff_counts -= 1
            if trend_diff_counts == 0:
                trend_pred_seq = trend_pred_seq + trend[0]
        if residual_diff_counts > 0:
            residual_pred_seq = residual_pred_seq.cumsum()
            residual_diff_counts -= 1
            if residual_diff_counts == 0:
                residual_pred_seq = residual_pred_seq + residual[0]
    if if_pred:
        pred_period = (datetime.datetime.strptime(end, '%Y-%m-%d %H:%M:%S') -
                       datetime.datetime.strptime(start, '%Y-%m-%d %H:%M:%S')).days + 1
        return trend_pred_seq[len(trend_pred_seq) - pred_period:] + \
            residual_pred_seq[len(residual_pred_seq) - pred_period:] + \
            seasonal_pred_seq[len(seasonal_pred_seq) - pred_period:]
    else:
        return trend_pred_seq + residual_pred_seq + seasonal_pred_seq
# initialize an array for standard deviation variables
nStocks = returnStocks.shape[1]
stdPredictions = np.zeros(nStocks)

# run the regression over all variables
for index, stk in enumerate(returnVariables.columns):
    # create the garch model
    returnStock = returnDailyStocks.iloc[:, index].dropna()
    garchModel = arch_model(returnStock)

    # fit the garch model
    fitGarch = garchModel.fit(disp='off')

    # grab the forecasted standard deviation each day for the next 22 days
    stdGarchVals = np.sqrt(fitGarch.forecast(horizon=22).variance).iloc[-1, :]

    # convert the daily standard deviation to a monthly standard deviation
    stdGarch = stdGarchVals.mean() * np.sqrt(22)

    # append to the array
                order[1] = d
                order[2] = j
        except (ValueError, np.linalg.linalg.LinAlgError) as e:
            pass
print AIC
print order
"""
order = [4, 0, 1]
model2 = ARIMA(y, order=(order[0], order[1], order[2]))
model2_fit = model2.fit(disp=0)
res = model2_fit.resid

garch11 = arch_model(res, p=1, q=1)
garch11_fit = garch11.fit(disp=0)
gres = garch11_fit.resid
omega = garch11_fit.params['omega']
alpha1 = garch11_fit.params['alpha[1]']
beta1 = garch11_fit.params['beta[1]']
cond_vol = garch11_fit.conditional_volatility[-1]
# one-step-ahead GARCH(1,1) volatility forecast
forecast_vol = np.sqrt(omega + alpha1 * gres[-1]**2 + beta1 * cond_vol**2)

pred2 = model2_fit.predict(start=0, end=0, dynamic=False)
print(forecast_vol)
# gredict = garch11_fit.forecast(start=len(y)-3, horizon=3, method='analytic')
# gr1 = gredict.mean['h.01'].iloc[-1]
X_test.head()

# %%
sts.adfuller(X_train.icici)

# %%
sgt.plot_acf(X_train.icici, lags=40, zero=False)
plt.title("ACF ICICI")
sgt.plot_pacf(X_train.icici, lags=40, zero=False)
plt.title("PACF ICICI")
plt.show()

# %%
model = auto_arima(X_train.icici, exogenous=df_ret[["nsebank"]])
model.summary()

# %%
model1 = arch_model(df_ret.icici, mean="constant", vol="GARCH", p=1, q=1,
                    dist="Normal")
results1 = model1.fit(last_obs=start_date, update_freq=10)
results1.summary()

# %%
pred_garch = results1.forecast(horizon=1, align="target")
pred_garch.residual_variance[start_date:].plot(zorder=2)
X_test.icici.abs().plot(zorder=1)
plt.show()

# %%
pred_garch = results1.forecast(horizon=100, align="target")
pred_100 = pred_garch.residual_variance[-30:]

# %%
pred_100.mean().T.plot()
def get_arch(TS, upper_=6):
    par = _get_best_model(TS, upper_=upper_)
    best_order = par[1]
    am = arch_model(TS, p=best_order[0], o=best_order[1], q=best_order[2],
                    dist='StudentsT')
    res = am.fit(update_freq=5, disp='off')
    return res
def model_forecast(ret_train):
    ret_train_1 = ret_train
    ret_mdl = arch_model(ret_train_1 * 100, p=1, q=1, o=1,
                         dist='skewt').fit(update_freq=1)
    forecasts = ret_mdl.forecast()
    return (forecasts.mean.dropna(),
            forecasts.variance.dropna(),
            forecasts.residual_variance.dropna())
from sklearn import tree
import csv

with open('EURUSD240.csv', 'r') as csvfile:
    spamreader = list(csv.reader(csvfile))
    print(spamreader[0])

from arch import arch_model

# `returns` is assumed to be defined elsewhere (e.g. computed from the CSV)
am = arch_model(returns)
res = am.fit(update_freq=5)
print(res.summary())
fig_ecart.add_trace(go.Scatter(x=dt["dates"], y=dt["high_to_median"],
                               name='deviation from the median high',
                               line=dict(color='royalblue', width=1)))
fig_ecart.add_trace(go.Scatter(x=dt["dates"], y=dt["low_to_median"],
                               name='deviation from the median low',
                               line=dict(color='black', width=1)))
fig_ecart.add_trace(go.Scatter(x=dt["dates"], y=dt["close_to_median"],
                               name='deviation from the median close',
                               line=dict(color='magenta', width=1)))
fig_ecart.write_html("C://Users//shade//OneDrive//Documents//M2_TIDE//Algorithmique_et_Python//Projet_S1//ecart_mediane.html",
                     auto_open=True)

model = arch_model(train, mean='Zero', vol='ARCH', p=1)
model_fit = model.fit()
print(model_fit.summary())

dt.loc[1:, "open_diff1"].mean()

fig_diff = go.Figure()
fig_diff.add_trace(go.Scatter(x=dt["dates"], y=dt["open_diff1"],
                              name='open_high variation',
                              line=dict(color='firebrick', width=1)))
fig_diff.write_html("tx_open_prices.html", auto_open=True)
    # (continuation of the roll_garch(data, window) definition called below)
    for i in range(len(data) - window):
        gam11 = arch_model(data[i:i + window], p=1, q=1)
        resg11 = gam11.fit()
        yhat = resg11.forecast(horizon=1)
        empty.append(yhat.variance.values[-1, :])
    return empty


vol_forecast = roll_garch(roll_month_ret, window)
vol_forecast = pd.DataFrame(vol_forecast,
                            index=roll_month_ret[window:].index)

#%%
am = arch_model(roll_month_ret * 100, vol='Garch', p=1, o=0, q=1,
                dist='Normal')
index = roll_month_ret.index
start_loc = 0
window = 40
forecasts = pd.DataFrame()
for i in range(len(roll_month_ret) - window):
    sys.stdout.write('.')
    sys.stdout.flush()
    res = am.fit(first_obs=i, last_obs=i + window, disp='off')
    temp = res.forecast(horizon=1).variance
    fcast = temp.iloc[i + window - 1]  # .values
    # forecasts[fcast.name] = fcast
    forecasts_ = fcast
    forecasts = pd.concat([forecasts, forecasts_], axis=1)
    # (tail of the Forecast_ definition called below)
    mm, sea, remn = remainderizer(pun)
    arma = statsmodels.api.tsa.ARMA(remn.values.ravel(), (1, 0)).fit()
    resid = remn.values.ravel() - arma.predict()
    pred = arma.predict(start=ts.size, end=ts.size)
    forecasted = mm[-1] + sea[str(month) + '_mean'].loc[dow[dt.weekday()]] + pred
    # sampled sigma is a bit overestimated
    sigma_hat = (np.std(pun.loc[pun.index.month == 10] - mm[-1]) +
                 sea[str(month) + '_std'].loc[dow[dt.weekday()]] + np.std(resid))
    return (forecasted - 2 * sigma_hat, forecasted - sigma_hat, forecasted,
            forecasted + sigma_hat, forecasted + 2 * sigma_hat)

###############################################################################

Forecast_(pun, 2016, 10, 26)
Forecast_(pun, 2016, 10, 27)

import arch

# note: this name shadows arch.arch_model; it holds the fitted result
arch_model = arch.arch_model(remn, mean='AR', vol='garch', p=1, q=1).fit()
arch_model.summary()
plt.figure()
arch_model.plot()
arch_model.params
arch_model.rsquared
np.mean(arch_model.resid)
arch_model.conditional_volatility
arch_model.hedgehog_plot(horizon=40, step=40)
arch_model.forecast()

fitted = remn + arch_model.resid
plt.figure()
plt.plot(fitted, color='black')
alpha_1_hat = est_results_vol.params[1]  # extract parameter alpha_1_hat
resid_squ[0:5]

# Robustness check: 'arch' package
# Estimation via joint (!) likelihood
# ?arch_model
model_arch_package = arch_model(returns, mean='ARX', lags=1, vol='ARCH', p=1)
results_arch_package = model_arch_package.fit()
arch_package_vol = results_arch_package.conditional_volatility

# TODO: Comparison with squared returns
# es50_id_sub.loc[2:n, 'volatility_arch_2pass'] = arch_vol
es50_id_sub.loc[1:n, 'volatility_arch_package'] = arch_package_vol
es50_id_sub.head()

es50_id_sub[['volatility_arch_2pass', 'volatility_arch_package']].plot(
    subplots=True
)
from statsmodels.tsa import stattools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from arch import arch_model

indexRet = pd.read_csv('index.csv', sep='\t')
indexRet.index = pd.to_datetime(indexRet.Date)
indexRet.head()
np.unique(indexRet.CoName)
taiexRet = indexRet.loc[indexRet.CoName == 'TSE Taiex '].ROI
taiexRet.head()
taiexRet.tail()
taiexRet = taiexRet.astype(float).dropna()

# plot the squared and absolute daily return series
plt.subplot(211)
plt.plot(taiexRet**2)
plt.xticks([])
plt.title('Squared Daily Return of taiex')
plt.subplot(212)
plt.plot(np.abs(taiexRet))
plt.title('Absolute Daily Return of taiex')

LjungBox = stattools.q_stat(stattools.acf(taiexRet**2)[1:13], len(taiexRet))
LjungBox[1][-1]

am = arch_model(taiexRet)
model = am.fit(update_freq=0)
print(model.summary())
         color='k')

# # Modelling GARCH model
#
# $$\text{Mean equation:}$$
# $$r_{t}=\mu + \epsilon_{t}$$
#
# $$\text{Volatility equation:}$$
# $$\sigma^{2}_{t}= \omega + \alpha \epsilon^{2}_{t-1} + \beta\sigma^{2}_{t-1}$$
#
# $$\text{Error process:}$$
#
# $$\epsilon_{t}= \sigma_{t} e_{t}$$
#
# $$e_{t} \sim N(0,1)$$
#

# In[43]:

am = arch_model(daily_return, p=1, o=0, q=1)
res = am.fit(update_freq=1)
print(res.summary())

# # Checking the residual

# In[37]:

fig = res.plot(annualize='D')

# In[ ]:
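# Sketch (assumes `res` from above): a quick sanity check on the fitted
# volatility equation is the persistence alpha + beta and the implied
# unconditional volatility omega / (1 - alpha - beta).
persistence = res.params["alpha[1]"] + res.params["beta[1]"]
uncond_vol = (res.params["omega"] / (1 - persistence)) ** 0.5
print(persistence, uncond_vol)  # persistence close to 1 means slowly decaying shocks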