def test_x_reformat_1var(exog_format):
    # exog formats exercised:
    # (10,)
    # (1, 10)
    # (n, 10)
    # (1, 1, 10)
    # (1, n, 10)
    # {"x1"} : (10,)
    # {"x1"} : (1, 10)
    # {"x1"} : (n, 10)
    exog, ref = exog_format
    if exog is None:
        return
    if isinstance(exog, dict):
        nexog = len(exog)
    else:
        if np.ndim(exog) == 3:
            nexog = exog.shape[0]
        else:
            nexog = 1
    cols = [f"x{i}" for i in range(1, nexog + 1)]
    rng = RandomState(12345)
    x = pd.DataFrame(
        rng.standard_normal((SP500.shape[0], nexog)),
        columns=cols,
        index=SP500.index,
    )
    mod = ARX(SP500, lags=1, x=x)
    res = mod.fit()
    fcasts = res.forecast(horizon=10, x=exog, reindex=False)
    ref = res.forecast(horizon=10, x=ref, reindex=False)
    assert_allclose(fcasts.mean, ref.mean)
def main(ticker1, ticker2):
    df = pd.read_csv("./Data/close.csv", dtype={"date": str})
    df2 = np.log(df.loc[:, [ticker1, ticker2]]).diff().dropna()
    x = df2[ticker1].values
    y = df2[ticker2].values

    # OLS hedge regression of ticker2 log returns on ticker1 log returns
    A = np.vstack((np.ones_like(x), x)).T
    b = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(y)
    resid = y - A.dot(b)
    resid_se = pd.Series(resid)

    # Rolling dispersion and mean of the residual spread
    std2_se = resid_se.rolling(window=100).apply(
        lambda x: sqrt(sum(np.diff(x) ** 2) / (len(x) - 1))
    )
    mean_se = resid_se.rolling(window=100).mean()
    # s_score = (pd.Series(resid_se) - mean_se) / std2_se

    ar = ARX(resid_se, volatility=EGARCH(2, 0, 2))
    ar.distribution = SkewStudent()
    res = ar.fit()

    s_score = pd.Series(resid)

    # Grid of entry/exit thresholds to back-test
    arg_lst = [
        (s_score, resid_se, i / 100.0, j / 100.0, k / 100.0,
         l / 100.0, m / 100.0, n / 100.0)
        for i in range(15, 35, 5)
        for j in range(i + 1, 49, 5)
        for k in range(j + 1, 50, 5)
        for l in range(85, 65, -5)
        for m in range(l - 1, 51, -5)
        for n in range(m - 1, 50, -5)
    ]
    pool = mp.Pool(6)
    result = pool.map(back_test_sharp, arg_lst)
    pool.close()
    pool.join()

    with open("./pkl/EG_result_lst_{}_{}_sharp".format(ticker1, ticker2), "wb") as fp:
        cp.dump(result, fp)

    x_mean = x.mean()
    y_mean = y.mean()
    pearson = ((x - x_mean).dot(y - y_mean)
               / sqrt(sum((x - x_mean) ** 2))
               / sqrt(sum((y - y_mean) ** 2)))

    result.sort(key=lambda x: x[0], reverse=True)
    best = result[0]
    res = back_test((s_score, resid_se, best[1], best[2], best[3],
                     best[4], best[5], best[6]))

    fig = plt.figure(figsize=(20, 10))
    plt.plot(res[0])
    plt.savefig("./Pics/net_value/EG_{}_{}.png".format(ticker1, ticker2))
    del fig

    return pd.Series(res[0]).to_csv(
        "./xlsx/EG_{}_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(
            ticker1, ticker2, pearson, best[1], best[2], best[3],
            best[4], best[5], best[6]
        )
    )
def run_garch_simple(y, mean_model, vol_model, split_date, x=None, verbose=False):
    # specify mean model
    if mean_model == "CONST":
        ls = ConstantMean(y)
    elif mean_model == 'LS':
        ls = LS(y=y, x=x)
    elif mean_model == 'ARX':
        ls = ARX(y=y, lags=1)
    else:
        print("Misspecified mean model name. Please choose between CONST, LS, ARX.")

    # specify volatility model
    if vol_model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif vol_model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif vol_model == "EWMA":
        ls.volatility = EWMAVariance(lam=None)
    else:
        print("Misspecified volatility process name. Please choose between GARCH, EGARCH, EWMA.")

    res = ls.fit(disp='off', last_obs=split_date)
    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())
    return res
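# Usage sketch for run_garch_simple: the return series and split date below are simulated
# stand-ins for real data, and the call assumes the arch/pandas objects referenced inside
# the function are already imported in this module.
import numpy as np
import pandas as pd

_demo_index = pd.date_range("2015-01-01", periods=1000, freq="B")
_demo_returns = pd.Series(np.random.standard_normal(1000), index=_demo_index, name="ret")
_demo_res = run_garch_simple(
    y=_demo_returns,
    mean_model="ARX",
    vol_model="GARCH",
    split_date=_demo_index[750],
    verbose=False,
)
print(_demo_res.params)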
def get_disaster_factors(innovation_method, agg_freq="mon", resample=True):
    r'''
    Function to get various disaster risk factors and their innovations.

    Args:
        innovation_method: String for how to compute innovations in disaster
                           risk factors. 'AR' uses an AR1 model, 'fd' uses
                           first-differences.
        agg_freq: can be either "mon" or "week"

    Returns:
        df: Dataframe where index is date and columns are various disaster
            risk factors
        df_innov: Dataframe containing innovations to disaster risk factors
    '''
    if agg_freq == "mon":
        agg_freq = "date_mon"
    elif agg_freq == "week":
        agg_freq = "date_week"
    else:
        raise ValueError("agg_freq should be either 'mon' or 'week'")

    # == Check inputs == #
    if innovation_method not in ['AR', 'fd']:
        raise ValueError("innovation_method must be either 'AR' or 'fd'")

    # == Read in raw data == #
    raw_f = pd.read_csv("estimated_data/disaster_risk_measures/" +
                        "disaster_risk_measures.csv")
    raw_f['date'] = pd.to_datetime(raw_f['date'])
    raw_f = raw_f[raw_f.agg_freq == agg_freq]
    # raw_f = raw_f[raw_f.variable.isin(["D_clamp", "rn_prob_5", "rn_prob_20", "rn_prob_80"]) &
    #               raw_f.maturity.isin(["level", "30", "180"])]
    raw_f = raw_f[raw_f.variable.isin(["D_clamp"]) &
                  raw_f.maturity.isin(["level"]) &
                  (raw_f.level == "Ind")]

    # == Create variable names == #
    raw_f['name'] = raw_f['level'] + '_' + raw_f['variable'] + \
        '_' + raw_f['maturity'].astype(str)

    # == Create pivot table, then resample to end of month == #
    pdf = raw_f.pivot_table(index='date', columns='name', values='value')
    if resample:
        pdf = pdf.resample('M').last()

    # == Compute innovations in each factor == #
    if innovation_method == 'fd':
        df = pdf.diff()
    elif innovation_method == 'AR':
        df = pd.DataFrame(index=pdf.index, columns=pdf.columns)
        for col in df.columns:
            ar = ARX(pdf[col], lags=[1]).fit()
            df.loc[ar.resid.index, col] = ar.resid.values
        df = df.astype(float)

    return pdf, df
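# Usage sketch for get_disaster_factors: an illustrative call that loads the monthly
# individual-firm D_clamp level factor and computes AR(1) innovations; it assumes the CSV
# referenced inside the function is available on disk.
disaster_levels, disaster_innovations = get_disaster_factors(
    innovation_method="AR", agg_freq="mon", resample=True
)
print(disaster_levels.tail())
print(disaster_innovations.tail())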
def estimate_qar(y, p=1, q=1, disp=1):
    """
    Estimates a QAR(p, q) on data y.

    Returns statsmodels.fitted object.
    """
    lags = p
    qarpq = QAR(y, p=lags, q=1)

    # Use a first-stage AR(p) fit to build starting values for the QAR optimization
    am = ARX(y, lags=lags, constant=True)
    first_stage = am.fit()
    params = np.r_[first_stage.params[:-1],
                   100 * np.zeros(lags),
                   100 * np.zeros(qarpq.q),
                   1 * np.sqrt(np.abs(first_stage.params[-1]))]
    results = qarpq.fit(maxiter=50000, start_params=params, disp=disp)
    return results
def test_x_exceptions():
    res = ARX(SP500, lags=1).fit(disp="off")
    with pytest.raises(TypeError, match="x is not None but"):
        res.forecast(reindex=False, x=SP500)

    x = SP500.copy()
    x[:] = np.random.standard_normal(SP500.shape)
    res = ARX(SP500, lags=1, x=x).fit(disp="off")
    with pytest.raises(TypeError, match="x is None but the model"):
        res.forecast(reindex=False)

    res = ARX(SP500, lags=1, x=x).fit(disp="off")
    with pytest.raises(ValueError, match="x must have the same"):
        res.forecast(reindex=False, x={})
    with pytest.raises(ValueError, match="x must have the same"):
        res.forecast(reindex=False, x={"x0": x, "x1": x})
    with pytest.raises(KeyError, match="The keys of x must exactly"):
        res.forecast(reindex=False, x={"z": x})
    with pytest.raises(ValueError, match="The arrays contained in the dictionary"):
        _x = np.asarray(x).reshape((1, x.shape[0], 1))
        res.forecast(reindex=False, x={"x0": _x})

    x2 = pd.concat([x, x], axis=1)
    x2.columns = ["x0", "x1"]
    x2.iloc[:, 1] = np.random.standard_normal(SP500.shape)
    res = ARX(SP500, lags=1, x=x2).fit(disp="off")
    with pytest.raises(ValueError, match="The shapes of the arrays contained"):
        res.forecast(reindex=False, x={"x0": x2.iloc[:, 0], "x1": x2.iloc[10:, 1:]})
    with pytest.raises(ValueError, match="1- and 2-dimensional x values"):
        res.forecast(reindex=False, x=x2)
    with pytest.raises(ValueError, match="The leading dimension of x"):
        _x2 = np.asarray(x2)
        _x2 = _x2.reshape((1, -1, 2))
        res.forecast(reindex=False, x=_x2)
    with pytest.raises(ValueError, match="The number of values passed"):
        res.forecast(reindex=False, x=np.empty((2, SP500.shape[0], 3)))
    with pytest.raises(ValueError, match="The shape of x does not satisfy the"):
        res.forecast(reindex=False, x=np.empty((2, SP500.shape[0] // 2, 1)))
def test_arx_no_lags():
    mod = ARX(SP500, volatility=GARCH())
    res = mod.fit(disp="off")
    assert res.params.shape[0] == 4
    assert "lags" not in mod._model_description(include_lags=False)
    HARX,
    ConstantMean,
    ConstantVariance,
    EWMAVariance,
    MIDASHyperbolic,
    RiskMetrics2006,
    ZeroMean,
    arch_model,
)
from arch.univariate.mean import _ar_forecast, _ar_to_impulse

SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()

MEAN_MODELS = [
    HARX(SP500, lags=[1, 5]),
    ARX(SP500, lags=2),
    ConstantMean(SP500),
    ZeroMean(SP500),
]

VOLATILITIES = [
    ConstantVariance(),
    GARCH(),
    FIGARCH(),
    EWMAVariance(lam=0.94),
    MIDASHyperbolic(),
    HARCH(lags=[1, 5, 22]),
    RiskMetrics2006(),
    APARCH(),
    EGARCH(),
]
def return_sampler_garch(
    N_train: int,
    mean_process: str = "Constant",
    lags_mean_process: int = None,
    vol_process: str = "GARCH",
    distr_noise: str = "normal",
    seed: int = None,
    seed_param: int = None,
    p_arg: list = None,
) -> Tuple[np.ndarray, pd.Series]:
    # https://stats.stackexchange.com/questions/61824/how-to-interpret-garch-parameters
    # https://arch.readthedocs.io/en/latest/univariate/introduction.html
    # https://arch.readthedocs.io/en/latest/univariate/volatility.html
    # https://github.com/bashtage/arch/blob/master/arch/univariate/volatility.py
    """
    Generates financial returns driven by mean-reverting factors.

    Parameters
    ----------
    N_train: int
        Length of the experiment
    mean_process: str
        Mean process for the returns. It can be 'Constant' or 'AR'
    lags_mean_process: int
        Order of the autoregressive lag if mean_process is 'AR'
    vol_process: str
        Volatility process for the returns. It can be 'GARCH', 'EGARCH',
        'TGARCH', 'ARCH', 'HARCH', 'FIGARCH' or 'Constant'. Note that
        different volatility processes require different parameters, which
        are hard coded. If you want to pass them explicitly, use p_arg.
    distr_noise: str
        Distribution for the unpredictable component of the returns. It can
        be 'normal', 'studt', 'skewstud' or 'ged'. Note that different
        distributions require different parameters, which are hard coded.
        If you want to pass them explicitly, use p_arg.
    seed: int
        Seed for experiment reproducibility
    seed_param: int
        Seed for drawing randomly the parameters needed for the simulation.
        The ranges provided are obtained as average lower and upper bounds
        of several GARCH-type model fits on real financial time series.
    p_arg: pd.Series
        Pandas series of parameters that you want to pass explicitly.
        They need to be passed in the right order. Check the documentation
        of the arch python package (https://arch.readthedocs.io/en/latest/index.html)
        for more details.

    Returns
    -------
    simulations['data'].values: np.ndarray
        Simulated series of returns
    p: pd.Series
        Series of parameters used for simulation
    """
    names = []
    vals = []

    if seed_param is None:
        seed_param = seed
    rng = np.random.RandomState(seed_param)

    # choose mean process
    if mean_process == "Constant":
        model = ConstantMean(None)
        names.append("const")
        if seed_param:
            vals.append(rng.uniform(0.01, 0.09))
        else:
            vals.append(0.0)
    elif mean_process == "AR":
        model = ARX(None, lags=lags_mean_process)
        names.append("const")
        vals.append(0.0)
        if seed_param:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(rng.uniform(-0.09, 0.09))
        else:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(0.9)
    else:
        print("This mean process doesn't exist or it's not available.")
        sys.exit()

    # choose volatility process
    if vol_process == "GARCH":
        model.volatility = GARCH(p=1, q=1)
        names.extend(["omega", "alpha", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.1)
            alph = rng.uniform(0.05, 0.1)
            b = rng.uniform(0.86, 0.92)
            garch_p = np.array([om, alph, b]) / (np.array([om, alph, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            b = 0.94
            garch_p = np.array([om, alph, b])
        vals.extend(list(garch_p))

    elif vol_process == "ARCH":
        model.volatility = GARCH(p=1, q=0)
        names.extend(["omega", "alpha"])
        if seed_param:
            om = rng.uniform(1.4, 4.0)
            alph = rng.uniform(0.1, 0.6)
        else:
            om = 0.01
            alph = 0.4
        garch_p = np.array([om, alph])
        vals.extend(list(garch_p))

    elif vol_process == "HARCH":
        model.volatility = HARCH(lags=[1, 5, 22])
        names.extend(["omega", "alpha[1]", "alpha[5]", "alpha[22]"])
        if seed_param:
            om = rng.uniform(1.2, 0.5)
            alph1 = rng.uniform(0.01, 0.1)
            alph5 = rng.uniform(0.05, 0.3)
            alph22 = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            alph1 = 0.05
            alph5 = 0.15
            alph22 = 0.5
        garch_p = np.array([om, alph1, alph5, alph22])
        vals.extend(list(garch_p))

    elif vol_process == "FIGARCH":
        model.volatility = FIGARCH(p=1, q=1)
        names.extend(["omega", "phi", "d", "beta"])
        if seed_param:
            om = rng.uniform(0.05, 0.03)
            phi = rng.uniform(0.1, 0.35)
            d = rng.uniform(0.3, 0.5)
            beta = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            phi = 0.2
            d = 0.2
            beta = 0.55
        garch_p = np.array([om, phi, d, beta])
        vals.extend(list(garch_p))

    elif vol_process == "TGARCH":
        model.volatility = GARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.02, 0.15)
            alph = rng.uniform(0.01, 0.07)
            gamma = rng.uniform(0.03, 0.1)
            b = rng.uniform(0.88, 0.94)
        else:
            om = 0.01
            alph = 0.05
            gamma = 0.04
            b = 0.90
        garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "EGARCH":
        model.volatility = EGARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.01, 0.03)
            alph = rng.uniform(0.06, 0.17)
            gamma = rng.uniform(-0.05, -0.02)
            b = rng.uniform(0.97, 0.99)
            garch_p = np.array([om, alph, gamma, b]) / (
                np.array([om, alph, gamma, b]).sum()
            )
        else:
            om = 0.01
            alph = 0.05
            gamma = -0.02
            b = 0.94
            garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "Constant":
        model.volatility = ConstantVariance()
        names.append("sigma_const")
        vals.append(rng.uniform(0.02, 0.05))
    else:
        print("This volatility process doesn't exist or it's not available.")
        sys.exit()

    # choose noise distribution
    if distr_noise == "normal":
        model.distribution = Normal(np.random.RandomState(seed))
    elif distr_noise == "studt":
        model.distribution = StudentsT(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.randint(6.0, 10.0))
        else:
            vals.append(8.0)
    elif distr_noise == "skewstud":
        model.distribution = SkewStudent(np.random.RandomState(seed))
        names.extend(["nu", "lambda"])
        if seed_param:
            vals.extend([rng.uniform(6.0, 10.0), rng.uniform(-0.1, 0.1)])
        else:
            vals.extend([8.0, 0.05])
    elif distr_noise == "ged":
        model.distribution = GeneralizedError(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.uniform(1.05, 3.0))
        else:
            vals.append(2.0)
    else:
        print("This noise distribution doesn't exist or it's not available.")
        sys.exit()

    p = pd.Series(data=vals, index=names)
    if p_arg:
        p = p_arg

    simulations = model.simulate(p, N_train) / 100

    return simulations["data"].values, p
print(model.summary())

# 5.
cny = web.DataReader('CNY=X', 'yahoo', dt.datetime(2015, 1, 1),
                     dt.datetime(2015, 12, 31))
ret = (cny.Close - cny.Close.shift(1)) / cny.Close.shift(1)
ret = ret.dropna()

cny.Close.plot()
ret.plot()

plot_acf(ret, lags=20)
plot_pacf(ret, lags=20)
LjungBox = stattools.q_stat(stattools.acf(ret)[1:13], len(ret))
LjungBox[1][-1]

(ret**2).plot()
plot_acf(ret**2, lags=20)
plot_pacf(ret**2, lags=20)
LjungBox = stattools.q_stat(stattools.acf(ret**2)[1:13], len(ret))
LjungBox[1][-1]

from arch.univariate import ARX, GARCH
model = ARX(ret, lags=1)
model.volatility = GARCH()
res = model.fit()
print(res.summary())
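# Diagnostic sketch following the AR(1)-GARCH(1,1) fit: repeat the Ljung-Box test on the
# squared standardized residuals; if the GARCH specification captures the volatility
# clustering found above, these p-values should no longer be significant.
std_resid = res.std_resid.dropna()
LjungBox = stattools.q_stat(stattools.acf(std_resid**2)[1:13], len(std_resid))
print(LjungBox[1][-1])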
eqCurves = pd.DataFrame(index=signal.index,
                        columns=['Buy and Hold', 'Strategy'])
eqCurves['Buy and Hold'] = returns['Buy and Hold'].cumsum() + 1
eqCurves['Strategy'] = returns['Strategy'].cumsum() + 1
eqCurves['Strategy'].plot(figsize=(10, 8))
eqCurves['Buy and Hold'].plot()
plt.legend()
plt.show()

# # From Arch website

# In[273]:

from arch.univariate import ARX
ar = ARX(Y, lags=30)
print(ar.fit().summary())

# In[270]:

from arch.univariate import ARCH, GARCH
ar.volatility = GARCH(p=3, o=0, q=3)
res = ar.fit(update_freq=0, disp='off')
p(res.summary())

# In[265]:

from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
p(res.summary())
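# Follow-up sketch: plot the annualized conditional volatility implied by the final
# AR-GARCH fit with Student's t errors. This assumes daily data, matching the
# res.plot(annualize='D') calls used in the other snippets in this file.
res.plot(annualize='D')
plt.show()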
def get_disaster_factors(innovation_method, level_filter=None,
                         var_filter=None, day_filter=None):
    r'''
    Function to get various disaster risk factors and their innovations.

    Args:
        innovation_method: String for how to compute innovations in disaster
                           risk factors. 'AR' uses an AR1 model, 'fd' uses
                           first-differences.
        level_filter: List of filters to apply to whether disaster risk comes
                      from the S&P 500 (sp_500) or individual firms (ind)
        var_filter: List of filters to apply to the disaster risk measure
                    (D, rn_prob_2sigma, rn_prob_20, rn_prob_40, rn_prob_60)
        day_filter: List of filters to apply to the duration of options that
                    went into the measure (30, 60, 120)

    Returns:
        df: Dataframe where index is date and columns are various disaster
            risk factors
        df_innov: Dataframe containing innovations to disaster risk factors
    '''
    # == Check inputs == #
    if innovation_method not in ['AR', 'fd']:
        raise ValueError("innovation_method must be either 'AR' or 'fd'")

    # == Read in raw data == #
    raw_f = pd.read_csv("estimated_data/disaster_risk_measures/" +
                        "combined_disaster_df.csv")
    raw_f['date_eom'] = pd.to_datetime(raw_f['date'])
    raw_f.drop('date', axis=1, inplace=True)

    # == Focus only on direct (for S&P 500) and filtered mean aggregation == #
    raw_f = raw_f[raw_f.agg_type.isin(['direct', 'mean_filter'])]

    # == Apply other filters == #
    if level_filter is not None:
        raw_f = raw_f[raw_f['level'].isin(level_filter)]
    if var_filter is not None:
        raw_f = raw_f[raw_f['var'].isin(var_filter)]
    if day_filter is not None:
        raw_f = raw_f[raw_f['days'].isin(day_filter)]

    # == Create variable names == #
    raw_f['name'] = raw_f['level'] + '_' + raw_f['var'] + \
        '_' + raw_f['days'].astype(str)

    # == Create pivot table, then resample to end of month == #
    pdf = raw_f.pivot_table(index='date_eom', columns='name', values='value')
    pdf = pdf.resample('M').last()

    # == Compute innovations in each factor == #
    if innovation_method == 'fd':
        df = pdf.diff()
    elif innovation_method == 'AR':
        df = pd.DataFrame(index=pdf.index, columns=pdf.columns)
        for col in df.columns:
            ar = ARX(pdf[col], lags=[1]).fit()
            df.loc[ar.resid.index, col] = ar.resid.values
        df = df.astype(float)

    return pdf, df
def main(fund_price_file=None, fund_region='EU', returns_type='pct', tag=''):
    # switch to the folder where the script is stored
    os.chdir(os.path.dirname(__file__))

    output_folder = '{}_{}_{}_return'.format(tag, fund_region, returns_type)
    output_dir = os.path.join(os.path.dirname(__file__), output_folder)

    ##########################################################################
    # read four factors of Fama-French data
    ##########################################################################
    if fund_region == 'EU':
        file_3_Factors = 'Europe_3_Factors_Daily.csv'
        file_MOM_Factor = 'Europe_MOM_Factor_Daily.csv'
        df_threefators = pd.read_csv(file_3_Factors, parse_dates=[0], index_col=0,
                                     skiprows=6).drop('RF', axis=1)['2013':'2018']
        df_forthfactor = pd.read_csv(file_MOM_Factor, parse_dates=[0], index_col=0,
                                     skiprows=6)['2013':'2018']
        ff_rf = pd.read_csv(file_3_Factors, parse_dates=[0], index_col=0,
                            skiprows=6)['RF']['2013':'2018']
    if fund_region == 'US':
        file_3_Factors = 'F-F_Research_Data_Factors_daily.CSV'
        file_MOM_Factor = 'F-F_Momentum_Factor_daily.csv'
        df_threefators = pd.read_csv(file_3_Factors, parse_dates=[0], index_col=0,
                                     skiprows=4).drop('RF', axis=1)['2013':'2018']
        df_forthfactor = pd.read_csv(file_MOM_Factor, parse_dates=[0], index_col=0,
                                     skiprows=13)['2013':'2018']
        ff_rf = pd.read_csv(file_3_Factors, parse_dates=[0], index_col=0,
                            skiprows=4)['RF']['2013':'2018']
    if fund_region == 'Global':
        file_3_Factors = 'Global_3_Factors_daily.CSV'
        file_MOM_Factor = 'Global_MOM_Factor_daily.csv'
        df_threefators = pd.read_csv(file_3_Factors, parse_dates=[0], index_col=0,
                                     skiprows=6).drop('RF', axis=1)['2013':'2018']
        df_forthfactor = pd.read_csv(file_MOM_Factor, parse_dates=[0], index_col=0,
                                     skiprows=6)['2013':'2018']
        ff_rf = pd.read_csv(file_3_Factors, parse_dates=[0], index_col=0,
                            skiprows=6)['RF']['2013':'2018']

    factors = pd.concat([df_threefators, df_forthfactor], axis=1)
    factors.index = pd.to_datetime(factors.index)
    factors = factors / 100
    ff_rf = ff_rf / 100
    print(factors.head())
    print(factors.describe())

    ##########################################################################
    # read green fund daily price
    ##########################################################################
    file = fund_price_file
    xl = pd.ExcelFile(file)
    print(xl.sheet_names)

    stats_list = []
    ols_list = []
    pvalues_list = []
    garch_list = []
    arx_list = []

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    os.chdir(output_dir)

    for select_sheet in xl.sheet_names:
        df = xl.parse(select_sheet, parse_dates=[0], index_col=0,
                      skiprows=[0, 1, 2, 4], header=0)
        df.index = pd.to_datetime(df.index)
        print('Import sheet: {}'.format(select_sheet))

        # skip/filter NaN columns
        print('the following columns are not numeric')
        print(df.select_dtypes(exclude=['float64']))
        df = df.select_dtypes(include=['float64'])

        ##########################################################################
        # calculate daily average returns and describe stats
        # https://stackoverflow.com/questions/35365545/calculating-cumulative-returns-with-pandas-dataframe
        ##########################################################################
        if returns_type == 'pct':  # simple return
            returns = df.pct_change(limit=2).mean(axis=1)['2013':'2018']
        if returns_type == 'cum':  # cumulative return
            returns = df.pct_change(limit=2)['2013':'2018']
            returns = ((1 + returns).cumprod() - 1).mean(axis=1)
        if returns_type == 'log':  # log return
            returns = np.log(1 + df.pct_change(limit=2)).mean(axis=1)['2013':'2018']
        print(returns.describe())

        # check data completeness
        print('The following dates have NaN return values')
        print(returns[returns.isna().any()])
        returns.fillna(method='bfill', inplace=True)
        returns.plot()
        plt.savefig('{}_daily_returns.png'.format(select_sheet))
        plt.close()

        stats_current = returns.describe()
        stats_current.name = select_sheet
        stats_list.append(stats_current)

        ##########################################################################
        # linear regression on the Fama-French factors
        ##########################################################################
        slice_index_ols = returns.index.intersection(factors.index)
        X = factors.loc[slice_index_ols]
        y = returns.loc[slice_index_ols] - ff_rf[slice_index_ols]
        X_with_constant = sm.add_constant(X)
        model_static = sm.OLS(y, X_with_constant, missing='drop').fit()
        print(model_static.params)

        ols_current = model_static.params
        ols_current.name = select_sheet
        ols_list.append(ols_current)

        pvalues_current = model_static.pvalues
        pvalues_current.name = select_sheet
        pvalues_list.append(pvalues_current)

        with open('ols_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(model_static.summary().as_csv())

        ##########################################################################
        # arch analysis of volatility
        ##########################################################################
        am = arch_model(returns)
        res = am.fit()
        print(res.summary())

        garch_current = res.params
        garch_current.name = select_sheet
        garch_list.append(garch_current)

        with open('garch_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('garch_{}.png'.format(select_sheet))
        plt.close()

        ##########################################################################
        # arx analysis of volatility
        ##########################################################################
        from arch.univariate import ARX
        arx = ARX(returns, lags=[1])
        res = arx.fit()
        print(res.summary())

        arx_current = res.params
        arx_current.name = select_sheet
        arx_list.append(arx_current)

        with open('arx_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('arx_{}.png'.format(select_sheet))
        plt.close()

    ##########################################################################
    # write all results
    ##########################################################################
    pd.concat(stats_list, axis=1).to_csv('greenfund_stats.csv')
    pd.concat(ols_list, axis=1).to_csv('greenfund_ols.csv')
    pd.concat(pvalues_list, axis=1).to_csv('greenfund_pvalues.csv')
    pd.concat(garch_list, axis=1).to_csv('greenfund_garch.csv')
    pd.concat(arx_list, axis=1).to_csv('greenfund_arx.csv')
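# Usage sketch: a typical invocation of main. The workbook name below is a hypothetical
# placeholder for the actual fund-price Excel file expected by the script.
if __name__ == '__main__':
    main(fund_price_file='fund_prices.xlsx', fund_region='EU',
         returns_type='pct', tag='demo')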
sm.graphics.tsa.plot_acf(rates)

# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()
# Test of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)

# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])  # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
print(res.summary())

# Forecast
drop = len(data) - len(rates)
start = 3254 - 2 - drop
end = 3262 - 2 - drop
var = res.forecast(start=start, horizon=5, method='simulation').variance[start:1 + end]
var.plot()

entry = [
    '2012:06:20',
from statsmodels.tsa.arima_model import ARMA
import pandas
import numpy
import statsmodels.api as sm

prices = pandas.read_csv("prices.csv", parse_dates=['Date'], index_col=0)
tickers = prices.columns[:-2]
prices = prices.resample('W').agg(lambda x: x[-1])
prices.dropna(axis=0, how='any', inplace=True)

rf = prices['^TNX'].values[:-1]
rf /= (52 * 100)
returns = prices.iloc[:, :-1].pct_change()[1:]
rm = returns['^GSPC'].values
ri = returns.iloc[:, :-1].values
Ri = ri - rf[:, numpy.newaxis]
Rm = rm - rf

# CAPM regressions of excess asset returns on excess market returns
model = sm.OLS(Ri, sm.add_constant(Rm))
results = model.fit()
alpha, beta = results.params
epsilon = numpy.sqrt(Ri.var(axis=0) - beta**2 * Rm.var(axis=0))

output = pandas.DataFrame(columns=['alpha', 'beta', 'epsilon'], index=tickers,
                          data=numpy.array([alpha, beta, epsilon]).T)
output.to_csv("coefficients.csv")

# AR(1)-GARCH(1,1) model for the weekly market return
from arch.univariate import ARX, GARCH
arx = ARX(rm, lags=1)
arx.volatility = GARCH()
res = arx.fit(disp='off')
pandas.DataFrame(res.params).to_csv("parameters.csv")
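# Forecast sketch from the fitted AR(1)-GARCH(1,1) market model: mean and variance
# forecasts over a 5-step horizon at the weekly frequency used above. reindex=False
# returns only the rows where forecasts are produced (here, the final observation).
fcast = res.forecast(horizon=5, reindex=False)
print(fcast.mean)
print(fcast.variance)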
print(res.summary())
res.plot(annualize='D')

# In[56]:

res.plot(annualize='D')

# In[58]:

# AR
from arch.univariate import ARX
ar = ARX(ts_data, lags=[1, 3, 12])
# print(ar.fit().summary())

# In[60]:

# Volatility Processes
from arch.univariate import ARCH, GARCH
ar.volatility = ARCH(p=5)
res = ar.fit(update_freq=0, disp='off')
# print(res.summary())
fig = res.plot()

# Distribution
from arch.univariate import StudentsT