def test_x_reformat_1var(exog_format):
    # Accepted shapes for a single exogenous variable:
    #   array-like: (10,), (1, 10), (n, 10), (1, 1, 10), (1, n, 10)
    #   dict {"x1": ...}: (10,), (1, 10), (n, 10)
    exog, ref = exog_format
    if exog is None:
        return
    if isinstance(exog, dict):
        nexog = len(exog)
    elif np.ndim(exog) == 3:
        nexog = exog.shape[0]
    else:
        nexog = 1
    cols = [f"x{i}" for i in range(1, nexog + 1)]
    rng = RandomState(12345)
    x = pd.DataFrame(
        rng.standard_normal((SP500.shape[0], nexog)),
        columns=cols,
        index=SP500.index,
    )
    mod = ARX(SP500, lags=1, x=x)
    res = mod.fit()
    fcasts = res.forecast(horizon=10, x=exog, reindex=False)
    ref_fcasts = res.forecast(horizon=10, x=ref, reindex=False)
    assert_allclose(fcasts.mean, ref_fcasts.mean)
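
# A minimal sketch of the forecast-x shapes exercised by the test above,
# assuming a recent arch version (>= 4.19 for `reindex`). All names here
# (rng, y, x, x_future) are illustrative, not from the test suite.
import numpy as np
import pandas as pd
from numpy.random import RandomState
from arch.univariate import ARX

rng = RandomState(0)
y = pd.Series(rng.standard_normal(500), name="y")
x = pd.DataFrame(rng.standard_normal((500, 1)), columns=["x1"])
res = ARX(y, lags=1, x=x).fit(disp="off")

x_future = rng.standard_normal(10)  # (horizon,) for a single exog variable
fc_arr = res.forecast(horizon=10, x=x_future, reindex=False)
fc_dict = res.forecast(horizon=10, x={"x1": x_future}, reindex=False)
# Both input forms reformat to the same internal shape, so the forecasts agree.
assert np.allclose(fc_arr.mean, fc_dict.mean)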
def main(ticker1, ticker2):
    df = pd.read_csv("./Data/close.csv", dtype={"date": str})
    df2 = np.log(df.loc[:, [ticker1, ticker2]]).diff().dropna()
    x = df2[ticker1].values
    y = df2[ticker2].values

    # OLS hedge ratio via the normal equations: b = (A'A)^(-1) A'y
    A = np.vstack((np.ones_like(x), x)).T
    b = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(y)
    resid = y - A.dot(b)

    resid_se = pd.Series(resid)
    std2_se = resid_se.rolling(window=100).apply(
        lambda x: sqrt(sum(np.diff(x) ** 2) / (len(x) - 1)))
    mean_se = resid_se.rolling(window=100).mean()
    # s_score = (resid_se - mean_se) / std2_se

    # Fit an AR-EGARCH(2, 0, 2) with skewed Student's t errors to the spread;
    # note the fit result is not used below, and s_score is the raw residual.
    ar = ARX(resid_se, volatility=EGARCH(2, 0, 2))
    ar.distribution = SkewStudent()
    res = ar.fit()
    s_score = pd.Series(resid)

    # Grid of entry/exit thresholds (in percent) for the back test.
    arg_lst = [
        (s_score, resid_se, i / 100.0, j / 100.0, k / 100.0,
         l / 100.0, m / 100.0, n / 100.0)
        for i in range(15, 35, 5)
        for j in range(i + 1, 49, 5)
        for k in range(j + 1, 50, 5)
        for l in range(85, 65, -5)
        for m in range(l - 1, 51, -5)
        for n in range(m - 1, 50, -5)
    ]
    pool = mp.Pool(6)
    result = pool.map(back_test_sharp, arg_lst)
    pool.close()
    pool.join()
    with open("./pkl/EG_result_lst_{}_{}_sharp".format(ticker1, ticker2),
              "wb") as fp:
        cp.dump(result, fp)

    # Pearson correlation of the two return series.
    x_mean = x.mean()
    y_mean = y.mean()
    pearson = ((x - x_mean).dot(y - y_mean)
               / sqrt(sum((x - x_mean) ** 2))
               / sqrt(sum((y - y_mean) ** 2)))

    # Re-run the back test with the best (highest-Sharpe) parameter set.
    result.sort(key=lambda r: r[0], reverse=True)
    best = result[0]
    res = back_test((s_score, resid_se, best[1], best[2], best[3],
                     best[4], best[5], best[6]))

    fig = plt.figure(figsize=(20, 10))
    plt.plot(res[0])
    plt.savefig("./Pics/net_value/EG_{}_{}.png".format(ticker1, ticker2))
    del fig
    return pd.Series(res[0]).to_csv(
        "./xlsx/EG_{}_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(
            ticker1, ticker2, pearson, best[1], best[2], best[3],
            best[4], best[5], best[6]))
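
# Hedged sketch (not part of main above): the commented-out rolling s-score
# suggests a standardized spread was intended; an EGARCH-based variant would
# standardize by the fitted conditional volatility instead of a rolling
# standard deviation. The helper name `egarch_s_score` is illustrative.
def egarch_s_score(resid_se):
    ar = ARX(resid_se, volatility=EGARCH(2, 0, 2))
    ar.distribution = SkewStudent()
    res = ar.fit(disp="off")
    # Center the residual and scale by the conditional volatility path.
    return (resid_se - resid_se.mean()) / res.conditional_volatility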
def estimate_qar(y, p=1, q=1, disp=1):
    """Estimate a QAR(p, q) on the data y.

    Starting values come from a first-stage AR(p) fit; ``disp`` is passed
    through to the optimizer. Returns the fitted results object.
    """
    lags = p
    qarpq = QAR(y, p=lags, q=q)
    am = ARX(y, lags=lags, constant=True)
    first_stage = am.fit()
    # First-stage AR coefficients, zeros for the QAR-specific terms, and
    # the first-stage residual scale as starting values.
    params = np.r_[first_stage.params[:-1],
                   np.zeros(lags),
                   np.zeros(qarpq.q),
                   np.sqrt(np.abs(first_stage.params[-1]))]
    results = qarpq.fit(maxiter=50000, start_params=params, disp=disp)
    return results
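
# Hypothetical usage of estimate_qar, assuming `y` is a one-dimensional
# series and that QAR returns a statsmodels-style results object as the
# docstring above implies; the data here are illustrative only.
import numpy as np
rng = np.random.default_rng(0)
y = rng.standard_normal(500)
qar_res = estimate_qar(y, p=2, q=1, disp=0)
print(qar_res.params)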
def test_arx_no_lags():
    mod = ARX(SP500, volatility=GARCH())
    res = mod.fit(disp="off")
    assert res.params.shape[0] == 4
    assert "lags" not in mod._model_description(include_lags=False)
print(model.summary())

# 5. CNY exchange-rate returns: ARCH effects and an AR(1)-GARCH(1,1) fit
cny = web.DataReader('CNY=X', 'yahoo',
                     dt.datetime(2015, 1, 1), dt.datetime(2015, 12, 31))
ret = (cny.Close - cny.Close.shift(1)) / cny.Close.shift(1)
ret = ret.dropna()

cny.Close.plot()
ret.plot()
plot_acf(ret, lags=20)
plot_pacf(ret, lags=20)

# Ljung-Box test on the first 12 autocorrelations of returns; the p-value
# of the 12th-lag statistic tests the null of no serial correlation.
LjungBox = stattools.q_stat(stattools.acf(ret)[1:13], len(ret))
LjungBox[1][-1]

# Squared returns: significant autocorrelation here indicates ARCH effects.
(ret ** 2).plot()
plot_acf(ret ** 2, lags=20)
plot_pacf(ret ** 2, lags=20)
LjungBox = stattools.q_stat(stattools.acf(ret ** 2)[1:13], len(ret))
LjungBox[1][-1]

from arch.univariate import ARX, GARCH
model = ARX(ret, lags=1)
model.volatility = GARCH()
res = model.fit()
print(res.summary())
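
# A short follow-on sketch (assumes arch >= 4.19 for `reindex`): mean and
# variance forecasts from the AR(1)-GARCH(1,1) fit above.
fc = res.forecast(horizon=5, reindex=False)
print(fc.mean)      # conditional mean forecasts h.1 ... h.5
print(fc.variance)  # conditional variance forecasts for the same horizons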
                       columns=['Buy and Hold', 'Strategy'])
eqCurves['Buy and Hold'] = returns['Buy and Hold'].cumsum() + 1
eqCurves['Strategy'] = returns['Strategy'].cumsum() + 1
eqCurves['Strategy'].plot(figsize=(10, 8))
eqCurves['Buy and Hold'].plot()
plt.legend()
plt.show()

# # From Arch website

# In[273]:

from arch.univariate import ARX
ar = ARX(Y, lags=30)
print(ar.fit().summary())

# In[270]:

from arch.univariate import ARCH, GARCH
ar.volatility = GARCH(p=3, o=0, q=3)
res = ar.fit(update_freq=0, disp='off')
print(res.summary())

# In[265]:

from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
print(res.summary())
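
# Sketch: one way to compare the Gaussian and Student's t fits above is by
# information criteria; `res_normal` and `res_t` are illustrative names.
from arch.univariate import Normal

ar.distribution = Normal()
res_normal = ar.fit(update_freq=0, disp='off')
ar.distribution = StudentsT()
res_t = ar.fit(update_freq=0, disp='off')
print(res_normal.aic, res_t.aic)  # lower is better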
def main(fund_price_file=None, fund_region='EU', returns_type='pct', tag=''):
    # switch to the folder where this script is stored
    os.chdir(os.path.dirname(__file__))
    output_folder = '{}_{}_{}_return'.format(tag, fund_region, returns_type)
    output_dir = os.path.join(os.path.dirname(__file__), output_folder)

    ##########################################################################
    # read the four factors of the Fama-French data
    ##########################################################################
    if fund_region == 'EU':
        file_3_Factors = 'Europe_3_Factors_Daily.csv'
        file_MOM_Factor = 'Europe_MOM_Factor_Daily.csv'
        df_three_factors = pd.read_csv(
            file_3_Factors, parse_dates=[0], index_col=0,
            skiprows=6).drop('RF', axis=1)['2013':'2018']
        df_fourth_factor = pd.read_csv(
            file_MOM_Factor, parse_dates=[0], index_col=0,
            skiprows=6)['2013':'2018']
        ff_rf = pd.read_csv(
            file_3_Factors, parse_dates=[0], index_col=0,
            skiprows=6)['RF']['2013':'2018']
    if fund_region == 'US':
        file_3_Factors = 'F-F_Research_Data_Factors_daily.CSV'
        file_MOM_Factor = 'F-F_Momentum_Factor_daily.csv'
        df_three_factors = pd.read_csv(
            file_3_Factors, parse_dates=[0], index_col=0,
            skiprows=4).drop('RF', axis=1)['2013':'2018']
        df_fourth_factor = pd.read_csv(
            file_MOM_Factor, parse_dates=[0], index_col=0,
            skiprows=13)['2013':'2018']
        ff_rf = pd.read_csv(
            file_3_Factors, parse_dates=[0], index_col=0,
            skiprows=4)['RF']['2013':'2018']
    if fund_region == 'Global':
        file_3_Factors = 'Global_3_Factors_daily.CSV'
        file_MOM_Factor = 'Global_MOM_Factor_daily.csv'
        df_three_factors = pd.read_csv(
            file_3_Factors, parse_dates=[0], index_col=0,
            skiprows=6).drop('RF', axis=1)['2013':'2018']
        df_fourth_factor = pd.read_csv(
            file_MOM_Factor, parse_dates=[0], index_col=0,
            skiprows=6)['2013':'2018']
        ff_rf = pd.read_csv(
            file_3_Factors, parse_dates=[0], index_col=0,
            skiprows=6)['RF']['2013':'2018']

    factors = pd.concat([df_three_factors, df_fourth_factor], axis=1)
    factors.index = pd.to_datetime(factors.index)
    factors = factors / 100
    ff_rf = ff_rf / 100
    print(factors.head())
    print(factors.describe())

    ##########################################################################
    # read green fund daily prices
    ##########################################################################
    xl = pd.ExcelFile(fund_price_file)
    print(xl.sheet_names)

    stats_list = []
    ols_list = []
    pvalues_list = []
    garch_list = []
    arx_list = []

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    os.chdir(output_dir)

    for select_sheet in xl.sheet_names:
        df = xl.parse(select_sheet, parse_dates=[0], index_col=0,
                      skiprows=[0, 1, 2, 4], header=0)
        df.index = pd.to_datetime(df.index)
        print('Import sheet: {}'.format(select_sheet))

        # skip/filter non-numeric columns
        print('the following columns are not numeric')
        print(df.select_dtypes(exclude=['float64']))
        df = df.select_dtypes(include=['float64'])

        ######################################################################
        # calculate daily average returns and descriptive stats; see
        # https://stackoverflow.com/questions/35365545/calculating-cumulative-returns-with-pandas-dataframe
        ######################################################################
        if returns_type == 'pct':  # simple return
            returns = df.pct_change(limit=2).mean(axis=1)['2013':'2018']
        if returns_type == 'cum':  # cumulative return
            returns = df.pct_change(limit=2)['2013':'2018']
            returns = ((1 + returns).cumprod() - 1).mean(axis=1)
        if returns_type == 'log':  # log return
            returns = np.log(1 + df.pct_change(limit=2)).mean(
                axis=1)['2013':'2018']
        print(returns.describe())

        # check data completeness
        print('The following dates have NaN return values')
        print(returns[returns.isna()])
        returns.fillna(method='bfill', inplace=True)

        returns.plot()
        plt.savefig('{}_daily_returns.png'.format(select_sheet))
        plt.close()

        stats_current = returns.describe()
        stats_current.name = select_sheet
        stats_list.append(stats_current)

        ######################################################################
        # linear regression on the Fama-French factors
        ######################################################################
        slice_index_ols = returns.index.intersection(factors.index)
        X = factors.loc[slice_index_ols]
        y = returns.loc[slice_index_ols] - ff_rf[slice_index_ols]
        X_with_constant = sm.add_constant(X)
        model_static = sm.OLS(y, X_with_constant, missing='drop').fit()
        print(model_static.params)

        ols_current = model_static.params
        ols_current.name = select_sheet
        ols_list.append(ols_current)

        pvalues_current = model_static.pvalues
        pvalues_current.name = select_sheet
        pvalues_list.append(pvalues_current)

        with open('ols_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(model_static.summary().as_csv())

        ######################################################################
        # GARCH analysis of volatility
        ######################################################################
        am = arch_model(returns)
        res = am.fit()
        print(res.summary())

        garch_current = res.params
        garch_current.name = select_sheet
        garch_list.append(garch_current)

        with open('garch_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('garch_{}.png'.format(select_sheet))
        plt.close()

        ######################################################################
        # ARX analysis of the conditional mean
        ######################################################################
        from arch.univariate import ARX
        arx = ARX(returns, lags=[1])
        res = arx.fit()
        print(res.summary())

        arx_current = res.params
        arx_current.name = select_sheet
        arx_list.append(arx_current)

        with open('arx_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('arx_{}.png'.format(select_sheet))
        plt.close()

    ##########################################################################
    # write all results
    ##########################################################################
    pd.concat(stats_list, axis=1).to_csv('greenfund_stats.csv')
    pd.concat(ols_list, axis=1).to_csv('greenfund_ols.csv')
    pd.concat(pvalues_list, axis=1).to_csv('greenfund_pvalues.csv')
    pd.concat(garch_list, axis=1).to_csv('greenfund_garch.csv')
    pd.concat(arx_list, axis=1).to_csv('greenfund_arx.csv')
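
# Hypothetical invocation (the file name and tag are illustrative; the
# workbook is assumed to hold one price sheet per fund group):
if __name__ == '__main__':
    main(fund_price_file='green_fund_prices.xlsx', fund_region='EU',
         returns_type='log', tag='v1')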
# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()
# Test of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)

# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])     # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
print(res.summary())

# Forecast
drop = len(data) - len(rates)
start = 3254 - 2 - drop
end = 3262 - 2 - drop
var = res.forecast(start=start, horizon=5,
                   method='simulation').variance[start:1 + end]
var.plot()

entry = [
    '2012:06:20',
    '2012:06:21',
    '2012:06:22',
import numpy
import pandas
import statsmodels.api as sm

prices = pandas.read_csv("prices.csv", parse_dates=['Date'], index_col=0)
tickers = prices.columns[:-2]
prices = prices.resample('W').agg(lambda x: x.iloc[-1])
prices.dropna(axis=0, how='any', inplace=True)

# Weekly risk-free rate from the 10-year Treasury yield (^TNX, annualized %)
rf = prices['^TNX'].values[:-1]
rf /= (52 * 100)

returns = prices.iloc[:, :-1].pct_change()[1:]
rm = returns['^GSPC'].values
ri = returns.iloc[:, :-1].values

# Excess returns
Ri = ri - rf[:, numpy.newaxis]
Rm = rm - rf

# CAPM regression: Ri = alpha + beta * Rm + epsilon
model = sm.OLS(Ri, sm.add_constant(Rm))
results = model.fit()
alpha, beta = results.params
epsilon = numpy.sqrt(Ri.var(axis=0) - beta ** 2 * Rm.var(axis=0))

output = pandas.DataFrame(columns=['alpha', 'beta', 'epsilon'],
                          index=tickers,
                          data=numpy.array([alpha, beta, epsilon]).T)
output.to_csv("coefficients.csv")

# AR(1)-GARCH(1,1) model of the market return
from arch.univariate import ARX, GARCH
arx = ARX(rm, lags=1)
arx.volatility = GARCH()
res = arx.fit(disp='off')
pandas.DataFrame(res.params).to_csv("parameters.csv")
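
# Sketch (assumes arch >= 4.19 for `reindex`; `res` is the AR(1)-GARCH fit
# of the market return above): simulated weekly market paths can be drawn
# directly from the fitted model.
fc = res.forecast(horizon=52, method='simulation', simulations=1000,
                  reindex=False)
paths = fc.simulations.values[-1]  # (1000, 52) array of simulated returns
print(paths.mean(axis=0))          # average simulated path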
# In[58]:

# AR
from arch.univariate import ARX
ar = ARX(ts_data, lags=[1, 3, 12])
# print(ar.fit().summary())

# In[60]:

# Volatility Processes
from arch.univariate import ARCH, GARCH
ar.volatility = ARCH(p=5)
res = ar.fit(update_freq=0, disp='off')
# print(res.summary())
fig = res.plot()

# Distribution
from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
# print(res.summary())

# In[61]:

# price to return
# crude_ret = 100 * crude.dropna().pct_change().dropna()
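
# Sketch of the commented-out price-to-return step above, assuming `crude`
# is a daily price Series; scaling by 100 keeps the optimizer well
# conditioned.
crude_ret = 100 * crude.dropna().pct_change().dropna()
ar_crude = ARX(crude_ret, lags=[1, 3, 12])
ar_crude.volatility = ARCH(p=5)
print(ar_crude.fit(update_freq=0, disp='off').summary())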