Example #1
def test_x_reformat_1var(exog_format):
    # Accepted exog formats:
    #   arrays: (10,), (1, 10), (n, 10), (1, 1, 10), (1, n, 10)
    #   dicts:  {"x1": (10,)}, {"x1": (1, 10)}, {"x1": (n, 10)}
    exog, ref = exog_format
    if exog is None:
        return
    if isinstance(exog, dict):
        nexog = len(exog)
    else:
        if np.ndim(exog) == 3:
            nexog = exog.shape[0]
        else:
            nexog = 1
    cols = [f"x{i}" for i in range(1, nexog + 1)]
    rng = RandomState(12345)
    x = pd.DataFrame(rng.standard_normal((SP500.shape[0], nexog)),
                     columns=cols,
                     index=SP500.index)
    mod = ARX(SP500, lags=1, x=x)
    res = mod.fit()
    fcasts = res.forecast(horizon=10, x=exog, reindex=False)
    ref = res.forecast(horizon=10, x=ref, reindex=False)
    assert_allclose(fcasts.mean, ref.mean)
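This test comes from the arch test suite and depends on module-level objects defined elsewhere: SP500 and the exog_format fixture. A minimal sketch of the assumed setup, modeled on the arch documentation; the fixture below is a hypothetical stand-in, not the suite's actual parametrization:

import numpy as np
import pandas as pd
import pytest
from numpy.random import RandomState
from numpy.testing import assert_allclose

import arch.data.sp500
from arch.univariate import ARX

# Percentage returns on the S&P 500, as in the arch documentation.
SP500 = 100 * arch.data.sp500.load()["Adj Close"].pct_change().dropna()

@pytest.fixture(params=["2d", "dict"])
def exog_format(request):
    # Hypothetical fixture: yields (exog, reference) pairs in one of the
    # shapes enumerated in the test's comments.
    rng = RandomState(0)
    ref = rng.standard_normal((1, 10))
    if request.param == "2d":
        return ref, ref
    return {"x1": ref}, ref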
Example #2
from math import sqrt
import multiprocessing as mp
import pickle as cp

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from arch.univariate import ARX, EGARCH, SkewStudent

# back_test and back_test_sharp are helper functions defined elsewhere
# in the original module.
def main(ticker1, ticker2):
	df = pd.read_csv("./Data/close.csv", dtype={"date": str})

	df2 = np.log(df.loc[:, [ticker1, ticker2]]).diff().dropna()
	x = df2[ticker1].values
	y = df2[ticker2].values
	A = np.vstack((np.ones_like(x), x)).T

	# OLS via the normal equations: b = (A'A)^-1 A'y
	b = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(y)
	resid = y - A.dot(b)

	resid_se = pd.Series(resid)
	# rolling dispersion of residual changes over a 100-day window
	std2_se = resid_se.rolling(
	    window=100,
	).apply(lambda x: sqrt(sum(np.diff(x)**2) / (len(x) - 1)))
	mean_se = resid_se.rolling(
	    window=100,
	).mean()

	# The rolling z-score version of the s-score is disabled in the original:
	# s_score = (pd.Series(resid_se) - mean_se) / std2_se
	ar = ARX(resid_se, volatility=EGARCH(2, 0, 2))
	ar.distribution = SkewStudent()
	res = ar.fit()  # EGARCH fit; its result is not used below
	s_score = pd.Series(resid)

	arg_lst = [
		(s_score, resid_se, i / 100.0, j / 100.0, k / 100.0,
		 l / 100.0, m / 100.0, n / 100.0)
		for i in range(15, 35, 5)
		for j in range(i + 1, 49, 5)
		for k in range(j + 1, 50, 5)
		for l in range(85, 65, -5)
		for m in range(l - 1, 51, -5)
		for n in range(m - 1, 50, -5)
	]

	pool = mp.Pool(6)
	result = pool.map(back_test_sharp, arg_lst)
	pool.close()
	pool.join()

	with open("./pkl/EG_result_lst_{}_{}_sharp".format(ticker1, ticker2), "wb") as fp:
		cp.dump(result, fp)
	
	x_mean = x.mean()
	y_mean = y.mean()
	pearson = ((x - x_mean).dot(y - y_mean)
	           / sqrt(sum((x - x_mean)**2))
	           / sqrt(sum((y - y_mean)**2)))

	result.sort(key=lambda x: x[0], reverse=True)
	best = result[0]
	res = back_test((s_score, resid_se, best[1], best[2], best[3], best[4], best[5], best[6]))
	fig = plt.figure(figsize=(20, 10))
	plt.plot(res[0])
	plt.savefig("./Pics/net_value/EG_{}_{}.png".format(ticker1, ticker2))
	del fig
	pd.Series(res[0]).to_csv("./xlsx/EG_{}_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(
		ticker1, ticker2, pearson, best[1], best[2], best[3], best[4], best[5], best[6]))
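The EGARCH fit above is never consumed: s_score ends up as the raw residual series. A hypothetical alternative for that assignment inside main, standardizing the spread by the fitted conditional volatility instead of the disabled rolling z-score:

# Hypothetical: replace "s_score = pd.Series(resid)" inside main with a
# spread standardized by the EGARCH conditional volatility.
s_score = (resid_se - mean_se) / res.conditional_volatility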
Example #3
def estimate_qar(y, p=1, q=1, disp=1):
    """
    Estimate a QAR(p, q) model on the data y.

    disp is passed through to the optimizer to control output.

    Returns a statsmodels-style fitted results object.
    """
    lags = p
    qarpq = QAR(y, p=lags, q=q)

    # First stage: AR(p) fit used only to build starting values.
    am = ARX(y, lags=lags, constant=True)
    first_stage = am.fit()

    # Starting values: first-stage AR coefficients, zeros for the QAR
    # terms, and the square root of the first-stage variance estimate.
    params = np.r_[first_stage.params[:-1], np.zeros(lags),
                   np.zeros(qarpq.q),
                   np.sqrt(np.abs(first_stage.params[-1]))]

    results = qarpq.fit(maxiter=50000, start_params=params, disp=disp)

    return results
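QAR is a user-defined model class from the original module, not part of arch or statsmodels. A hypothetical usage sketch on synthetic AR(1) data, assuming QAR and estimate_qar are importable:

import numpy as np

# Simulate a short AR(1) series (illustrative only).
rng = np.random.default_rng(0)
y = np.zeros(500)
for t in range(1, 500):
    y[t] = 0.5 * y[t - 1] + rng.standard_normal()

results = estimate_qar(y, p=1, q=1, disp=0)
print(results.summary())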
Example #4
def test_arx_no_lags():
    # With no AR lags the mean model reduces to a constant, so the fitted
    # parameters are mu plus the GARCH(1,1) omega, alpha[1], and beta[1].
    mod = ARX(SP500, volatility=GARCH())
    res = mod.fit(disp="off")
    assert res.params.shape[0] == 4
    assert "lags" not in mod._model_description(include_lags=False)
Example #5
import datetime as dt

import matplotlib.pyplot as plt
import pandas_datareader.data as web
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa import stattools

print(model.summary())  # model fitted in an earlier, omitted section

# 5. USD/CNY exchange rate: autocorrelation and ARCH-effect tests
cny = web.DataReader('CNY=X', 'yahoo', dt.datetime(2015, 1, 1),
                     dt.datetime(2015, 12, 31))

ret = (cny.Close - cny.Close.shift(1)) / cny.Close.shift(1)
ret = ret.dropna()

cny.Close.plot()

ret.plot()
plot_acf(ret, lags=20)
plot_pacf(ret, lags=20)

# Ljung-Box test on returns (lags 1-12)
LjungBox = stattools.q_stat(stattools.acf(ret)[1:13], len(ret))
print(LjungBox[1][-1])  # p-value at lag 12

(ret**2).plot()
plot_acf(ret**2, lags=20)
plot_pacf(ret**2, lags=20)

# Ljung-Box test on squared returns: a check for ARCH effects
LjungBox = stattools.q_stat(stattools.acf(ret**2)[1:13], len(ret))
print(LjungBox[1][-1])  # p-value at lag 12

from arch.univariate import ARX, GARCH
model = ARX(ret, lags=1)
model.volatility = GARCH()
res = model.fit()
print(res.summary())
Example #6
# returns is assumed to be a DataFrame with daily 'Buy and Hold' and
# 'Strategy' return columns; the original snippet opens mid-statement, so
# the DataFrame construction is reconstructed from the lines below.
eqCurves = pd.DataFrame(index=returns.index,
                        columns=['Buy and Hold', 'Strategy'])
eqCurves['Buy and Hold'] = returns['Buy and Hold'].cumsum() + 1
eqCurves['Strategy'] = returns['Strategy'].cumsum() + 1

eqCurves['Strategy'].plot(figsize=(10, 8))
eqCurves['Buy and Hold'].plot()
plt.legend()
plt.show()

# # From Arch website

# In[273]:

from arch.univariate import ARX
ar = ARX(Y, lags=30)  # Y: the return series used throughout this notebook
print(ar.fit().summary())

# In[270]:

from arch.univariate import ARCH, GARCH
ar.volatility = GARCH(p=3, o=0, q=3)
res = ar.fit(update_freq=0, disp='off')
print(res.summary())

# In[265]:

from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
print(res.summary())
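A possible follow-up, not in the original notebook: variance forecasts come directly from the fitted AR-GARCH-StudentsT result.

# Hypothetical follow-up: 5-step-ahead variance forecasts from the end
# of the sample.
forecasts = res.forecast(horizon=5, reindex=False)
print(forecasts.variance)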
Example #7
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from arch import arch_model

def main(fund_price_file=None, fund_region='EU', returns_type='pct', tag=''):
    # switch to the folder where this script is stored
    os.chdir(os.path.dirname(__file__))
    output_folder = '{}_{}_{}_return'.format(tag, fund_region, returns_type)
    output_dir = os.path.join(os.path.dirname(__file__), output_folder)

    ##########################################################################
    # read the daily Fama-French three factors plus momentum
    ##########################################################################

    region_files = {
        'EU': ('Europe_3_Factors_Daily.csv',
               'Europe_MOM_Factor_Daily.csv', 6, 6),
        'US': ('F-F_Research_Data_Factors_daily.CSV',
               'F-F_Momentum_Factor_daily.csv', 4, 13),
        'Global': ('Global_3_Factors_daily.CSV',
                   'Global_MOM_Factor_daily.csv', 6, 6),
    }
    file_3_factors, file_mom_factor, skip_3, skip_mom = region_files[fund_region]

    df_three_factors = pd.read_csv(file_3_factors,
                                   parse_dates=[0],
                                   index_col=0,
                                   skiprows=skip_3).drop('RF',
                                                         axis=1)['2013':'2018']
    df_fourth_factor = pd.read_csv(file_mom_factor,
                                   parse_dates=[0],
                                   index_col=0,
                                   skiprows=skip_mom)['2013':'2018']
    ff_rf = pd.read_csv(file_3_factors,
                        parse_dates=[0],
                        index_col=0,
                        skiprows=skip_3)['RF']['2013':'2018']

    factors = pd.concat([df_three_factors, df_fourth_factor], axis=1)
    factors.index = pd.to_datetime(factors.index)
    factors = factors / 100  # the factor files report percentages
    ff_rf = ff_rf / 100
    print(factors.head())
    print(factors.describe())

    ##########################################################################
    # read green fund daily price
    ##########################################################################

    file = fund_price_file
    xl = pd.ExcelFile(file)
    print(xl.sheet_names)

    stats_list = []
    ols_list = []
    pvalues_list = []
    garch_list = []
    arx_list = []

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    os.chdir(output_dir)

    for select_sheet in xl.sheet_names:
        df = xl.parse(select_sheet,
                      parse_dates=[0],
                      index_col=0,
                      skiprows=[0, 1, 2, 4],
                      header=0)
        df.index = pd.to_datetime(df.index)
        print('Import sheet: {}'.format(select_sheet))

        # skip/filter NaN (non-numeric) columns
        print('The following columns are not numeric:')
        print(df.select_dtypes(exclude=['float64']))
        df = df.select_dtypes(include=['float64'])

        ##########################################################################
        # calculate daily average returns and describe stats ; https://stackoverflow.com/questions/35365545/calculating-cumulative-returns-with-pandas-dataframe
        ##########################################################################
        if returns_type == 'pct':  # simple return
            returns = df.pct_change(limit=2).mean(axis=1)['2013':'2018']
        if returns_type == 'cum':  # cumulative_return
            returns = df.pct_change(limit=2)['2013':'2018']
            returns = ((1 + returns).cumprod() - 1).mean(axis=1)
        if returns_type == 'log':  # log return
            returns = np.log(1 + df.pct_change(limit=2)).mean(
                axis=1)['2013':'2018']
        print(returns.describe())

        # check data completeness
        print('The following date have NaN return value')
        print(returns[returns.isna().any()])
        returns.fillna(method='bfill', inplace=True)

        returns.plot()
        plt.savefig('{}_daily_returns.png'.format(select_sheet))
        plt.close()

        stats_current = returns.describe()
        stats_current.name = select_sheet
        stats_list.append(stats_current)

        ##########################################################################
        # linear regression of fama french factors
        ##########################################################################
        slice_index_ols = returns.index.intersection(factors.index)

        X = factors.loc[slice_index_ols]
        y = returns.loc[slice_index_ols] - ff_rf[slice_index_ols]
        X_with_constant = sm.add_constant(X)
        model_static = sm.OLS(y, X_with_constant, missing='drop').fit()

        print(model_static.params)
        ols_current = model_static.params
        ols_current.name = select_sheet
        ols_list.append(ols_current)

        pvalues_current = model_static.pvalues
        pvalues_current.name = select_sheet
        pvalues_list.append(pvalues_current)

        with open('ols_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(model_static.summary().as_csv())

        ##########################################################################
        # arch analysis of volatility
        ##########################################################################
        am = arch_model(returns)
        res = am.fit()
        print(res.summary())

        garch_current = res.params
        garch_current.name = select_sheet
        garch_list.append(garch_current)

        with open('garch_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('garch_{}.png'.format(select_sheet))
        plt.close()

        ##########################################################################
        # arx analysis of volatility
        ##########################################################################
        from arch.univariate import ARX
        arx = ARX(returns, lags=[1])
        res = arx.fit()

        print(res.summary())

        arx_current = res.params
        arx_current.name = select_sheet
        arx_list.append(arx_current)

        with open('arx_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('arx_{}.png'.format(select_sheet))
        plt.close()

    ##########################################################################
    # write all results
    ##########################################################################
    pd.concat(stats_list, axis=1).to_csv('greenfund_stats.csv')
    pd.concat(ols_list, axis=1).to_csv('greenfund_ols.csv')
    pd.concat(pvalues_list, axis=1).to_csv('greenfund_pvalues.csv')
    pd.concat(garch_list, axis=1).to_csv('greenfund_garch.csv')
    pd.concat(arx_list, axis=1).to_csv('greenfund_arx.csv')
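A hypothetical invocation; the workbook name is illustrative, and each sheet is expected to hold daily fund prices with dates in the first column:

if __name__ == '__main__':
    # Hypothetical call: EU funds, simple (pct) daily returns.
    main(fund_price_file='green_fund_prices.xlsx', fund_region='EU',
         returns_type='pct', tag='demo')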
Example #8
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import ar_select_order
from arch.univariate import ARX, GARCH

# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()  # rates: the series under study
# Test of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)

# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])  # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
print(res.summary())

# Forecast
drop = len(data) - len(rates)  # data: the raw series rates was derived from
start = 3254 - 2 - drop
end = 3262 - 2 - drop

var = res.forecast(start=start, horizon=5,
                   method='simulation').variance[start:1 + end]
var.plot()
entry = [
    '2012:06:20',
    '2012:06:21',
    '2012:06:22',
]  # remaining dates truncated in the source
Example #9
import numpy
import pandas
import statsmodels.api as sm

prices = pandas.read_csv("prices.csv", parse_dates=['Date'], index_col=0)
tickers = prices.columns[:-2]
prices = prices.resample('W').agg(lambda x: x.iloc[-1])  # last price each week
prices.dropna(axis=0, how='any', inplace=True)
rf = prices['^TNX'].values[:-1]
rf /= (52 * 100)
returns = prices.iloc[:, :-1].pct_change()[1:]
rm = returns['^GSPC'].values
ri = returns.iloc[:, :-1].values
Ri = ri - rf[:, numpy.newaxis]
Rm = rm - rf
model = sm.OLS(Ri, sm.add_constant(Rm))
results = model.fit()
alpha, beta = results.params  # row 0: intercepts, row 1: betas (one per ticker)
# idiosyncratic volatility implied by the single-factor decomposition
epsilon = numpy.sqrt(Ri.var(axis=0) - beta**2 * Rm.var(axis=0))
output = pandas.DataFrame(columns=['alpha', 'beta', 'epsilon'],
                          index=tickers,
                          data=numpy.array([alpha, beta, epsilon]).T)
output.to_csv("coefficients.csv")
from arch.univariate import ARX, GARCH
arx = ARX(rm, lags=1)
arx.volatility = GARCH()
res = arx.fit(disp='off')
pandas.DataFrame(res.params).to_csv("parameters.csv")
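A possible follow-up, not in the original script: plot the fitted conditional volatility, annualized for the weekly sampling frequency.

# Hypothetical follow-up: annualized conditional volatility of the
# AR(1)-GARCH(1,1) market-return model ('W' scales weekly data).
fig = res.plot(annualize='W')
fig.savefig("market_volatility.png")  # illustrative output path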
Example #10
# In[58]:


# AR mean model
from arch.univariate import ARX
ar = ARX(ts_data, lags=[1, 3, 12])  # ts_data: series loaded earlier in the notebook
# print(ar.fit().summary())


# In[60]:


# Volatility Processes
from arch.univariate import ARCH, GARCH
ar.volatility = ARCH(p=5)
res = ar.fit(update_freq=0, disp='off')
# print(res.summary())
fig = res.plot()

# Distribution
from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
# print(res.summary())


# In[61]:


# price to return
# crude_ret = 100 * crude.dropna().pct_change().dropna()