Example #1
def test_x_reformat_1var(exog_format):
    # (10,)
    # (1,10)
    # (n, 10)
    # (1,1,10)
    # (1,n,10)
    # {"x1"} : (10,)
    # {"x1"} : (1,10)
    # {"x1"} : (n,10)
    exog, ref = exog_format
    if exog is None:
        return
    if isinstance(exog, dict):
        nexog = len(exog)
    else:
        if np.ndim(exog) == 3:
            nexog = exog.shape[0]
        else:
            nexog = 1
    cols = [f"x{i}" for i in range(1, nexog + 1)]
    rng = RandomState(12345)
    x = pd.DataFrame(rng.standard_normal((SP500.shape[0], nexog)),
                     columns=cols,
                     index=SP500.index)
    mod = ARX(SP500, lags=1, x=x)
    res = mod.fit()
    fcasts = res.forecast(horizon=10, x=exog, reindex=False)
    ref = res.forecast(horizon=10, x=ref, reindex=False)
    assert_allclose(fcasts.mean, ref.mean)
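
The shape conventions in the comment block above are easiest to see in a standalone call. A minimal sketch (not part of the original test), assuming arch's bundled S&P 500 data, showing three equivalent ways to pass future values of a single exogenous regressor:

import numpy as np
import pandas as pd
from numpy.random import RandomState
from arch.data import sp500
from arch.univariate import ARX

SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()
rng = RandomState(12345)
x = pd.DataFrame(rng.standard_normal((SP500.shape[0], 1)),
                 columns=["x1"], index=SP500.index)
res = ARX(SP500, lags=1, x=x).fit(disp="off")

x_future = rng.standard_normal(10)
f1 = res.forecast(horizon=10, x=x_future, reindex=False)           # (10,)
f2 = res.forecast(horizon=10, x=x_future[None, :], reindex=False)  # (1, 10)
f3 = res.forecast(horizon=10, x={"x1": x_future}, reindex=False)   # {"x1": (10,)}
np.testing.assert_allclose(f1.mean, f2.mean)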
Example #2
import multiprocessing as mp
import pickle as cp
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from arch.univariate import ARX, EGARCH, SkewStudent


def main(ticker1, ticker2):
    df = pd.read_csv("./Data/close.csv", dtype={"date": str})

    # Regress log returns of ticker2 on [1, ticker1] via the normal equations
    df2 = np.log(df.loc[:, [ticker1, ticker2]]).diff().dropna()
    x = df2[ticker1].values
    y = df2[ticker2].values
    A = np.vstack((np.ones_like(x), x)).T

    b = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(y)
    resid = y - A.dot(b)

    # Rolling dispersion and mean of the residual over 100 observations
    resid_se = pd.Series(resid)
    std2_se = resid_se.rolling(
        window=100,
    ).apply(lambda x: sqrt(sum(np.diff(x)**2) / (len(x) - 1)))
    mean_se = resid_se.rolling(
        window=100,
    ).mean()

    # The standardized s-score is left disabled; the raw residual is used below.
    # s_score = (pd.Series(resid_se) - mean_se) / std2_se
    ar = ARX(resid_se, volatility=EGARCH(2, 0, 2))
    ar.distribution = SkewStudent()
    res = ar.fit()  # note: this fit result is not used below
    s_score = pd.Series(resid)

    # Grid of entry/exit thresholds (ascending i < j < k, descending l > m > n)
    arg_lst = [
        (s_score, resid_se,
         i / 100.0, j / 100.0, k / 100.0,
         l / 100.0, m / 100.0, n / 100.0)
        for i in range(15, 35, 5)
        for j in range(i + 1, 49, 5)
        for k in range(j + 1, 50, 5)
        for l in range(85, 65, -5)
        for m in range(l - 1, 51, -5)
        for n in range(m - 1, 50, -5)
    ]

    # back_test_sharp and back_test are defined elsewhere in this project
    pool = mp.Pool(6)
    result = pool.map(back_test_sharp, arg_lst)
    pool.close()
    pool.join()

    with open("./pkl/EG_result_lst_{}_{}_sharp".format(ticker1, ticker2), "wb") as fp:
        cp.dump(result, fp)

    x_mean = x.mean()
    y_mean = y.mean()
    pearson = (x - x_mean).dot(y - y_mean) / sqrt(sum((x - x_mean)**2)) / sqrt(sum((y - y_mean)**2))

    result.sort(key=lambda x: x[0], reverse=True)
    best = result[0]
    res = back_test((s_score, resid_se, best[1], best[2], best[3], best[4], best[5], best[6]))
    fig = plt.figure(figsize=(20, 10))
    plt.plot(res[0])
    plt.savefig("./Pics/net_value/EG_{}_{}.png".format(ticker1, ticker2))
    del fig
    return pd.Series(res[0]).to_csv("./xlsx/EG_{}_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(
        ticker1, ticker2, pearson, best[1], best[2], best[3], best[4], best[5], best[6]))
Example #3
from arch.univariate import ARX, LS, ConstantMean
from arch.univariate import EGARCH, EWMAVariance, GARCH
from IPython.display import Markdown, display


def run_garch_simple(y, mean_model, vol_model, split_date, x=None, verbose=False):

    # specify mean model
    if mean_model == "CONST":
        ls = ConstantMean(y)
    elif mean_model == 'LS':
        ls = LS(y=y, x=x)
    elif mean_model == 'ARX':
        ls = ARX(y=y, lags=1)
    else:
        raise ValueError("Misspecified mean model name. Please choose CONST, LS, or ARX.")
    
    # specify volatility model
    if vol_model == "GARCH":
        ls.volatility = GARCH(p=1, q=1)
    elif vol_model == "EGARCH":
        ls.volatility = EGARCH(p=1, o=1, q=1)
    elif vol_model == "EWMA":
        ls.volatility = EWMAVariance(lam=None)
    else:
        raise ValueError("Misspecified volatility process name. Please choose GARCH, EGARCH, or EWMA.")
    
    res = ls.fit(disp='off', last_obs=split_date)
    
    if verbose:
        display(Markdown('#### <br> <br> GARCH model results'))
        print(res.summary())
    
    return res
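
A hedged usage sketch for run_garch_simple; the returns series and split date below are illustrative placeholders, not from the original:

import numpy as np
import pandas as pd

idx = pd.bdate_range("2015-01-01", periods=1000)
returns = pd.Series(np.random.default_rng(0).standard_normal(1000),
                    index=idx, name="ret")

res = run_garch_simple(returns, mean_model="ARX", vol_model="GARCH",
                       split_date=pd.Timestamp("2018-06-29"))
print(res.params)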
Example #4
import pandas as pd

from arch.univariate import ARX


def get_disaster_factors(innovation_method, agg_freq="mon", resample=True):
    r'''
    Function to get various disaster risk factors and their innovations.

    Args:
        innovation_method: String for how to compute innovations in disaster
                           risk factors.
                               'AR' uses an AR(1) model
                               'fd' uses first-differences
        agg_freq: can be either "mon" or "week"
        resample: if True, resample the pivoted factors to month-end before
                  computing innovations

    Returns:
        pdf: Dataframe where index is date and columns are various disaster
             risk factors
        df: Dataframe containing innovations to the disaster risk factors
    '''

    if agg_freq == "mon":
        agg_freq = "date_mon"
    elif agg_freq == "week":
        agg_freq = "date_week"
    else:
        raise ValueError("agg_freq should be either 'mon' or 'week'")

    # == Check inputs == #
    if innovation_method not in ['AR', 'fd']:
        raise ValueError("innovation_method must be either 'AR' or 'fd'")

    # == Read in raw data == #
    raw_f = pd.read_csv("estimated_data/disaster_risk_measures/" +\
                        "disaster_risk_measures.csv")
    raw_f['date'] = pd.to_datetime(raw_f['date'])
    raw_f = raw_f[raw_f.agg_freq == agg_freq]
    # raw_f = raw_f[raw_f.variable.isin(["D_clamp", "rn_prob_5", "rn_prob_20", "rn_prob_80"]) &
    #               raw_f.maturity.isin(["level", "30", "180"])]
    raw_f = raw_f[raw_f.variable.isin(["D_clamp"])
                  & raw_f.maturity.isin(["level"]) & (raw_f.level == "Ind")]

    # == Create variable names == #
    raw_f['name'] = raw_f['level'] + '_' + raw_f['variable'] +\
                    '_' + raw_f['maturity'].astype(str)

    # == Create pivot table, then resample to end of month == #
    pdf = raw_f.pivot_table(index='date', columns='name', values='value')
    if resample:
        pdf = pdf.resample('M').last()

    # == Compute innovations in each factor == #
    if innovation_method == 'fd':
        df = pdf.diff()
    elif innovation_method == 'AR':
        df = pd.DataFrame(index=pdf.index, columns=pdf.columns)
        for col in df.columns:
            ar = ARX(pdf[col], lags=[1]).fit()
            df.loc[ar.resid.index, col] = ar.resid.values
        df = df.astype(float)

    return pdf, df
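
An illustrative call, assuming the disaster_risk_measures.csv file read above is in place:

# levels and AR(1) innovations at the monthly frequency
pdf, innov = get_disaster_factors("AR", agg_freq="mon")

# first-difference innovations at the weekly frequency, without month-end resampling
pdf_w, innov_w = get_disaster_factors("fd", agg_freq="week", resample=False)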
Example #5
def estimate_qar(y, p=1, q=1, disp=1):
    """
    Estimates a QAR(p, q) on data y.
 
    disp
 
    Returns statsmodels.fitted object.
    """
    lags = p
    qarpq = QAR(y, p=lags, q=1)

    am = ARX(y, lags=lags, constant=True)
    first_stage = am.fit()

    params = np.r_[first_stage.params[:-1], 100 * np.zeros(lags),
                   100 * np.zeros(qarpq.q),
                   1 * np.sqrt(np.abs(first_stage.params[-1]))]

    results = qarpq.fit(maxiter=50000, start_params=params, disp=disp)

    return results
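
QAR is a project-specific class, so only the first stage above is plain arch. A sketch of that stage alone, assuming y is a univariate series, to show why the code splits params[:-1] from params[-1]:

from arch.univariate import ARX

am = ARX(y, lags=1, constant=True)
first_stage = am.fit()
# With the default constant variance, params is ordered
# [Const, AR coefficients..., sigma2]: params[:-1] are the mean
# parameters and params[-1] is the residual variance.
print(first_stage.params)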
Example #6
def test_x_exceptions():
    res = ARX(SP500, lags=1).fit(disp="off")
    with pytest.raises(TypeError, match="x is not None but"):
        res.forecast(reindex=False, x=SP500)
    x = SP500.copy()
    x[:] = np.random.standard_normal(SP500.shape)
    res = ARX(SP500, lags=1, x=x).fit(disp="off")
    with pytest.raises(TypeError, match="x is None but the model"):
        res.forecast(reindex=False)
    res = ARX(SP500, lags=1, x=x).fit(disp="off")
    with pytest.raises(ValueError, match="x must have the same"):
        res.forecast(reindex=False, x={})
    with pytest.raises(ValueError, match="x must have the same"):
        res.forecast(reindex=False, x={"x0": x, "x1": x})
    with pytest.raises(KeyError, match="The keys of x must exactly"):
        res.forecast(reindex=False, x={"z": x})
    with pytest.raises(ValueError,
                       match="The arrays contained in the dictionary"):
        _x = np.asarray(x).reshape((1, x.shape[0], 1))
        res.forecast(reindex=False, x={"x0": _x})
    x2 = pd.concat([x, x], axis=1)
    x2.columns = ["x0", "x1"]
    x2.iloc[:, 1] = np.random.standard_normal(SP500.shape)
    res = ARX(SP500, lags=1, x=x2).fit(disp="off")
    with pytest.raises(ValueError, match="The shapes of the arrays contained"):
        res.forecast(reindex=False,
                     x={
                         "x0": x2.iloc[:, 0],
                         "x1": x2.iloc[10:, 1:]
                     })
    with pytest.raises(ValueError, match="1- and 2-dimensional x values"):
        res.forecast(reindex=False, x=x2)
    with pytest.raises(ValueError, match="The leading dimension of x"):
        _x2 = np.asarray(x2)
        _x2 = _x2.reshape((1, -1, 2))
        res.forecast(reindex=False, x=_x2)
    with pytest.raises(ValueError, match="The number of values passed"):
        res.forecast(reindex=False, x=np.empty((2, SP500.shape[0], 3)))
    with pytest.raises(ValueError,
                       match="The shape of x does not satisfy the"):
        res.forecast(reindex=False, x=np.empty((2, SP500.shape[0] // 2, 1)))
Example #7
def test_arx_no_lags():
    mod = ARX(SP500, volatility=GARCH())
    res = mod.fit(disp="off")
    assert res.params.shape[0] == 4
    assert "lags" not in mod._model_description(include_lags=False)
Example #8
from arch.data import sp500
from arch.univariate import (
    APARCH,
    ARX,
    EGARCH,
    FIGARCH,
    GARCH,
    HARCH,
    HARX,
    ConstantMean,
    ConstantVariance,
    EWMAVariance,
    MIDASHyperbolic,
    RiskMetrics2006,
    ZeroMean,
    arch_model,
)
from arch.univariate.mean import _ar_forecast, _ar_to_impulse

SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()

MEAN_MODELS = [
    HARX(SP500, lags=[1, 5]),
    ARX(SP500, lags=2),
    ConstantMean(SP500),
    ZeroMean(SP500),
]

VOLATILITIES = [
    ConstantVariance(),
    GARCH(),
    FIGARCH(),
    EWMAVariance(lam=0.94),
    MIDASHyperbolic(),
    HARCH(lags=[1, 5, 22]),
    RiskMetrics2006(),
    APARCH(),
    EGARCH(),
]
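
These two lists are built to be crossed; a minimal smoke-test sketch (assumed usage, not from the original source) that pairs every mean model with every volatility process:

for mean in MEAN_MODELS:
    for vol in VOLATILITIES:
        mean.volatility = vol
        res = mean.fit(disp="off")
        print(type(mean).__name__, type(vol).__name__,
              round(res.loglikelihood, 1))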
Example #9
from typing import Optional, Tuple

import numpy as np
import pandas as pd

from arch.univariate import (
    ARX,
    EGARCH,
    FIGARCH,
    GARCH,
    HARCH,
    ConstantMean,
    ConstantVariance,
    GeneralizedError,
    Normal,
    SkewStudent,
    StudentsT,
)


def return_sampler_garch(
    N_train: int,
    mean_process: str = "Constant",
    lags_mean_process: Optional[int] = None,
    vol_process: str = "GARCH",
    distr_noise: str = "normal",
    seed: Optional[int] = None,
    seed_param: Optional[int] = None,
    p_arg: Optional[pd.Series] = None,
) -> Tuple[np.ndarray, pd.Series]:
    # https://stats.stackexchange.com/questions/61824/how-to-interpret-garch-parameters
    # https://arch.readthedocs.io/en/latest/univariate/introduction.html
    # https://arch.readthedocs.io/en/latest/univariate/volatility.html
    # https://github.com/bashtage/arch/blob/master/arch/univariate/volatility.py
    """
    Generates financial returns driven by mean-reverting factors.

    Parameters
    ----------
    N_train: int
        Length of the experiment

    mean_process: str
        Mean process for the returns. It can be 'Constant' or 'AR'

    lags_mean_process: int
        Order of autoregressive lag if mean_process is AR

    vol_process: str
        Volatility process for the returns. It can be 'GARCH', 'EGARCH', 'TGARCH',
        'ARCH', 'HARCH', 'FIGARCH' or 'Constant'. Note that different volatility
        processes requires different parameter, which are hard coded. If you want to
        pass them explicitly, use p_arg.

    distr_noise: str
        Distribution for the unpredictable component of the returns. It can be
        'normal', 'studt', 'skewstud' or 'ged'. Note that different distributions
        requires different parameter, which are hard coded. If you want to
        pass them explicitly, use p_arg.

    seed: int
        Seed for experiment reproducibility

    seed_param: int
        Seed for drawing randomly the parameters needed for the simulation. The
        ranges provided are obtained as average lower and upper bounds of several
        GARCH-type model fitting on real financial time-series.

    p_arg: pd.Series
        Pandas series of parameters that you want to pass explicitly.
        They need to be passed in the right order. Check documentation of the
        arch python package (https://arch.readthedocs.io/en/latest/index.html) for more details.
    Returns
    -------
    simulations['data'].values: np.ndarray
        Simulated series of returns
    p: pd.Series
        Series  of parameters used for simulation
    """
    names = []
    vals = []

    if seed_param is None:
        seed_param = seed

    rng = np.random.RandomState(seed_param)

    # choose mean process
    if mean_process == "Constant":
        model = ConstantMean(None)
        names.append("const")
        if seed_param:
            vals.append(rng.uniform(0.01, 0.09))
        else:
            vals.append(0.0)

    elif mean_process == "AR":
        model = ARX(None, lags=lags_mean_process)
        names.append("const")
        vals.append(0.0)
        if seed_param:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(rng.uniform(-0.09, 0.09))
        else:
            for i in range(lags_mean_process):
                names.append("lag{}".format(i))
                vals.append(0.9)

    else:
        raise ValueError("This mean process doesn't exist or it's not available.")

    # choose volatility process
    if vol_process == "GARCH":
        model.volatility = GARCH(p=1, q=1)
        names.extend(["omega", "alpha", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.1)
            alph = rng.uniform(0.05, 0.1)
            b = rng.uniform(0.86, 0.92)
            garch_p = np.array([om, alph, b]) / (np.array([om, alph, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            b = 0.94
            garch_p = np.array([om, alph, b])
        vals.extend(list(garch_p))

    elif vol_process == "ARCH":
        model.volatility = GARCH(p=1, q=0)

        names.extend(["omega", "alpha"])
        if seed_param:
            om = rng.uniform(1.4, 4.0)
            alph = rng.uniform(0.1, 0.6)
        else:
            om = 0.01
            alph = 0.4
        garch_p = np.array([om, alph])
        vals.extend(list(garch_p))

    elif vol_process == "HARCH":
        model.volatility = HARCH(lags=[1, 5, 22])

        names.extend(["omega", "alpha[1]", "alpha[5]", "alpha[22]"])
        if seed_param:
            om = rng.uniform(0.5, 1.2)
            alph1 = rng.uniform(0.01, 0.1)
            alph5 = rng.uniform(0.05, 0.3)
            alph22 = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            alph1 = 0.05
            alph5 = 0.15
            alph22 = 0.5
        garch_p = np.array([om, alph1, alph5, alph22])
        vals.extend(list(garch_p))

    elif vol_process == "FIGARCH":
        model.volatility = FIGARCH(p=1, q=1)

        names.extend(["omega", "phi", "d", "beta"])
        if seed_param:
            om = rng.uniform(0.03, 0.05)
            phi = rng.uniform(0.1, 0.35)
            d = rng.uniform(0.3, 0.5)
            beta = rng.uniform(0.4, 0.7)
        else:
            om = 0.01
            phi = 0.2
            d = 0.2
            beta = 0.55
        garch_p = np.array([om, phi, d, beta])
        vals.extend(list(garch_p))

    elif vol_process == "TGARCH":
        model.volatility = GARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.02, 0.15)
            alph = rng.uniform(0.01, 0.07)
            gamma = rng.uniform(0.03, 0.1)
            b = rng.uniform(0.88, 0.94)
        else:
            om = 0.01
            alph = 0.05
            gamma = 0.04
            b = 0.90
        garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "EGARCH":
        model.volatility = EGARCH(p=1, o=1, q=1)
        names.extend(["omega", "alpha", "gamma", "beta"])
        if seed_param:
            om = rng.uniform(0.01, 0.03)
            alph = rng.uniform(0.06, 0.17)
            gamma = rng.uniform(-0.05, -0.02)
            b = rng.uniform(0.97, 0.99)
            garch_p = np.array([om, alph, gamma, b]) / (np.array(
                [om, alph, gamma, b]).sum())
        else:
            om = 0.01
            alph = 0.05
            gamma = -0.02
            b = 0.94
            garch_p = np.array([om, alph, gamma, b])
        vals.extend(list(garch_p))

    elif vol_process == "Constant":
        model.volatility = ConstantVariance()
        names.append("sigma_const")
        vals.append(rng.uniform(0.02, 0.05))
    else:
        raise ValueError("This volatility process doesn't exist or it's not available.")

    if distr_noise == "normal":
        model.distribution = Normal(np.random.RandomState(seed))
    elif distr_noise == "studt":
        model.distribution = StudentsT(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.randint(6, 10))
        else:
            vals.append(8.0)
    elif distr_noise == "skewstud":
        model.distribution = SkewStudent(np.random.RandomState(seed))
        names.extend(["nu", "lambda"])
        if seed_param:
            vals.extend([rng.uniform(6.0, 10.0), rng.uniform(-0.1, 0.1)])
        else:
            vals.extend([8.0, 0.05])
    elif distr_noise == "ged":
        model.distribution = GeneralizedError(np.random.RandomState(seed))
        names.append("nu")
        if seed_param:
            vals.append(rng.uniform(1.05, 3.0))
        else:
            vals.append(2.0)
    else:
        raise ValueError("This noise distribution doesn't exist or it's not available.")

    p = pd.Series(data=vals, index=names)
    if p_arg is not None:
        p = p_arg
    simulations = model.simulate(p, N_train) / 100

    return simulations["data"].values, p
Example #10
print(model.summary())

#5.
import datetime as dt

import pandas_datareader.data as web
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa import stattools

cny = web.DataReader('CNY=X', 'yahoo', dt.datetime(2015, 1, 1),
                     dt.datetime(2015, 12, 31))

ret = (cny.Close - cny.Close.shift(1)) / cny.Close.shift(1)
ret = ret.dropna()

cny.Close.plot()

ret.plot()
plot_acf(ret, lags=20)
plot_pacf(ret, lags=20)

LjungBox = stattools.q_stat(stattools.acf(ret)[1:13], len(ret))
LjungBox[1][-1]

(ret**2).plot()
plot_acf(ret**2, lags=20)
plot_pacf(ret**2, lags=20)

LjungBox = stattools.q_stat(stattools.acf(ret**2)[1:13], len(ret))
LjungBox[1][-1]

from arch.univariate import ARX, GARCH
model = ARX(ret, lags=1)
model.volatility = GARCH()
res = model.fit()
print(res.summary())
Example #11
eqCurves = pd.DataFrame(index=signal.index,
                        columns=['Buy and Hold', 'Strategy'])
eqCurves['Buy and Hold'] = returns['Buy and Hold'].cumsum() + 1
eqCurves['Strategy'] = returns['Strategy'].cumsum() + 1

eqCurves['Strategy'].plot(figsize=(10, 8))
eqCurves['Buy and Hold'].plot()
plt.legend()
plt.show()

# # From Arch website

# In[273]:

from arch.univariate import ARX
ar = ARX(Y, lags=30)
print(ar.fit().summary())

# In[270]:

from arch.univariate import ARCH, GARCH
ar.volatility = GARCH(p=3, o=0, q=3)
res = ar.fit(update_freq=0, disp='off')
print(res.summary())

# In[265]:

from arch.univariate import StudentsT
ar.distribution = StudentsT()
res = ar.fit(update_freq=0, disp='off')
print(res.summary())
Example #12
import pandas as pd

from arch.univariate import ARX


def get_disaster_factors(innovation_method,
                         level_filter=None,
                         var_filter=None,
                         day_filter=None):
    r'''
    Function to get various disaster risk factors and their innovations.

    Args:
        innovation_method: String for how to compute innovations in disaster
                           risk factors.
                               'AR' uses an AR1 model
                               'fd' uses first-differences
        level_filter: List of filters to apply to whether disaster risk comes
                      from sp_500 or individual firms (ind)
        var_filter: List of filters to apply to the disaster risk measure
                    (D, rn_prob_2sigma, rn_prob_20, rn_prob_40, rb_prob_60)
        day_filter: List of filters to apply to duration of options that
                    went into measure (30, 60, 120)

    Returns:
        pdf: Dataframe where index is date and columns are various disaster
             risk factors
        df: Dataframe containing innovations to the disaster risk factors
    '''

    # == Check inputs == #
    if innovation_method not in ['AR', 'fd']:
        raise ValueError("innovation_method must be either 'AR' or 'fd'")

    # == Read in raw data == #
    raw_f = pd.read_csv("estimated_data/disaster_risk_measures/" +\
                        "combined_disaster_df.csv")
    raw_f['date_eom'] = pd.to_datetime(raw_f['date'])
    raw_f.drop('date', axis=1, inplace=True)

    # == Focus only on direct (for S&P 500) and filtered mean aggregation == #
    raw_f = raw_f[raw_f.agg_type.isin(['direct', 'mean_filter'])]

    # == Apply other filters == #
    if level_filter is not None:
        raw_f = raw_f[raw_f['level'].isin(level_filter)]
    if var_filter is not None:
        raw_f = raw_f[raw_f['var'].isin(var_filter)]
    if day_filter is not None:
        raw_f = raw_f[raw_f['days'].isin(day_filter)]

    # == Create variable names == #
    raw_f['name'] = raw_f['level'] + '_' + raw_f['var'] +\
                    '_' + raw_f['days'].astype(str)

    # == Create pivot table, then resample to end of month == #
    pdf = raw_f.pivot_table(index='date_eom', columns='name', values='value')
    pdf = pdf.resample('M').last()

    # == Compute innovations in each factor == #
    if innovation_method == 'fd':
        df = pdf.diff()
    elif innovation_method == 'AR':
        df = pd.DataFrame(index=pdf.index, columns=pdf.columns)
        for col in df.columns:
            ar = ARX(pdf[col], lags=[1]).fit()
            df.loc[ar.resid.index, col] = ar.resid.values
        df = df.astype(float)

    return pdf, df
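
As with the earlier variant, an illustrative call assuming combined_disaster_df.csv exists; the filter values are taken from the docstring:

pdf, innov = get_disaster_factors("AR",
                                  level_filter=["ind"],
                                  var_filter=["D"],
                                  day_filter=[30, 60])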
Example #13
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from arch import arch_model


def main(fund_price_file=None, fund_region='EU', returns_type='pct', tag=''):
    os.chdir(os.path.dirname(
        __file__))  # switch to the folder where your script is stored
    output_folder = '{}_{}_{}_return'.format(tag, fund_region, returns_type)
    output_dir = os.path.join(os.path.dirname(__file__), output_folder)

    ##########################################################################
    # read four factors of fama french data
    ##########################################################################

    if fund_region == 'EU':
        file_3_Factors = 'Europe_3_Factors_Daily.csv'
        file_MOM_Factor = 'Europe_MOM_Factor_Daily.csv'
        df_threefators = pd.read_csv(file_3_Factors,
                                     parse_dates=[0],
                                     index_col=0,
                                     skiprows=6).drop('RF',
                                                      axis=1)['2013':'2018']
        df_forthfactor = pd.read_csv(file_MOM_Factor,
                                     parse_dates=[0],
                                     index_col=0,
                                     skiprows=6)['2013':'2018']
        ff_rf = pd.read_csv(file_3_Factors,
                            parse_dates=[0],
                            index_col=0,
                            skiprows=6)['RF']['2013':'2018']

    if fund_region == 'US':
        file_3_Factors = 'F-F_Research_Data_Factors_daily.CSV'
        file_MOM_Factor = 'F-F_Momentum_Factor_daily.csv'
        df_threefators = pd.read_csv(file_3_Factors,
                                     parse_dates=[0],
                                     index_col=0,
                                     skiprows=4).drop('RF',
                                                      axis=1)['2013':'2018']
        df_forthfactor = pd.read_csv(file_MOM_Factor,
                                     parse_dates=[0],
                                     index_col=0,
                                     skiprows=13)['2013':'2018']
        ff_rf = pd.read_csv(file_3_Factors,
                            parse_dates=[0],
                            index_col=0,
                            skiprows=4)['RF']['2013':'2018']

    if fund_region == 'Global':
        file_3_Factors = 'Global_3_Factors_daily.CSV'
        file_MOM_Factor = 'Global_MOM_Factor_daily.csv'
        df_threefators = pd.read_csv(file_3_Factors,
                                     parse_dates=[0],
                                     index_col=0,
                                     skiprows=6).drop('RF',
                                                      axis=1)['2013':'2018']
        df_forthfactor = pd.read_csv(file_MOM_Factor,
                                     parse_dates=[0],
                                     index_col=0,
                                     skiprows=6)['2013':'2018']
        ff_rf = pd.read_csv(file_3_Factors,
                            parse_dates=[0],
                            index_col=0,
                            skiprows=6)['RF']['2013':'2018']

    factors = pd.concat([df_threefators, df_forthfactor], axis=1)
    factors.index = pd.to_datetime(factors.index)
    factors = factors / 100
    ff_rf = ff_rf / 100
    print(factors.head())
    print(factors.describe())

    ##########################################################################
    # read green fund daily price
    ##########################################################################

    file = fund_price_file
    xl = pd.ExcelFile(file)
    print(xl.sheet_names)

    stats_list = []
    ols_list = []
    pvalues_list = []
    garch_list = []
    arx_list = []

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    os.chdir(output_dir)

    for select_sheet in xl.sheet_names:
        df = xl.parse(select_sheet,
                      parse_dates=[0],
                      index_col=0,
                      skiprows=[0, 1, 2, 4],
                      header=0)
        df.index = pd.to_datetime(df.index)
        print('Import sheet: {}'.format(select_sheet))

        # skip/filter NaN columns
        print('the following columns are not numeric ')
        print(df.select_dtypes(exclude=['float64']))
        df = df.select_dtypes(include=['float64'])

        ##########################################################################
        # calculate daily average returns and describe stats ; https://stackoverflow.com/questions/35365545/calculating-cumulative-returns-with-pandas-dataframe
        ##########################################################################
        if returns_type == 'pct':  # simple return
            returns = df.pct_change(limit=2).mean(axis=1)['2013':'2018']
        if returns_type == 'cum':  # cumulative_return
            returns = df.pct_change(limit=2)['2013':'2018']
            returns = ((1 + returns).cumprod() - 1).mean(axis=1)
        if returns_type == 'log':  # log return
            returns = np.log(1 + df.pct_change(limit=2)).mean(
                axis=1)['2013':'2018']
        print(returns.describe())

        # check data completeness
        print('The following dates have NaN return values')
        print(returns[returns.isna().any()])
        returns.fillna(method='bfill', inplace=True)

        returns.plot()
        plt.savefig('{}_daily_returns.png'.format(select_sheet))
        plt.close()

        stats_current = returns.describe()
        stats_current.name = select_sheet
        stats_list.append(stats_current)

        ##########################################################################
        # linear regression of fama french factors
        ##########################################################################
        slice_index_ols = returns.index.intersection(factors.index)

        X = factors.loc[slice_index_ols]
        y = returns.loc[slice_index_ols] - ff_rf[slice_index_ols]
        X_with_constant = sm.add_constant(X)
        model_static = sm.OLS(y, X_with_constant, missing='drop').fit()

        print(model_static.params)
        ols_current = model_static.params
        ols_current.name = select_sheet
        ols_list.append(ols_current)

        pvalues_current = model_static.pvalues
        pvalues_current.name = select_sheet
        pvalues_list.append(pvalues_current)

        with open('ols_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(model_static.summary().as_csv())

        ##########################################################################
        # arch analysis of volatility
        ##########################################################################
        am = arch_model(returns)
        res = am.fit()
        print(res.summary())

        garch_current = res.params
        garch_current.name = select_sheet
        garch_list.append(garch_current)

        with open('garch_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('garch_{}.png'.format(select_sheet))
        plt.close()

        ##########################################################################
        # arx analysis of volatility
        ##########################################################################
        from arch.univariate import ARX
        arx = ARX(returns, lags=[1])
        res = arx.fit()

        print(res.summary())

        arx_current = res.params
        arx_current.name = select_sheet
        arx_list.append(arx_current)

        with open('arx_summary_{}.csv'.format(select_sheet), 'w') as f:
            f.write(res.summary().as_csv())

        res.plot(annualize='D')
        plt.savefig('arx_{}.png'.format(select_sheet))
        plt.close()

    ##########################################################################
    # write all results
    ##########################################################################
    pd.concat(stats_list, axis=1).to_csv('greenfund_stats.csv')
    pd.concat(ols_list, axis=1).to_csv('greenfund_ols.csv')
    pd.concat(pvalues_list, axis=1).to_csv('greenfund_pvalues.csv')
    pd.concat(garch_list, axis=1).to_csv('greenfund_garch.csv')
    pd.concat(arx_list, axis=1).to_csv('greenfund_arx.csv')
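
An illustrative invocation; the workbook name is a placeholder:

if __name__ == '__main__':
    main(fund_price_file='fund_prices.xlsx', fund_region='EU',
         returns_type='pct', tag='demo')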
Example #14
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.ar_model import ar_select_order

from arch.univariate import ARX, GARCH

sm.graphics.tsa.plot_acf(rates)

# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()
# Test of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)

# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])  # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
print(res.summary())

# Forecast
drop = len(data) - len(rates)
start = 3254 - 2 - drop
end = 3262 - 2 - drop

var = res.forecast(start=start, horizon=5,
                   method='simulation').variance[start:1 + end]
var.plot()
entry = [
    '2012:06:20',
Example #15
import numpy
import pandas
import statsmodels.api as sm

prices = pandas.read_csv("prices.csv", parse_dates=['Date'], index_col=0)
tickers = prices.columns[:-2]
prices = prices.resample('W').agg(lambda x: x[-1])
prices.dropna(axis=0, how='any', inplace=True)
rf = prices['^TNX'].values[:-1]
rf /= (52 * 100)
returns = prices.iloc[:, :-1].pct_change()[1:]
rm = returns['^GSPC'].values
ri = returns.iloc[:, :-1].values
Ri = ri - rf[:, numpy.newaxis]
Rm = rm - rf
model = sm.OLS(Ri, sm.add_constant(Rm))
results = model.fit()
alpha, beta = results.params
epsilon = numpy.sqrt(Ri.var(axis=0) - beta**2 * Rm.var(axis=0))
output = pandas.DataFrame(columns=['alpha', 'beta', 'epsilon'],
                          index=tickers,
                          data=numpy.array([alpha, beta, epsilon]).T)
output.to_csv("coefficients.csv")
from arch.univariate import ARX, GARCH
arx = ARX(rm, lags=1)
arx.volatility = GARCH()
res = arx.fit(disp='off')
pandas.DataFrame(res.params).to_csv("parameters.csv")
print(res.summary())
res.plot(annualize='D')


# In[56]:


res.plot(annualize='D')


# In[58]:


# AR
from arch.univariate import ARX
ar = ARX(ts_data, lags=[1, 3, 12])
# print(ar.fit().summary())


# In[60]:


# Volatility Processes
from arch.univariate import ARCH, GARCH
ar.volatility = ARCH(p=5)
res = ar.fit(update_freq=0, disp='off')
# print(res.summary())
fig = res.plot()

# Distribution
from arch.univariate import StudentsT