def model_returns_t(data, samples=500):
    """Run Bayesian model assuming returns are T-distributed.

    The likelihood is a ``pm.T`` distribution whose degrees of freedom
    are ``nu_minus_two + 2``. Annual volatility and Sharpe ratio are
    tracked as deterministic variables derived from the likelihood's
    mean and variance, both scaled by ``sqrt(252)``.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    annualization = np.sqrt(252)

    with pm.Model():
        mean_rv = pm.Normal('mean returns', mu=0, sd=.01,
                            testval=data.mean())
        vol_rv = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        dof_minus_two = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        likelihood = pm.T('returns',
                          nu=dof_minus_two + 2,
                          mu=mean_rv,
                          sd=vol_rv,
                          observed=data)
        dist = likelihood.distribution

        pm.Deterministic('annual volatility',
                         dist.variance**.5 * annualization)
        pm.Deterministic('sharpe',
                         dist.mean / dist.variance**.5 * annualization)

        # Powell optimisation gives the MAP point, which both scales the
        # NUTS step and serves as the starting position of the chain.
        map_point = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        sampler = pm.NUTS(scaling=map_point)
        return pm.sample(samples, sampler, start=map_point)
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Run Bayesian alpha-beta-model with T distributed returns.

    This model estimates intercept (alpha) and slope (beta) of two
    return sets. Usually, these will be algorithm returns and
    benchmark returns (e.g. S&P500). The data is assumed to be T
    distributed and thus is robust to outliers and takes tail events
    into account.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.Series
        Series of simple returns of a benchmark like the S&P500.
        If bmark has more recent returns than data, these dates
        will be treated as missing values and predictions will be
        generated for them taking market correlations into account.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    if len(data) != len(bmark):
        # Re-index onto the benchmark's dates; dates absent from data
        # become missing values.
        data = pd.Series(data, index=bmark.index)

    observed_only = data.dropna()

    # A plain linear regression on the non-missing rows provides the
    # initial values for beta (slope) and alpha (intercept).
    slope_init, intercept_init = sp.stats.linregress(
        bmark.loc[observed_only.index],
        observed_only)[:2]

    with pm.Model():
        scale = pm.HalfCauchy('sigma',
                              beta=1,
                              testval=observed_only.values.std())
        dof_minus_two = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        intercept = pm.Normal('alpha', mu=0, sd=.1, testval=intercept_init)
        slope = pm.Normal('beta', mu=0, sd=1, testval=slope_init)

        pm.T('returns',
             nu=dof_minus_two + 2,
             mu=intercept + slope * bmark,
             sd=scale,
             observed=data)

        map_point = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        sampler = pm.NUTS(scaling=map_point)
        return pm.sample(samples, sampler, start=map_point)
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (width of
    T-distributed) is assumed to follow a random-walk.

    Sampling happens in two stages: a short 100-draw NUTS run, followed
    by the main run of ``samples`` draws that is scaled by, and started
    from, the last point of the short run.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model():
        dof = pm.Exponential('nu', 1. / 10, testval=5.)
        walk_scale = pm.Exponential('sigma', 1. / .02, testval=.1)
        log_vol = GaussianRandomWalk('s', walk_scale**-2, shape=len(data))
        precision_path = pm.Deterministic('volatility_process',
                                          pm.exp(-2 * log_vol))
        pm.T('r', dof, lam=precision_path, observed=data)

        # Only the random walk is optimised for the MAP estimate.
        map_point = pm.find_MAP(vars=[log_vol],
                                fmin=sp.optimize.fmin_l_bfgs_b)

        # NOTE(review): the MAP estimate is used for the NUTS scaling
        # only; it is not passed as ``start`` to this first run --
        # confirm that this is intended.
        warmup = pm.sample(100, pm.NUTS(scaling=map_point),
                           progressbar=False)

        # Start next run at the last sampled position.
        last_point = warmup[-1]
        sampler = pm.NUTS(scaling=last_point, gamma=.25)
        return pm.sample(samples, sampler, start=last_point,
                         progressbar=False, njobs=2)
# Number of distinct levels of each factor and of the S variable.
Nx1Lvl = len(set(x1))
Nx2Lvl = len(set(x2))
NSLvl = len(set(S))

# Contrasts of interest, keyed by label; each value is a weight vector.
x1contrast_dict = {'X1.2vX1.1': [-1, 1]}
x2contrast_dict = {'X2.2vX2.1': [-1, 1]}
x1x2contrast_dict = None  # np.arange(0, Nx1Lvl*Nx2Lvl).reshape(Nx1Lvl, -1).T

# Standardize y (zero mean, unit standard deviation). The original
# repeated this identical statement twice; once is sufficient.
z = (y - np.mean(y)) / np.std(y)

# THE MODEL.
with pm.Model() as model:
    # define the hyperpriors
    # Each SD is the absolute value of a T-distributed variable shifted
    # by 0.1, and the matching precision is 1 / SD**2.
    a1_SD_unabs = pm.T('a1_SD_unabs', mu=0, lam=0.001, nu=1)
    a1_SD = abs(a1_SD_unabs) + 0.1
    a1tau = 1 / a1_SD**2

    a2_SD_unabs = pm.T('a2_SD_unabs', mu=0, lam=0.001, nu=1)
    a2_SD = abs(a2_SD_unabs) + 0.1
    a2tau = 1 / a2_SD**2

    a1a2_SD_unabs = pm.T('a1a2_SD_unabs', mu=0, lam=0.001, nu=1)
    a1a2_SD = abs(a1a2_SD_unabs) + 0.1
    a1a2tau = 1 / a1a2_SD**2

    # define the priors
    sigma = pm.Uniform('sigma', 0, 10)  # y values are assumed to be standardized
    tau = 1 / sigma**2
# Standardize predictor and response with the precomputed means and
# standard deviations.
zx, zy = (x - x_m) / x_sd, (y - y_m) / y_sd

tdf_gain = 1  # 1 for low-biased tdf, 100 for high-biased tdf

# THE MODEL
with pm.Model() as model:
    # Priors.
    udf = pm.Uniform('udf', 0, 1)
    # Map the unit-interval variable onto degrees of freedom in [1, Inf).
    tdf = 1 - tdf_gain * pm.log(1 - udf)
    tau = pm.Gamma('tau', 0.001, 0.001)
    # Intercept and slope share the same prior.
    beta0, beta1 = (pm.Normal(coef_name, mu=0, tau=1.0E-12)
                    for coef_name in ('beta0', 'beta1'))
    mu = beta0 + beta1 * zx

    # Likelihood: T-distributed standardized response around the line.
    yl = pm.T('yl', nu=tdf, mu=mu, lam=tau, observed=zy)

    # Generate a MCMC chain: Metropolis steps, started at the MAP point.
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(20000, step, start, progressbar=False)

# EXAMINE THE RESULTS
burnin, thin = 1000, 10

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

## Check for mixing and autocorrelation
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Run Bayesian alpha-beta-model with T distributed returns.

    This model estimates intercept (alpha) and slope (beta) of two
    return sets. Usually, these will be algorithm returns and
    benchmark returns (e.g. S&P500). The data is assumed to be T
    distributed and thus is robust to outliers and takes tail events
    into account. If a pandas.DataFrame is passed as a benchmark, then
    multiple linear regression is used to estimate alpha and beta.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.DataFrame
        DataFrame of benchmark returns (e.g., S&P500) or risk factors
        (e.g., Fama-French SMB, HML, and UMD). A one-dimensional input
        is wrapped in a single-column DataFrame.
        If bmark has more recent returns than data, these dates
        will be treated as missing values and predictions will be
        generated for them taking market correlations into account.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    if data.shape[0] != bmark.shape[0]:
        # Re-index onto the benchmark's dates; dates absent from data
        # become missing values.
        data = pd.Series(data, index=bmark.index)

    data_no_missing = data.dropna()

    if bmark.ndim == 1:
        bmark = pd.DataFrame(bmark)

    # Align the benchmark with *every* row of data, missing rows
    # included. The likelihood below is observed on the full `data`, so
    # its mean needs one entry per row of `data`; truncating the
    # benchmark to the non-missing rows (as was done previously) made the
    # two lengths disagree whenever data contained missing values.
    bmark = bmark.loc[data.index]
    n_bmark = bmark.shape[1]

    with pm.Model() as model:
        sigma = pm.HalfCauchy(
            'sigma',
            beta=1,
            testval=data_no_missing.values.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        # alpha and beta
        # Least-squares fit on the non-missing rows, used only to
        # initialise the sampler. Work on an explicit copy so adding the
        # intercept column never writes into a slice of `bmark`.
        X = bmark.loc[data_no_missing.index].copy()
        X.loc[:, 'ones'] = 1.
        y = data_no_missing
        alphabeta_init = np.linalg.lstsq(X, y)[0]

        # The intercept column was appended last, so the final
        # coefficient is alpha and the preceding ones are the betas.
        alpha_reg = pm.Normal('alpha', mu=0, sd=.1,
                              testval=alphabeta_init[-1])
        beta_reg = pm.Normal('beta', mu=0, sd=1,
                             testval=alphabeta_init[:-1], shape=n_bmark)
        bmark_theano = tt.as_tensor_variable(bmark.values.T)
        mu_reg = alpha_reg + tt.dot(beta_reg, bmark_theano)
        pm.T('returns',
             nu=nu + 2,
             mu=mu_reg,
             sd=sigma,
             observed=data)
        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)

    return model, trace
def model_best(y1, y2, samples=1000):
    """Bayesian Estimation Supersedes the T-Test

    This model runs a Bayesian hypothesis comparing if y1 and y2 come
    from the same distribution. Returns are assumed to be T-distributed.

    In addition, computes annual volatility and Sharpe of in and
    out-of-sample periods.

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample). Must provide ``mean()`` and
        ``std()`` methods.
    y2 : array-like
        Array of returns (e.g. out-of-sample). Must provide ``mean()``
        and ``std()`` methods.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    pooled = np.concatenate((y1, y2))
    pooled_std = np.std(pooled)

    # Prior settings derived from the pooled sample: the group means are
    # centred on the pooled mean with a very small precision, and the
    # group stds are bounded by the pooled std divided/multiplied by 1000.
    mu_m = np.mean(pooled)
    mu_p = 0.000001 * 1 / pooled_std**2
    sigma_low = pooled_std / 1000
    sigma_high = pooled_std * 1000

    annualization = np.sqrt(252)

    with pm.Model() as model:
        group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,
                                testval=y1.mean())
        group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,
                                testval=y2.mean())
        group1_std = pm.Uniform('group1_std', lower=sigma_low,
                                upper=sigma_high, testval=y1.std())
        group2_std = pm.Uniform('group2_std', lower=sigma_low,
                                upper=sigma_high, testval=y2.std())
        # Both groups share the same degrees of freedom.
        nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = pm.T('group1', nu=nu, mu=group1_mean,
                              lam=group1_std**-2, observed=y1)
        returns_group2 = pm.T('group2', nu=nu, mu=group2_mean,
                              lam=group2_std**-2, observed=y2)

        diff_of_means = pm.Deterministic('difference of means',
                                         group2_mean - group1_mean)
        pm.Deterministic('difference of stds',
                         group2_std - group1_std)
        pm.Deterministic('effect size',
                         diff_of_means /
                         pm.sqrt((group1_std**2 + group2_std**2) / 2))

        # Per-group annualized volatility, then per-group Sharpe ratio,
        # registered in the same order as before.
        vol_targets = (('group1_annual_volatility', returns_group1),
                       ('group2_annual_volatility', returns_group2))
        for var_name, likelihood in vol_targets:
            pm.Deterministic(
                var_name,
                likelihood.distribution.variance**.5 * annualization)

        sharpe_targets = (('group1_sharpe', returns_group1),
                          ('group2_sharpe', returns_group2))
        for var_name, likelihood in sharpe_targets:
            pm.Deterministic(
                var_name,
                likelihood.distribution.mean /
                likelihood.distribution.variance**.5 * annualization)

        sampler = pm.NUTS()
        trace = pm.sample(samples, sampler)

    return model, trace
#x = x.iloc[include_only]

# One regression coefficient per column of x.
predictor_names = x.columns
n_predictors = len(predictor_names)

# THE MODEL
with pm.Model() as model:
    # Hyperpriors shared by all regression coefficients.
    muB = pm.Normal('muB', 0,.100 )
    tauB = pm.Gamma('tauB', .01, .01)
    udfB = pm.Uniform('udfB', 0, 1)
    # Map the unit-interval variable onto degrees of freedom >= 1
    # (for a non-negative tdfBgain).
    tdfB = 1 - tdfBgain * pm.log(1 - udfB)

    # Priors.
    tau = pm.Gamma('tau', 0.01, 0.01)
    beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12)
    beta1 = pm.T('beta1', nu=tdfB, mu=muB, lam=tauB, shape=n_predictors)
    # Linear predictor: intercept plus the dot product of the
    # coefficient vector with the transposed predictor matrix.
    mu = beta0 + pm.dot(beta1, x.values.T)

    # Likelihood.
    #mu = beta0 + beta1[0] * x.values[:,0] + beta1[1] * x.values[:,1]
    yl = pm.Normal('yl', mu=mu, tau=tau, observed=y)

    # Generate a MCMC chain: NUTS for the coefficient vector and
    # Metropolis for everything else, started at the MAP point.
    start = pm.find_MAP()
    step1 = pm.NUTS([beta1])
    step2 = pm.Metropolis([beta0, tau, muB, tauB, udfB])
    trace = pm.sample(10000, [step1, step2], start, progressbar=False)

# EXAMINE THE RESULTS
burnin, thin = 2000, 1
# Number of distinct levels in x.
NxLvl = len(set(x))

# Construct list of all pairwise comparisons, to compare with NHST TukeyHSD.
# Each comparison is an integer vector with -1 at the first level and +1
# at the second; the vectors are chained into nested
# (previous, vector) tuples, starting from None.
contrast_dict = None
for g1idx in range(NxLvl):
    for g2idx in range(g1idx + 1, NxLvl):
        cmpVec = np.zeros(NxLvl, dtype=int)
        cmpVec[g1idx], cmpVec[g2idx] = -1, 1
        contrast_dict = (contrast_dict, cmpVec)

# Standardize y (zero mean, unit standard deviation).
z = (y - np.mean(y)) / np.std(y)

## THE MODEL.
with pm.Model() as model:
    # Hyperprior: the SD of the level effects is the absolute value of
    # a T-distributed variable shifted by 0.1; atau is its precision.
    a_SD_unabs = pm.T('a_SD_unabs', nu=1, mu=0, lam=0.001)
    a_SD = abs(a_SD_unabs) + 0.1
    atau = 1 / a_SD**2

    # Priors.
    sigma = pm.Uniform('sigma', 0, 10)  # y values are assumed to be standardized
    tau = 1 / sigma**2
    a0 = pm.Normal('a0', mu=0, tau=0.001)  # y values are assumed to be standardized
    a = pm.Normal('a', mu=0, tau=atau, shape=NxLvl)
    mu = a0 + a

    # Likelihood: x indexes into mu to pick the mean for each observation.
    yl = pm.Normal('yl', mu[x], tau=tau, observed=z)

    # Generate a MCMC chain
    start = pm.find_MAP()
    steps = pm.Metropolis()