def model_best(y1, y2, samples=1000):
    """Bayesian Estimation Supersedes the T-Test

    This model runs a Bayesian hypothesis comparing if y1 and y2 come
    from the same distribution. Returns are assumed to be T-distributed.

    In addition, computes annual volatility and Sharpe of in and
    out-of-sample periods.

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample)
    y2 : array-like
        Array of returns (e.g. out-of-sample)
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    # Priors are scaled from the pooled sample of both groups.
    pooled = np.concatenate((y1, y2))
    mu_m = np.mean(pooled)
    pooled_std = np.std(pooled)

    # Precision of the Normal priors on the group means: one millionth of
    # the pooled precision, i.e. a prior 1000x wider than the pooled std.
    mu_p = 0.000001 * 1 / pooled_std**2
    sigma_low = pooled_std / 1000
    sigma_high = pooled_std * 1000

    # Scaling applied to per-period figures to annualize them.
    # NOTE(review): 252 presumably means trading days per year, i.e. the
    # inputs are daily returns -- confirm against callers.
    annualization = np.sqrt(252)

    with pm.Model():
        # np.mean / np.std (rather than the .mean() / .std() methods) so
        # that inputs without those methods do not fail when computing
        # the starting values.
        group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,
                                testval=np.mean(y1))
        group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,
                                testval=np.mean(y2))
        group1_std = pm.Uniform('group1_std', lower=sigma_low,
                                upper=sigma_high, testval=np.std(y1))
        group2_std = pm.Uniform('group2_std', lower=sigma_low,
                                upper=sigma_high, testval=np.std(y2))

        # Shared degrees of freedom; the +2 shift keeps nu above 2, where
        # the Student-T variance used below is finite.
        nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = pm.T('group1', nu=nu, mu=group1_mean,
                              lam=group1_std**-2, observed=y1)
        returns_group2 = pm.T('group2', nu=nu, mu=group2_mean,
                              lam=group2_std**-2, observed=y2)

        diff_of_means = pm.Deterministic('difference of means',
                                         group2_mean - group1_mean)
        pm.Deterministic('difference of stds',
                         group2_std - group1_std)
        pm.Deterministic(
            'effect size',
            diff_of_means / pm.sqrt((group1_std**2 + group2_std**2) / 2))

        pm.Deterministic(
            'group1_annual_volatility',
            returns_group1.distribution.variance**.5 * annualization)
        pm.Deterministic(
            'group2_annual_volatility',
            returns_group2.distribution.variance**.5 * annualization)

        pm.Deterministic(
            'group1_sharpe',
            returns_group1.distribution.mean /
            returns_group1.distribution.variance**.5 * annualization)
        pm.Deterministic(
            'group2_sharpe',
            returns_group2.distribution.mean /
            returns_group2.distribution.variance**.5 * annualization)

        step = pm.NUTS()
        trace = pm.sample(samples, step)

    return trace
def model_best(y1, y2, samples=1000):
    """Bayesian Estimation Supersedes the T-Test

    This model runs a Bayesian hypothesis comparing if y1 and y2 come
    from the same distribution. Returns are assumed to be T-distributed.

    In addition, computes annual volatility and Sharpe of in and
    out-of-sample periods.

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample)
    y2 : array-like
        Array of returns (e.g. out-of-sample)
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    # Priors are scaled from the pooled sample of both groups.
    pooled = np.concatenate((y1, y2))
    mu_m = np.mean(pooled)
    pooled_std = np.std(pooled)

    # Precision of the Normal priors on the group means: one millionth of
    # the pooled precision, i.e. a prior 1000x wider than the pooled std.
    mu_p = 0.000001 * 1 / pooled_std**2
    sigma_low = pooled_std / 1000
    sigma_high = pooled_std * 1000

    # Scaling applied to per-period figures to annualize them.
    # NOTE(review): 252 presumably means trading days per year, i.e. the
    # inputs are daily returns -- confirm against callers.
    annualization = np.sqrt(252)

    with pm.Model() as model:
        # np.mean / np.std (rather than the .mean() / .std() methods) so
        # that inputs without those methods do not fail when computing
        # the starting values.
        group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,
                                testval=np.mean(y1))
        group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,
                                testval=np.mean(y2))
        group1_std = pm.Uniform('group1_std', lower=sigma_low,
                                upper=sigma_high, testval=np.std(y1))
        group2_std = pm.Uniform('group2_std', lower=sigma_low,
                                upper=sigma_high, testval=np.std(y2))

        # Shared degrees of freedom; the +2 shift keeps nu above 2, where
        # the Student-T variance used below is finite.
        nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = pm.T('group1', nu=nu, mu=group1_mean,
                              lam=group1_std**-2, observed=y1)
        returns_group2 = pm.T('group2', nu=nu, mu=group2_mean,
                              lam=group2_std**-2, observed=y2)

        diff_of_means = pm.Deterministic('difference of means',
                                         group2_mean - group1_mean)
        pm.Deterministic('difference of stds',
                         group2_std - group1_std)
        pm.Deterministic(
            'effect size',
            diff_of_means / pm.sqrt((group1_std**2 + group2_std**2) / 2))

        pm.Deterministic(
            'group1_annual_volatility',
            returns_group1.distribution.variance**.5 * annualization)
        pm.Deterministic(
            'group2_annual_volatility',
            returns_group2.distribution.variance**.5 * annualization)

        pm.Deterministic(
            'group1_sharpe',
            returns_group1.distribution.mean /
            returns_group1.distribution.variance**.5 * annualization)
        pm.Deterministic(
            'group2_sharpe',
            returns_group2.distribution.mean /
            returns_group2.distribution.variance**.5 * annualization)

        step = pm.NUTS()
        trace = pm.sample(samples, step)

    return model, trace
# NOTE(review): the statements down to `step = pm.NUTS()` use names
# (group1_std, group1_mean, nu, y1, ...) defined before this span and
# presumably belong inside an enclosing `with pm.Model() as model:` block --
# confirm the indentation against the preceding lines.

# Student-T precision of each group, derived from its standard deviation.
lam1 = group1_std**-2
lam2 = group2_std**-2

group1 = pm.StudentT('drug', nu=nu, mu=group1_mean, lam=lam1, observed=y1)
group2 = pm.StudentT('placebo', nu=nu, mu=group2_mean, lam=lam2,
                     observed=y2)

diff_of_means = pm.Deterministic('difference of means',
                                 group1_mean - group2_mean)
diff_of_stds = pm.Deterministic('difference of stds',
                                group1_std - group2_std)
effect_size = pm.Deterministic(
    'effect size',
    diff_of_means / pm.sqrt((group1_std**2 + group2_std**2) / 2))

step = pm.NUTS()


def run(n=3000):
    """Sample from ``model`` with ``step`` and plot the results.

    Parameters
    ----------
    n : int or str, optional
        Number of samples to draw. The string ``"short"`` is replaced
        by 500.
    """
    if n == "short":
        n = 500
    with model:
        trace = pm.sample(n, step)

    # The first tenth of the samples is dropped before plotting. Floor
    # division keeps `burn` an integer: under true division `n / 10` is a
    # float, which is not a valid slice index.
    burn = n // 10

    pm.traceplot(trace[burn:])
    pm.plots.summary(trace[burn:])
with pm.Model() as model: group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p, testval=y1.mean()) group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p, testval=y2.mean()) group1_std = pm.Uniform('group1_std', lower=sigma_low, upper=sigma_high, testval=y1.std()) group2_std = pm.Uniform('group2_std', lower=sigma_low, upper=sigma_high, testval=y2.std()) nu = pm.Exponential('nu_minus_one', 1/29.) + 1 lam1 = group1_std**-2 lam2 = group2_std**-2 group1 = pm.T('drug', nu=nu, mu=group1_mean, lam=lam1, observed=y1) group2 = pm.T('placebo', nu=nu, mu=group2_mean, lam=lam2, observed=y2) diff_of_means = pm.Deterministic('difference of means', group1_mean - group2_mean) diff_of_stds = pm.Deterministic('difference of stds', group1_std - group2_std) effect_size = pm.Deterministic('effect size', diff_of_means / pm.sqrt((group1_std**2 + group2_std**2) / 2)) step = pm.NUTS() def run(n=3000): if n == "short": n = 500 with model: trace = pm.sample(n, step) burn = n/10 pm.traceplot(trace[burn:]); pm.plots.summary(trace[burn:]) if __name__ == '__main__':