Esempio n. 1
0
def model_returns_t(data, samples=500):
    """Fit a Bayesian model of returns with a Student-T likelihood.

    Location, scale and degrees of freedom of the T distribution are
    estimated from ``data``. The annualized volatility and the Sharpe
    ratio are tracked as deterministic quantities, both scaled by
    ``sqrt(252)``.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    """
    annualization = np.sqrt(252)

    with pm.Model():
        # Priors; the test values are seeded from the sample moments.
        mean_prior = pm.Normal(
            'mean returns', mu=0, sd=.01, testval=data.mean())
        vol_prior = pm.HalfCauchy(
            'volatility', beta=1, testval=data.std())
        # The likelihood below uses this variable plus two as its nu.
        dof_offset = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        likelihood = pm.T(
            'returns',
            nu=dof_offset + 2,
            mu=mean_prior,
            sd=vol_prior,
            observed=data)
        dist = likelihood.distribution

        pm.Deterministic('annual volatility',
                         dist.variance**.5 * annualization)
        pm.Deterministic('sharpe',
                         dist.mean / dist.variance**.5 * annualization)

        # Powell MAP estimate doubles as NUTS scaling and starting point.
        map_estimate = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        sampler = pm.NUTS(scaling=map_estimate)
        trace = pm.sample(samples, sampler, start=map_estimate)
    return trace
Esempio n. 2
0
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Fit a Bayesian alpha/beta regression with a Student-T likelihood.

    The intercept (alpha) and slope (beta) of ``data`` regressed on
    ``bmark`` are estimated. Typically ``data`` holds algorithm returns
    and ``bmark`` holds benchmark returns (e.g. S&P500). The T likelihood
    makes the fit robust to outliers and tail events.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.Series
        Series of simple returns of a benchmark like the S&P500.
        When its length differs from that of ``data``, ``data`` is
        re-indexed onto ``bmark.index``; dates without a return become
        NaN and are treated as missing values.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    # Pad the returns onto the benchmark's index when the lengths differ.
    if len(data) != len(bmark):
        data = pd.Series(data, index=bmark.index)

    observed_returns = data.dropna()

    # Least-squares fit on the non-missing rows, used only to seed the
    # test values of alpha and beta below.
    ols_fit = sp.stats.linregress(
        bmark.loc[observed_returns.index], observed_returns)
    slope_init, intercept_init = ols_fit[:2]

    with pm.Model():
        scale = pm.HalfCauchy('sigma',
                              beta=1,
                              testval=observed_returns.values.std())
        dof_offset = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        intercept = pm.Normal('alpha', mu=0, sd=.1, testval=intercept_init)
        slope = pm.Normal('beta', mu=0, sd=1, testval=slope_init)

        expected_return = intercept + slope * bmark
        pm.T('returns',
             nu=dof_offset + 2,
             mu=expected_return,
             sd=scale,
             observed=data)

        # Powell MAP estimate doubles as NUTS scaling and starting point.
        map_estimate = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        sampler = pm.NUTS(scaling=map_estimate)
        trace = pm.sample(samples, sampler, start=map_estimate)

    return trace
Esempio n. 3
0
def model_stoch_vol(data, samples=2000):
    """Fit a stochastic volatility model to a return series.

    Returns are modelled as T-distributed. A latent series ``s`` follows
    a Gaussian random walk, and ``exp(-2 * s)`` is passed as the ``lam``
    parameter of the T likelihood, so the width of the distribution
    varies over time.

    Sampling runs in two stages: a short NUTS run of 100 draws scaled at
    a MAP estimate of ``s``, followed by the main run of ``samples``
    draws that starts from, and is scaled at, the last point of the
    short run.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    n_obs = len(data)

    with pm.Model():
        dof = pm.Exponential('nu', 1. / 10, testval=5.)
        walk_scale = pm.Exponential('sigma', 1. / .02, testval=.1)
        latent = GaussianRandomWalk('s', walk_scale**-2, shape=n_obs)
        vol_process = pm.Deterministic('volatility_process',
                                       pm.exp(-2 * latent))
        pm.T('r', dof, lam=vol_process, observed=data)

        # MAP estimate of the latent walk only; it scales the first run.
        map_point = pm.find_MAP(vars=[latent],
                                fmin=sp.optimize.fmin_l_bfgs_b)

        warmup_sampler = pm.NUTS(scaling=map_point)
        warmup = pm.sample(100, warmup_sampler, progressbar=False)

        # Start next run at the last sampled position.
        last_point = warmup[-1]
        main_sampler = pm.NUTS(scaling=last_point, gamma=.25)
        trace = pm.sample(samples,
                          main_sampler,
                          start=last_point,
                          progressbar=False,
                          njobs=2)

    return trace
Esempio n. 4
0
    Nx1Lvl = len(set(x1))
    Nx2Lvl = len(set(x2))
    NSLvl = len(set(S))
    x1contrast_dict = {'X1.2vX1.1': [-1, 1]}
    x2contrast_dict = {'X2.2vX2.1': [-1, 1]}
    x1x2contrast_dict = None  #np.arange(0, Nx1Lvl*Nx2Lvl).reshape(Nx1Lvl, -1).T

# Standardize y to zero mean and unit standard deviation.
z = (y - np.mean(y)) / np.std(y)

# NOTE(review): exact repeat of the assignment above; nothing changes y in
# between, so this recomputes the same value (looks like a paste leftover).
z = (y - np.mean(y)) / np.std(y)

# THE MODEL.

with pm.Model() as model:
    # define the hyperpriors
    # Each *_SD below is |T draw| + 0.1, so it can never fall below 0.1;
    # the matching *tau is the corresponding precision, 1 / SD**2.
    a1_SD_unabs = pm.T('a1_SD_unabs', mu=0, lam=0.001, nu=1)
    a1_SD = abs(a1_SD_unabs) + 0.1
    a1tau = 1 / a1_SD**2

    a2_SD_unabs = pm.T('a2_SD_unabs', mu=0, lam=0.001, nu=1)
    a2_SD = abs(a2_SD_unabs) + 0.1
    a2tau = 1 / a2_SD**2

    a1a2_SD_unabs = pm.T('a1a2_SD_unabs', mu=0, lam=0.001, nu=1)
    a1a2_SD = abs(a1a2_SD_unabs) + 0.1
    a1a2tau = 1 / a1a2_SD**2

    # define the priors
    sigma = pm.Uniform('sigma', 0,
                       10)  # y values are assumed to be standardized
    # Convert the noise standard deviation into a precision.
    tau = 1 / sigma**2
Esempio n. 5
0
# Standardize predictor and response using the location/scale values
# x_m, x_sd, y_m, y_sd (defined outside this excerpt).
zx = (x - x_m) / x_sd
zy = (y - y_m) / y_sd

tdf_gain = 1 # 1 for low-biased tdf, 100 for high-biased tdf

# THE MODEL
with pm.Model() as model:
    # define the priors
    udf = pm.Uniform('udf', 0, 1)
    # Map the uniform draw onto the degrees of freedom of the T likelihood.
    tdf = 1 - tdf_gain * pm.log(1 - udf) # tdf in [1,Inf).
    tau = pm.Gamma('tau', 0.001, 0.001)
    beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12)
    beta1 = pm.Normal('beta1', mu=0, tau=1.0E-12)
    # Linear predictor on the standardized scale.
    mu = beta0 + beta1 * zx
    # define the likelihood
    yl = pm.T('yl', mu=mu, lam=tau, nu=tdf, observed=zy)
    # Generate a MCMC chain
    # The MAP estimate is passed to pm.sample as the starting point.
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(20000, step, start, progressbar=False)


# EXAMINE THE RESULTS
# Burn-in length and thinning interval; in this excerpt they appear only in
# the commented-out summary call below.
burnin = 1000
thin = 10

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

## Check for mixing and autocorrelation
Esempio n. 6
0
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Run Bayesian alpha-beta-model with T distributed returns.

    This model estimates intercept (alpha) and slope (beta) of two
    return sets. Usually, these will be algorithm returns and
    benchmark returns (e.g. S&P500). The data is assumed to be T
    distributed and thus is robust to outliers and takes tail events
    into account.  If a pandas.DataFrame is passed as a benchmark, then
    multiple linear regression is used to estimate alpha and beta.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.Series or pandas.DataFrame
        Benchmark returns (e.g., S&P500) or risk factors (e.g.,
        Fama-French SMB, HML, and UMD). A Series is converted to a
        one-column DataFrame.
        If its number of rows differs from that of ``data``, ``data`` is
        re-indexed onto ``bmark.index``; dates without a return become
        NaN and are passed to the likelihood as missing observations.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    if data.shape[0] != bmark.shape[0]:
        # Pad missing data: dates only present in bmark become NaN.
        data = pd.Series(data, index=bmark.index)

    data_no_missing = data.dropna()

    if bmark.ndim == 1:
        bmark = pd.DataFrame(bmark)

    # Align the benchmark with the FULL (possibly NaN-padded) return series.
    # The likelihood below observes ``data``, so the regression mean must
    # have one entry per element of ``data``; filtering the benchmark down
    # to the non-missing rows here would make the two lengths disagree
    # whenever ``data`` contains NaN.
    bmark = bmark.loc[data.index]
    n_bmark = bmark.shape[1]

    with pm.Model() as model:
        sigma = pm.HalfCauchy('sigma',
                              beta=1,
                              testval=data_no_missing.values.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        # alpha and beta
        # Least-squares fit on the non-missing rows only; its result merely
        # seeds the test values. Work on an explicit copy so that adding the
        # constant column never writes into ``bmark``.
        X = bmark.loc[data_no_missing.index].copy()
        X['ones'] = 1.
        y = data_no_missing
        alphabeta_init = np.linalg.lstsq(X, y)[0]

        # The constant column is last, so the final coefficient is alpha.
        alpha_reg = pm.Normal('alpha', mu=0, sd=.1, testval=alphabeta_init[-1])
        beta_reg = pm.Normal('beta',
                             mu=0,
                             sd=1,
                             testval=alphabeta_init[:-1],
                             shape=n_bmark)
        bmark_theano = tt.as_tensor_variable(bmark.values.T)
        mu_reg = alpha_reg + tt.dot(beta_reg, bmark_theano)
        pm.T('returns', nu=nu + 2, mu=mu_reg, sd=sigma, observed=data)
        start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(samples, step, start=start)

    return model, trace
Esempio n. 7
0
def model_best(y1, y2, samples=1000):
    """Bayesian Estimation Supersedes the T-Test (BEST).

    Compares two groups of returns by fitting a T distribution to each
    group, with separate means and standard deviations and a shared
    degrees-of-freedom parameter. The difference of means, difference of
    standard deviations and the effect size are tracked, along with the
    annual volatility and Sharpe ratio of each group (scaled by
    ``sqrt(252)``).

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample)
    y2 : array-like
        Array of returns (e.g. out-of-sample)
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    pooled = np.concatenate((y1, y2))
    pooled_std = np.std(pooled)

    # Prior settings derived from the pooled data: the group means share a
    # Normal prior centred on the pooled mean, and the group standard
    # deviations share Uniform bounds spanning pooled_std / 1000 to
    # pooled_std * 1000.
    mu_m = np.mean(pooled)
    mu_p = 0.000001 * 1 / pooled_std**2
    sigma_low = pooled_std / 1000
    sigma_high = pooled_std * 1000

    annualization = np.sqrt(252)

    with pm.Model() as model:
        group1_mean = pm.Normal(
            'group1_mean', mu=mu_m, tau=mu_p, testval=y1.mean())
        group2_mean = pm.Normal(
            'group2_mean', mu=mu_m, tau=mu_p, testval=y2.mean())
        group1_std = pm.Uniform(
            'group1_std', lower=sigma_low, upper=sigma_high,
            testval=y1.std())
        group2_std = pm.Uniform(
            'group2_std', lower=sigma_low, upper=sigma_high,
            testval=y2.std())
        # Both likelihoods share the same degrees of freedom.
        dof = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = pm.T('group1',
                              nu=dof,
                              mu=group1_mean,
                              lam=group1_std**-2,
                              observed=y1)
        returns_group2 = pm.T('group2',
                              nu=dof,
                              mu=group2_mean,
                              lam=group2_std**-2,
                              observed=y2)

        mean_gap = pm.Deterministic('difference of means',
                                    group2_mean - group1_mean)
        pm.Deterministic('difference of stds', group2_std - group1_std)
        pooled_spread = pm.sqrt((group1_std**2 + group2_std**2) / 2)
        pm.Deterministic('effect size', mean_gap / pooled_spread)

        likelihoods = (returns_group1, returns_group2)

        volatility_names = ('group1_annual_volatility',
                            'group2_annual_volatility')
        for label, rv in zip(volatility_names, likelihoods):
            pm.Deterministic(
                label, rv.distribution.variance**.5 * annualization)

        sharpe_names = ('group1_sharpe', 'group2_sharpe')
        for label, rv in zip(sharpe_names, likelihoods):
            pm.Deterministic(
                label, rv.distribution.mean /
                rv.distribution.variance**.5 * annualization)

        sampler = pm.NUTS()

        trace = pm.sample(samples, sampler)
    return model, trace
    #x = x.iloc[include_only]
    predictor_names = x.columns
    n_predictors = len(predictor_names)


# THE MODEL
# Linear regression of y on the columns of x, with a shared T prior on the
# slope coefficients. tdfBgain, x, y and n_predictors come from code outside
# this block.
with pm.Model() as model:
    # define hyperpriors
    # NOTE(review): the second positional argument of pm.Normal is passed
    # without a keyword; whether it is read as a precision or a standard
    # deviation depends on the PyMC3 version in use — confirm.
    muB = pm.Normal('muB', 0,.100 )
    tauB = pm.Gamma('tauB', .01, .01)
    udfB = pm.Uniform('udfB', 0, 1)
    # Map the uniform draw onto the degrees of freedom of the slope prior.
    tdfB = 1 + tdfBgain * (-pm.log(1 - udfB))
    # define the priors
    tau = pm.Gamma('tau', 0.01, 0.01)
    beta0 = pm.Normal('beta0', mu=0, tau=1.0E-12)
    # One slope per predictor, all drawn from the same T prior.
    beta1 = pm.T('beta1', mu=muB, lam=tauB, nu=tdfB, shape=n_predictors)
    mu = beta0 + pm.dot(beta1, x.values.T)
    # define the likelihood
    #mu = beta0 + beta1[0] * x.values[:,0] + beta1[1] * x.values[:,1]
    yl = pm.Normal('yl', mu=mu, tau=tau, observed=y)
    # Generate a MCMC chain
    start = pm.find_MAP()
    # Compound step: NUTS for the slopes, Metropolis for everything else.
    step1 = pm.NUTS([beta1])
    step2 = pm.Metropolis([beta0, tau, muB, tauB, udfB])
    trace = pm.sample(10000, [step1, step2], start, progressbar=False)


# EXAMINE THE RESULTS
# Burn-in length and thinning interval for post-processing; the code that
# consumes them is outside this excerpt.
burnin = 2000
thin = 1
    NxLvl = len(set(x))
    #  # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD:
    contrast_dict = None
    for g1idx in range(NxLvl):
        for g2idx in range(g1idx + 1, NxLvl):
            cmpVec = np.repeat(0, NxLvl)
            cmpVec[g1idx] = -1
            cmpVec[g2idx] = 1
            contrast_dict = (contrast_dict, cmpVec)

# Standardize y to zero mean and unit standard deviation.
z = (y - np.mean(y)) / np.std(y)

## THE MODEL.
with pm.Model() as model:
    # define the hyperpriors
    # a_SD is |T draw| + 0.1, so it can never fall below 0.1; atau is the
    # corresponding precision, 1 / a_SD**2.
    a_SD_unabs = pm.T('a_SD_unabs', mu=0, lam=0.001, nu=1)
    a_SD = abs(a_SD_unabs) + 0.1
    atau = 1 / a_SD**2
    # define the priors
    sigma = pm.Uniform('sigma', 0,
                       10)  # y values are assumed to be standardized
    # Convert the noise standard deviation into a precision.
    tau = 1 / sigma**2
    a0 = pm.Normal('a0', mu=0,
                   tau=0.001)  # y values are assumed to be standardized
    # One deflection per level (NxLvl entries), added to the baseline a0.
    a = pm.Normal('a', mu=0, tau=atau, shape=NxLvl)
    mu = a0 + a
    # define the likelihood
    # mu is indexed by x to pick one entry per observation
    # (assumes x holds integer level indices — TODO confirm).
    yl = pm.Normal('yl', mu[x], tau=tau, observed=z)
    # Generate a MCMC chain
    start = pm.find_MAP()
    steps = pm.Metropolis()