Example no. 1
    def time_drug_evaluation(self):
        # fmt: off
        drug = np.array([101, 100, 102, 104, 102, 97, 105, 105, 98, 101,
                         100, 123, 105, 103, 100, 95, 102, 106, 109, 102, 82,
                         102, 100, 102, 102, 101, 102, 102, 103, 103, 97, 97,
                         103, 101, 97, 104, 96, 103, 124, 101, 101, 100, 101,
                         101, 104, 100, 101])
        placebo = np.array([99, 101, 100, 101, 102, 100, 97, 101, 104, 101,
                            102, 102, 100, 105, 88, 101, 100, 104, 100, 100,
                            100, 101, 102, 103, 97, 101, 101, 100, 101, 99,
                            101, 100, 100, 101, 100, 99, 101, 100, 102, 99,
                            100, 99])
        # fmt: on

        y = pd.DataFrame(
            {
                "value": np.r_[drug, placebo],
                "group": np.r_[["drug"] * len(drug), ["placebo"] * len(placebo)],
            }
        )
        y_mean = y.value.mean()
        y_std = y.value.std() * 2

        sigma_low = 1
        sigma_high = 10
        with pm.Model():
            group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std)
            group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std)
            group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high)
            group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high)
            lambda_1 = group1_std ** -2
            lambda_2 = group2_std ** -2

            nu = pm.Exponential("ν_minus_one", 1 / 29.0) + 1

            pm.StudentT("drug", nu=nu, mu=group1_mean, lam=lambda_1, observed=drug)
            pm.StudentT("placebo", nu=nu, mu=group2_mean, lam=lambda_2, observed=placebo)
            diff_of_means = pm.Deterministic("difference of means", group1_mean - group2_mean)
            pm.Deterministic("difference of stds", group1_std - group2_std)
            pm.Deterministic(
                "effect size", diff_of_means / np.sqrt((group1_std ** 2 + group2_std ** 2) / 2)
            )
            pm.sample(
                draws=20000, cores=4, chains=4, progressbar=False, compute_convergence_checks=False
            )
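
The benchmark above discards the sampler output. Outside a timing harness one would keep the trace and summarise the deterministic contrasts; a minimal sketch under that assumption, reusing the same model context:

    trace = pm.sample(draws=2000, cores=4, chains=4)
    pm.summary(trace)  # reports 'difference of means', 'difference of stds', 'effect size'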
Example no. 2
def model_returns_t_alpha_beta(data, bmark, samples=2000, progressbar=True):
    """
    Run Bayesian alpha-beta-model with T distributed returns.

    This model estimates intercept (alpha) and slope (beta) of two
    return sets. Usually, these will be algorithm returns and
    benchmark returns (e.g. S&P500). The data is assumed to be T
    distributed and thus is robust to outliers and takes tail events
    into account.  If a pandas.DataFrame is passed as a benchmark, then
    multiple linear regression is used to estimate alpha and beta.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    bmark : pandas.DataFrame
        DataFrame of benchmark returns (e.g., S&P500) or risk factors (e.g.,
        Fama-French SMB, HML, and UMD).
        If bmark has more recent returns than data, these dates
        will be treated as missing values and predictions will be
        generated for them taking market correlations into account.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    data_bmark = pd.concat([data, bmark], axis=1).dropna()

    with pm.Model() as model:
        sigma = pm.HalfCauchy(
            'sigma',
            beta=1)
        nu = pm.Exponential('nu_minus_two', 1. / 10.)

        # alpha and beta
        X = data_bmark.iloc[:, 1]
        y = data_bmark.iloc[:, 0]

        alpha_reg = pm.Normal('alpha', mu=0, sd=.1)
        beta_reg = pm.Normal('beta', mu=0, sd=1)

        mu_reg = alpha_reg + beta_reg * X
        pm.StudentT('returns',
                    nu=nu + 2,
                    mu=mu_reg,
                    sd=sigma,
                    observed=y)
        trace = pm.sample(samples, progressbar=progressbar)

    return model, trace
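
A hypothetical invocation with synthetic, aligned return series (all names and numbers below are illustrative, not from the source):

    import numpy as np
    import pandas as pd

    idx = pd.date_range('2020-01-01', periods=250, freq='B')
    bmark = pd.Series(np.random.normal(0.0005, 0.01, len(idx)), index=idx, name='bmark')
    data = (0.0001 + 0.8 * bmark
            + np.random.normal(0, 0.005, len(idx))).rename('algo')

    model, trace = model_returns_t_alpha_beta(data, bmark, samples=500)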
Example no. 3
def stochastic_vol_model(returns):
    with pm.Model() as model:
        step_size = pm.Exponential('sigma', 50.)
        # latent log-volatility follows a Gaussian random walk
        s = pm.GaussianRandomWalk('s', sd=step_size, shape=len(returns))
        nu = pm.Exponential('nu', .1)
        # precision lam = exp(-2s) corresponds to a scale of exp(s)
        r = pm.StudentT('r', nu=nu, lam=pm.math.exp(-2 * s), observed=returns)
    with model:
        trace = pm.sample(tune=2000, nuts_kwargs=dict(target_accept=.9))
    # posterior draws of the volatility scale
    return np.exp(trace['s'].T)
Example no. 4
def _vol_model(df: pd.DataFrame):
    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(df.index))
        vol_process = pm.Deterministic('vol_process', pm.math.exp(-2 * s))
        r = pm.StudentT('r', nu, lam=1 / vol_process, observed=df)
    with model:
        trace = pm.sample(20000)
    return trace
Example no. 5
    def create_model(self):
        """ Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will raise
        an error. See http://docs.pymc.io/advanced_theano.html

        Returns
        -------
        model : the PyMC3 model
        """
        model_input = theano.shared(np.zeros([self.num_training_samples,
                                              self.num_pred]))

        model_output = theano.shared(np.zeros(self.num_training_samples))

        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output,
        }

        self.gp = None
        model = pm.Model()

        with model:
            length_scale = pm.Gamma('length_scale', alpha=2, beta=0.5,
                                    shape=(1, self.num_pred))
            signal_variance = pm.HalfCauchy('signal_variance', beta=2,
                                            shape=1)
            noise_variance = pm.HalfCauchy('noise_variance', beta=2,
                                           shape=1)
            degrees_of_freedom = pm.Gamma('degrees_of_freedom', alpha=2,
                                          beta=0.1, shape=1)

            if self.kernel is None:
                # ExpQuad is PyMC3's RBF (squared-exponential) kernel
                cov_function = signal_variance ** 2 * pm.gp.cov.ExpQuad(
                    input_dim=self.num_pred,
                    ls=length_scale)
            else:
                cov_function = self.kernel

            if self.prior_mean is None:
                mean_function = pm.gp.mean.Zero()
            else:
                mean_function = pm.gp.mean.Constant(c=self.prior_mean)

            self.gp = pm.gp.Latent(mean_func=mean_function,
                                   cov_func=cov_function)

            f = self.gp.prior('f', X=model_input.get_value())

            # the likelihood precision comes from the noise term
            y = pm.StudentT('y', mu=f, lam=1 / noise_variance,
                            nu=degrees_of_freedom, observed=model_output)

        return model
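
The docstring's note about sizes refers to PyMC3's standard theano shared-variable pattern: data wrapped in a shared variable can be swapped after the model is built, provided the shape is unchanged. A minimal self-contained sketch of that pattern (independent of the class internals):

    import numpy as np
    import pymc3 as pm
    import theano

    x_shared = theano.shared(np.zeros(10))
    with pm.Model() as m:
        mu = pm.Normal('mu', 0, 1)
        pm.Normal('obs', mu=mu, sd=1, observed=x_shared)

    x_shared.set_value(np.random.randn(10))  # same shape as declared above
    with m:
        trace = pm.sample(500)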
Example no. 6
def draws_from_StudentT(data, uncertainties):
    #pymc3 model
    with pm.Model() as model:
        sig_prior = pm.HalfNormal('sig', 50)
        vel_prior = pm.Normal('vel', 0.0, 50.0)
        lognu_prior = pm.Uniform('lognu', -2.0, np.log(20))
        nu_prior = pm.Deterministic('nu', pm.math.exp(lognu_prior))

        vel_tracers = pm.Normal('vel-tracers',
                                mu=vel_prior,
                                sd=uncertainties,
                                shape=len(data))

        measurements = pm.StudentT('measurements',
                                   nu=nu_prior,
                                   mu=vel_tracers,
                                   sd=sig_prior,
                                   observed=data)
        trace = pm.sample(2000, tune=10000)

    #Plot these traces
    pm.traceplot(trace)
    plt.savefig('Plots/studentT_traceplot.pdf')
    plt.savefig('Plots/studentT_traceplot.jpg')
    #Make a KDE approximation to the sigma posterior
    xx = np.linspace(0.0, 30.0, 1000)
    kde_approximation = stats.gaussian_kde(trace['sig'])

    #Plot things
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(xx, kde_approximation(xx), c='r', linewidth=3.0)
    ax.hist(trace['sig'],
            100,
            facecolor='0.8',
            edgecolor='k',
            histtype='stepfilled',
            density=True,
            linewidth=2.0)

    ax.axvline(xx[np.argmax(kde_approximation(xx))],
               c='k',
               linestyle='dashed',
               linewidth=2.0)

    ax.set_xlim([0.0, 30.0])
    ax.set_ylabel(r'PDF')
    ax.set_yticks([])
    #ax.tick_params(axis='both', which='major', labelsize=15)
    ax.set_xlabel(r'$\sigma$ (kms$^{-1}$)')

    fig.tight_layout()
    fig.savefig('Plots/studentT_pdf.pdf')
    fig.savefig('Plots/studentT_pdf.jpg')

    return trace, kde_approximation
Example no. 7
def two_sample_best(y1, y2, sigma_low=1.0, sigma_high=10.0, sample_kwargs=None):
    """Run two-sample BEST model

    Args:
        y1 (array): Group 1 values
        y2 (array): Group 2 values
        sigma_low (float, optional): Lower bound of uniform prior on group standard deviation. Defaults to 1.0.
        sigma_high (float, optional): Upper bound of uniform prior on group standard deviation. Defaults to 10.0.
        sample_kwargs (dict, optional): Additional keyword arguments passed on to pymc3.sample.

    Returns:
        arviz.InferenceData
    """
    sample_kwargs = sample_kwargs or {}
    y = np.concatenate([y1, y2])
    mu_m = y.mean()
    mu_sd = y.std() * 2

    with pm.Model() as BEST:
        # Priors
        group1_mean = pm.Normal("group1_mean", mu=mu_m, sd=mu_sd)
        group2_mean = pm.Normal("group2_mean", mu=mu_m, sd=mu_sd)
        group1_sd = pm.Uniform("group1_sd", lower=sigma_low, upper=sigma_high)
        group2_sd = pm.Uniform("group2_sd", lower=sigma_low, upper=sigma_high)
        nu = pm.Exponential("nu_minus_one", 1.0 / 29.0) + 1.0

        # Deterministics
        lam1 = group1_sd ** -2
        lam2 = group2_sd ** -2
        diff_of_means = pm.Deterministic("diff_of_means", group1_mean - group2_mean)
        diff_of_sds = pm.Deterministic("diff_of_sds", group1_sd - group2_sd)
        pooled_sd = np.sqrt((group1_sd ** 2 + group2_sd ** 2) / 2)
        effect_size = pm.Deterministic("d", diff_of_means / pooled_sd)

        # Likelihood
        group1 = pm.StudentT("group1", nu=nu, mu=group1_mean, lam=lam1, observed=y1)
        group2 = pm.StudentT("group2", nu=nu, mu=group2_mean, lam=lam2, observed=y2)

        # MCMC
        inferencedata = pm.sample(return_inferencedata=True, **sample_kwargs)

    return inferencedata
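
A hypothetical call with synthetic groups (names and numbers are illustrative); the returned arviz.InferenceData can be summarised directly:

    import numpy as np
    import arviz as az

    rng = np.random.default_rng(0)
    y1 = rng.normal(101, 2, size=50)
    y2 = rng.normal(100, 2, size=50)

    idata = two_sample_best(y1, y2, sample_kwargs={'draws': 1000, 'tune': 1000})
    az.summary(idata, var_names=['diff_of_means', 'diff_of_sds', 'd'])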
Example no. 8
    def build_model(self, n=None, name='archimedian_model'):
        with pm.Model(name=name) as self.model:
            if n is None:
                # one n per galaxy, or per arm?
                self.n_choice = pm.Categorical('n_choice', [1, 1, 0, 1, 1],
                                               testval=1,
                                               shape=len(self.galaxies))
                self.n = pm.Deterministic('n', self.n_choice - 2)
            else:
                msg = 'Parameter $n$ must be a nonzero float'
                try:
                    n = float(n)
                except ValueError:
                    pass
                finally:
                    assert isinstance(n, float) and n != 0, msg

                self.n_choice = None
                self.n = pm.Deterministic(
                    'n', tt.as_tensor_variable(np.repeat(n, len(self.galaxies))))

            self.chirality_correction = tt.switch(self.n < 0, -1, 1)
            self.a = pm.HalfCauchy('a', beta=1, testval=1, shape=self.n_arms)
            self.psi = pm.Normal(
                'psi',
                mu=0,
                sigma=1,
                testval=0.1,
                shape=self.n_arms,
            )
            self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)
            # Unfortunately, as we need to reverse the theta points for arms
            # with n < 1, and rotate all arms to start at theta = 0,
            # we need to do some model-mangling
            self.t_mins = Series({
                i: self.data.query('arm_index == @i')['theta'].min()
                for i in np.unique(self.data['arm_index'])
            })
            r_stack = [
                self.a[i] * tt.power(
                    (self.data.query('arm_index == @i')['theta'].values -
                     self.t_mins[i] + self.psi[i]),
                    1 / self.n[int(self.gal_arm_map[i])])
                [::self.chirality_correction[int(self.gal_arm_map[i])]]
                for i in np.unique(self.data['arm_index'])
            ]
            r = pm.Deterministic('r', tt.concatenate(r_stack))
            self.likelihood = pm.StudentT(
                'Likelihood',
                mu=r,
                sigma=self.sigma_r,
                observed=self.data['r'].values,
            )
Example no. 9
def best(sample1, sample2, σ_range, exponential_m, n_iter=2000, n_jobs=2):
    y1 = np.array(sample1)
    y2 = np.array(sample2)
    y = pd.DataFrame(
        dict(value=np.r_[y1, y2],
             group=np.r_[["onboard"] * len(sample1),
                         ["spirit"] * len(sample2)]))
    μ_m = y.value.mean()
    μ_s = y.value.std() * 2

    with pm.Model() as model:
        group1_mean = pm.Normal("group1_mean", μ_m, sd=μ_s)
        group2_mean = pm.Normal("group2_mean", μ_m, sd=μ_s)
        σ_low, σ_high = σ_range
        group1_std = pm.Uniform("group1_std", lower=σ_low, upper=σ_high)
        group2_std = pm.Uniform("group2_std", lower=σ_low, upper=σ_high)
        ν = pm.Exponential("ν_minus_one", abs(1 / (exponential_m - 1))) + 1

        λ1 = group1_std**-2
        λ2 = group2_std**-2

        group1 = pm.StudentT("onboard",
                             nu=ν,
                             mu=group1_mean,
                             lam=λ1,
                             observed=y1)
        group2 = pm.StudentT("spirit",
                             nu=ν,
                             mu=group2_mean,
                             lam=λ2,
                             observed=y2)

        diff_of_means = pm.Deterministic("difference of means",
                                         group1_mean - group2_mean)
        diff_of_stds = pm.Deterministic("difference of stds",
                                        group1_std - group2_std)
        effect_size = pm.Deterministic(
            "effect size", diff_of_means / np.sqrt(
                (group1_std**2 + group2_std**2) / 2))
        trace = pm.sample(n_iter, init=None, njobs=n_jobs)
    return BestResult(trace, model)
Example no. 10
def make_stochastic_volatility_model(data):
    with pm.Model() as model:
        step_size = pm.Exponential("step_size", 10)
        volatility = pm.GaussianRandomWalk("volatility",
                                           sigma=step_size,
                                           shape=len(data))
        nu = pm.Exponential("nu", 0.1)
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=np.exp(-2 * volatility),
                              observed=data["change"])
    return model
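
Sampling the returned model and recovering the latent scale might look like the sketch below; since lam = exp(-2 * volatility), the per-step standard deviation of returns is exp(volatility):

    model = make_stochastic_volatility_model(data)
    with model:
        trace = pm.sample(1000, tune=1000)
    scale_draws = np.exp(trace['volatility'])  # shape: (draws, time steps)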
Example no. 11
    def _build_model(self, observed_a, observed_b):
        self.model = pm.Model()
        with self.model as model:
            # normal priors for means
            mean_param_a = pm.Normal(self._varnames['mean_param_a'],
                                     self.mu_mean, self.mu_sd)
            mean_param_b = pm.Normal(self._varnames['mean_param_b'],
                                     self.mu_mean, self.mu_sd)

            # uniform priors standard deviations
            sd_param_a = pm.Uniform(self._varnames['sd_param_a'],
                                    self.sd_lower, self.sd_upper)
            sd_param_b = pm.Uniform(self._varnames['sd_param_b'],
                                    self.sd_lower, self.sd_upper)

            # shifted exponential prior for normality (aka 'degrees of freedom')
            nu = pm.Exponential(self._varnames['nu'], 1 / self.nu_mean) + 1

            # the data are assumed to come from a Student's t distribution,
            # since it models data with outliers well; it is not related to
            # Student's t test in this case

            # pymc3 uses precision instead of sd for Student's t
            lambda_param_a = sd_param_a**-2
            lambda_param_b = sd_param_b**-2

            data_param_a = pm.StudentT('data_param_a',
                                       nu=nu,
                                       mu=mean_param_a,
                                       lam=lambda_param_a,
                                       observed=observed_a)
            data_param_b = pm.StudentT('data_param_b',
                                       nu=nu,
                                       mu=mean_param_b,
                                       lam=lambda_param_b,
                                       observed=observed_b)

            diff_means = pm.Deterministic(self._varnames['diff_means'],
                                          mean_param_a - mean_param_b)
            diff_sds = pm.Deterministic(self._varnames['diff_sds'],
                                        sd_param_a - sd_param_b)
Example no. 12
    def build_model(self, name=''):
        # Define Stochastic variables
        with pm.Model(name=name) as self.model:
            # Global mean pitch angle
            self.phi_gal = pm.Uniform('phi_gal',
                                      lower=0,
                                      upper=90,
                                      shape=len(self.galaxies))
            # note we don't model inter-galaxy dispersion here
            # intra-galaxy dispersion
            self.sigma_gal = pm.InverseGamma('sigma_gal',
                                             alpha=2,
                                             beta=20,
                                             testval=5)
            # arm offset parameter
            self.c = pm.Cauchy('c',
                               alpha=0,
                               beta=10,
                               shape=self.n_arms,
                               testval=np.tile(0, self.n_arms))

            # radial noise
            self.sigma_r = pm.InverseGamma('sigma_r', alpha=2, beta=0.5)

            # define prior for Student T degrees of freedom
            # self.nu = pm.Uniform('nu', lower=1, upper=100)

            # Define Dependent variables
            self.phi_arm = pm.TruncatedNormal(
                'phi_arm',
                mu=self.phi_gal[self.gal_arm_map],
                sd=self.sigma_gal,
                lower=0,
                upper=90,
                shape=self.n_arms)

            # convert to a gradient for a linear fit
            self.b = tt.tan(np.pi / 180 * self.phi_arm)
            r = pm.Deterministic(
                'r',
                tt.exp(self.b[self.data['arm_index'].values] *
                       self.data['theta'] +
                       self.c[self.data['arm_index'].values]))

            # likelihood function
            self.likelihood = pm.StudentT(
                'Likelihood',
                mu=r,
                sigma=self.sigma_r,
                nu=1,  #self.nu,
                observed=self.data['r'],
            )
Example no. 13
def test_compare():
    np.random.seed(42)
    x_obs = np.random.normal(0, 1, size=100)

    with pm.Model() as model0:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=1, observed=x_obs)
        trace0 = pm.sample(1000)

    with pm.Model() as model1:
        mu = pm.Normal('mu', 0, 1)
        x = pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
        trace1 = pm.sample(1000)

    with pm.Model() as model2:
        mu = pm.Normal('mu', 0, 1)
        x = pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
        trace2 = pm.sample(1000)

    traces = [trace0, copy.copy(trace0)]
    models = [model0, copy.copy(model0)]

    model_dict = dict(zip(models, traces))

    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert_almost_equal(w_st[0], w_st[1])
    assert_almost_equal(w_bb_bma[0], w_bb_bma[1])
    assert_almost_equal(w_bma[0], w_bma[1])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)

    traces = [trace0, trace1, trace2]
    models = [model0, model1, model2]

    model_dict = dict(zip(models, traces))

    w_st = pm.compare(model_dict, method='stacking')['weight']
    w_bb_bma = pm.compare(model_dict, method='BB-pseudo-BMA')['weight']
    w_bma = pm.compare(model_dict, method='pseudo-BMA')['weight']

    assert (w_st[0] > w_st[1] > w_st[2])
    assert (w_bb_bma[0] > w_bb_bma[1] > w_bb_bma[2])
    assert (w_bma[0] > w_bma[1] > w_bma[2])

    assert_almost_equal(np.sum(w_st), 1.)
    assert_almost_equal(np.sum(w_bb_bma), 1.)
    assert_almost_equal(np.sum(w_bma), 1.)
Example no. 14
def generate_alignment_distribution(name, sd, observed, nu=np.inf):
    '''Returns an alignment distribution given observations and errors.
    This is used to centralize defaults.

    If nu is infinite or None, then a Normal distribution is used.
    If nu is any other value, a StudentT_{nu} distribution is used.
    '''
    if nu is np.inf or nu is None:
        align = pm.Normal(name, mu=0, sd=sd, observed=observed)
    else:
        align = pm.StudentT(name, nu=nu, mu=0, sd=sd, observed=observed)

    return align
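
Illustrative use inside a model context (values assumed):

    with pm.Model():
        obs = np.array([0.1, -0.3, 0.2])
        align_n = generate_alignment_distribution('align_n', sd=0.5, observed=obs)        # Normal
        align_t = generate_alignment_distribution('align_t', sd=0.5, observed=obs, nu=4)  # StudentT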
Example no. 15
def exponential_model(training_data_df):
    logreturns = training_data_df['logret'].to_numpy()  # .as_matrix() was removed in pandas 1.0
    with pm.Model() as model_obj:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(logreturns))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.math.exp(-2 * s))
        r = pm.StudentT('r',
                        nu,
                        lam=1 / volatility_process,
                        observed=logreturns)
    return model_obj
Example no. 16
    def _build_returns_factors(self):
        self.dims.update({
            'factor_algo': ('factor', 'algo'),
        })
        factors = self.factors
        n_algos, n_factors = self.n_algos, self.n_factors
        factor_algo = pm.StudentT('factor_algo',
                                  nu=3,
                                  mu=0,
                                  sd=2,
                                  shape=(n_factors, n_algos))
        return (factor_algo[:, None, :] *
                factors.values.T[:, :, None]).sum(0).T
Example no. 17
def hierarchical_normal(name,
                        shape,
                        mu=None,
                        group_mu_variance=5,
                        intra_group_variance=1):
    """
    Credit to Austin Rochford: https://www.austinrochford.com/posts/2018-12-20-sports-precision.html
    """
    if mu is None:
        mu = pm.Normal(f"mu_{name}", 0.0, group_mu_variance)

    delta = pm.StudentT(f"delta_{name}", nu=1, shape=shape)
    sigma = pm.HalfCauchy(f"sigma_{name}", intra_group_variance)
    return pm.Deterministic(name, mu + delta * sigma)
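
A hedged usage sketch: inside a model context the helper yields a non-centered group-effect vector, with heavy-tailed (nu=1) offsets:

    with pm.Model():
        team_effect = hierarchical_normal('team', shape=30)
        # team_effect is a Deterministic of length 30,
        # ready to be indexed into a likelihood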
Example no. 18
    def get_model(self, data: xr.Dataset) -> pm.Model:
        # transpose the dataset to ensure that it is the way we expect
        data = data.transpose("item", "feature")

        with pm.Model() as model:
            X = pm.Data("x_obs", data.X.values)
            Y = pm.Data("y_obs", data.Y.values)

            alpha = pm.Normal("alpha", mu=0, sd=self.alpha_scale)
            beta = pm.Normal("beta", mu=self.beta_loc, sd=self.beta_scale, shape=self.k)
            nu = pm.Gamma("nu", alpha=2, beta=0.1)
            sigma = pm.Exponential("sigma", lam=1 / self.sigma_mean)
            mu = alpha + X.dot(beta)
            pm.StudentT("Y", nu=nu, mu=mu, sigma=sigma, observed=Y, shape=self.n)

        return model
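
A hypothetical call, assuming `m` is an instance of the class above (with matching `k` and `n`) and a Dataset holding X over (item, feature) and Y over (item):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({
        'X': (('item', 'feature'), np.random.randn(100, 3)),
        'Y': (('item',), np.random.randn(100)),
    })
    model = m.get_model(ds)
    with model:
        trace = pm.sample(1000)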
Example no. 19
    def _build_model(self, X, y, **kwargs):
        with pm.Model() as model:
            # priors
            alpha = pm.Normal('alpha', mu=0, sigma=1e5)
            beta = pm.Normal('beta', mu=0, sigma=1e5)
            sigma = pm.HalfNormal('sigma', sigma=1e5)

            # mean: linear regression
            mu = alpha + beta * X  # alpha + pm.math.dot(beta, X)

            # degree of freedom
            nu = pm.Exponential('nu', 1 / 30)

            # observations
            pm.StudentT('y', mu=mu, sigma=sigma, nu=nu, observed=y)

        return model
Example no. 20
def load_trace(dir_path, bX, by):
    with pm.Model() as model:  # noqa
        length = pm.Gamma("length", alpha=2, beta=1)
        eta = pm.HalfCauchy("eta", beta=5)

        cov = eta**2 * pm.gp.cov.Matern52(input_dim=1, ls=length)
        gp = pm.gp.Latent(cov_func=cov)

        f = gp.prior("f", X=bX)

        sigma = pm.HalfCauchy("sigma", beta=5)
        nu = pm.Gamma("nu", alpha=2, beta=0.1)
        y_ = pm.StudentT("y", mu=f, lam=1.0 / sigma, nu=nu,
                         observed=by)  # noqa

        trace = pm.load_trace(dir_path)
        return model, gp, trace
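
pm.load_trace requires the model to be rebuilt exactly as it was when the trace was written; the saving counterpart, an assumed earlier step, would be:

    with model:
        trace = pm.sample(1000)
        pm.save_trace(trace, directory=dir_path)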
Example no. 21
def main():

    data = np.array([
        51.06, 55.12, 53.73, 50.24, 52.05, 56.40, 48.45, 52.34, 55.65, 51.49,
        51.86, 63.43, 53.00, 56.09, 51.93, 52.31, 52.33, 57.48, 57.44, 55.14,
        53.93, 54.62, 56.09, 68.58, 51.36, 55.47, 50.73, 51.94, 54.95, 50.39,
        52.91, 51.5, 52.68, 47.72, 49.73, 51.82, 54.99, 52.84, 53.19, 54.52,
        51.46, 53.73, 51.61, 49.81, 52.42, 54.3, 53.84, 53.16
    ])

    # look at the distribution of the data
    sns.kdeplot(data)

    # All of these distributions can be used to model a standard deviation:
    # the Exponential is a safe default,
    # the HalfCauchy has a fat tail,
    # a higher Exponential rate (lambda) gives a steeper density,
    # and the InverseGamma is another option
    with pm.Model() as model:
        mu = pm.Uniform('mu', 30, 80)
        sigma = pm.HalfNormal('sigma', sd=10)
        df = pm.Exponential(
            'df', 1.5)  # lambda = 1.5 is steeper; 0.5 would be flatter
        output = pm.StudentT('output',
                             mu=mu,
                             sigma=sigma,
                             nu=df,
                             observed=data)

        trace = pm.sample(1000)

        # gelman rubin
        pm.gelman_rubin(trace)

        # forestplot
        pm.forestplot(trace)

        # summary [look at mc error here. This is the std error, should be low]
        pm.summary(trace)

        #autocorrelation
        pm.autocorrplot(trace)

        # effective size
        pm.effective_n(trace)
Example no. 22
    def setup_class(cls):
        np.random.seed(42)
        x_obs = np.random.normal(0, 1, size=100)

        with pm.Model() as cls.model0:
            mu = pm.Normal('mu', 0, 1)
            pm.Normal('x', mu=mu, sd=1, observed=x_obs)
            cls.trace0 = pm.sample(1000)

        with pm.Model() as cls.model1:
            mu = pm.Normal('mu', 0, 1)
            pm.Normal('x', mu=mu, sd=0.8, observed=x_obs)
            cls.trace1 = pm.sample(1000)

        with pm.Model() as cls.model2:
            mu = pm.Normal('mu', 0, 1)
            pm.StudentT('x', nu=1, mu=mu, lam=1, observed=x_obs)
            cls.trace2 = pm.sample(1000)
Example no. 23
    def fit(self, n_steps=50000):
        """
        Creates a Bayesian Estimation model for replicate measurements of
        treatment(s) vs. control.

        Parameters
        ----------
        n_steps : int
            The number of steps to run ADVI.
        """

        sample_names = set(self.data[self.sample_col].values)

        with pm.Model() as model:
            # Hyperpriors
            # upper = pm.Exponential('upper', lam=0.05)
            nu = pm.Exponential('nu_minus_one', 1/29.) + 1

            # "fold", which is the estimated fold change.
            fold = pm.Flat('fold', shape=len(sample_names))

            # Assume that data have heteroskedastic (i.e. variable) error but
            # are drawn from the same HalfCauchy distribution.
            sigma = pm.HalfCauchy('sigma', beta=1, shape=len(sample_names))

            # Model prediction
            mu = fold[self.data['indices'].values]
            sig = sigma[self.data['indices'].values]

            # Data likelihood; sig is the per-sample scale (sd), not a precision
            like = pm.StudentT('like', nu=nu, mu=mu, sd=sig,
                               observed=self.data[self.output_col])

            # Sample from posterior
            v_params = pm.variational.advi(n=n_steps)
            start = pm.variational.sample_vp(v_params, 1)[0]
            cov = np.power(model.dict_to_array(v_params.stds), 2)
            step = pm.NUTS(scaling=cov, is_cov=True)
            logging.info('Starting MCMC sampling')
            trace = pm.sample(step=step, start=start, draws=2000)

        self.trace = trace
        self.model = model
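
The pm.variational.advi / sample_vp calls are from early PyMC3; in later 3.x releases the rough equivalent (a sketch, not the author's code) is:

    with model:
        approx = pm.fit(n=n_steps, method='advi')
        trace = approx.sample(2000)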
Example no. 24
    def _build_gains_factors(self):
        self.dims.update({
            'gains_factor_algo': ('gains_factor', 'algo'),
            'gains_factor_algo_raw': ('gains_factor', 'algo'),
            'gains_factor_algo_sd': ('gains_factor', ),
        })
        gains_factors = self.gains_factors
        n_algos, n_gains_factors = self.n_algos, self.n_gains_factors
        sd = pm.HalfNormal('gains_factor_algo_sd',
                           sd=0.4,
                           shape=n_gains_factors)
        raw = pm.StudentT('gains_factor_algo_raw',
                          nu=7,
                          mu=0,
                          sd=1,
                          shape=(n_gains_factors, n_algos))
        vals = sd[:, None] * raw
        pm.Deterministic('gains_factor_algo', vals)
        return (vals[:, None, :] * gains_factors.values.T[:, :, None]).sum(0).T
Example no. 25
def model_returns_t(data, samples=500, progressbar=True):
    """
    Run Bayesian model assuming returns are Student-T distributed.

    Compared with the normal model, this model assumes returns are
    T-distributed and thus have a 3rd parameter (nu) that controls the
    mass in the tails.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    with pm.Model() as model:
        mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
        sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        nu = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        returns = pm.StudentT('returns',
                              nu=nu + 2,
                              mu=mu,
                              sd=sigma,
                              observed=data)
        pm.Deterministic('annual volatility',
                         returns.distribution.variance**.5 * np.sqrt(252))

        pm.Deterministic(
            'sharpe', returns.distribution.mean /
            returns.distribution.variance**.5 * np.sqrt(252))

        trace = pm.sample(samples, progressbar=progressbar)
    return model, trace
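
A hypothetical run on a synthetic returns series (values illustrative):

    import numpy as np
    import pandas as pd

    rets = pd.Series(np.random.normal(0.0005, 0.01, 252))
    model, trace = model_returns_t(rets, samples=500)
    pm.summary(trace)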
Example no. 26
def estimate_student(normalized_ranks):
    """This fits a PyMC3 model. All the model does is
    fit the parameters for t distribution, since it is clear
    (in the authors opinion) that the logit-transformed ranks 
    are very well described by a t distribution. The logit
    ranks are thus the observations, and the model finds the 
    ranges of parameters consistent with those obs."""

    with pm.Model() as model:
        nu = pm.HalfNormal('nu', 50)  #very broad priors
        mu = pm.Normal('mu', mu=0, sigma=50)  #very broad priors
        sigma = pm.HalfNormal('sig', 50)  #very broad priors

        lik = pm.StudentT('t',
                          nu=nu,
                          mu=mu,
                          sigma=sigma,
                          observed=logit(normalized_ranks))
        trace = pm.sample(1000, tune=1000)
    return trace, model
Example no. 27
    def build(self):
        with pm.Model() as model:
            w = pm.Lognormal('lengthscale', 0, 4)
            h2 = pm.Lognormal('variance', 0, 4)
            # sigma = pm.Lognormal('sigma', 0, 4)
            p = pm.Lognormal('p', 5, 4)

            f_cov = h2 * pm.gp.cov.Periodic(1, period=p, ls=w)
            gp = pm.gp.Latent(cov_func=f_cov)
            f = gp.prior('f', X=self.X_train)
            s2 = pm.Lognormal('Gaussian_noise', -4, 4)
            y_ = pm.StudentT('y', mu=f, nu=s2, observed=self.Y_train)
            #start = pm.find_MAP()
            step = pm.Metropolis()
            db = pm.backends.Text('trace')
            trace = pm.sample(2000, step, chains=1, njobs=1, trace=db)  # start=start

        pm.traceplot(trace, varnames=['lengthscale', 'variance', 'Gaussian_noise'])
        plt.show()
        return trace
Example no. 28
def _init_model(X, y):
    """
    exmaple and default specification of a model
    specify a linear regression model
    :param X:
    :param y:
    :return:
    """
    with pm.Model() as model:
        # Define hyper-prior
        alpha = pm.Gamma("alpha", alpha=1e-2, beta=1e-4)

        # Define priors'
        w = pm.Normal("w", mu=0, sd=alpha, shape=X.get_value().shape[1])
        sigma = pm.HalfCauchy("sigma", beta=10)
        mu = tt.dot(w, X.T)

        # Define likelihood
        likelihood = pm.StudentT("y", nu=1, mu=mu, lam=sigma, observed=y)
    return model
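
Because the model reads X.get_value(), X is expected to be a theano shared variable; a hypothetical setup:

    import numpy as np
    import theano

    X = theano.shared(np.random.randn(100, 3))
    y = np.random.randn(100)
    model = _init_model(X, y)
    with model:
        trace = pm.sample(500)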
Example no. 29
def model_stoch_vol(data, samples=2000, progressbar=True):
    """
    Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the scale of the
    T distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.math.exp(-2 * s))
        pm.StudentT('r', nu, lam=volatility_process, observed=data)

        trace = pm.sample(samples, progressbar=progressbar)

    return model, trace
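
A hypothetical call on a daily returns series (values illustrative):

    import numpy as np
    import pandas as pd

    rets = pd.Series(np.random.normal(0, 0.01, 300))
    model, trace = model_stoch_vol(rets, samples=500)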