Example #1
 def test_zeroinflatedpoisson(self):
     with pm.Model():
         theta = pm.Beta("theta", alpha=1, beta=1)
         psi = pm.HalfNormal("psi", sd=1)
         pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, shape=20)
         gen_data = pm.sample_prior_predictive(samples=5000)
         assert gen_data["theta"].shape == (5000,)
         assert gen_data["psi"].shape == (5000,)
         assert gen_data["suppliers"].shape == (5000, 20)
 def test_n_obj_mc(self):
     n_samples = 100
     xs = np.random.binomial(n=1, p=0.2, size=n_samples)
     with pm.Model():
         p = pm.Beta('p', alpha=1, beta=1)
         pm.Binomial('xs', n=1, p=p, observed=xs)
         inf = self.inference(scale_cost_to_minibatch=True)
         # should just work
         inf.fit(10, obj_n_mc=10, obj_optimizer=self.optimizer)
Example #3
def deathPriors(numApop):
    """ Setup priors for cell death parameters. """
    # Rate of moving from apoptosis to death, assumed invariant wrt. treatment
    d = pm.Lognormal("d", np.log(0.001), 0.5)

    # Fraction of dying cells that go through apoptosis
    apopfrac = pm.Beta("apopfrac", 1.0, 1.0, shape=numApop)

    return d, apopfrac
Example #4
def hierarchical_beta(name, name_sigma, pr_mean, pr_sigma, len_L2):
    """Beta prior with mean pr_mean; adds a hierarchical second level of size len_L2 when given."""
    if not len_L2:  # not hierarchical
        Y = pm.Beta(name,
                    alpha=pr_mean / pr_sigma,
                    beta=1 / pr_sigma * (1 - pr_mean))
        X = None
    else:
        sigma_Y = pm.HalfCauchy(name_sigma + "_L2", beta=pr_sigma)
        X = pm.Beta(name + "_L1",
                    alpha=pr_mean / pr_sigma,
                    beta=1 / pr_sigma * (1 - pr_mean))
        Y = pm.Beta(name + "_L2",
                    alpha=X / sigma_Y,
                    beta=1 / sigma_Y * (1 - X),
                    shape=len_L2)

    return Y, X
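With alpha = pr_mean / pr_sigma and beta = (1 - pr_mean) / pr_sigma, the prior mean is alpha / (alpha + beta) = pr_mean, so pr_sigma only controls how concentrated the prior is. A minimal usage sketch (the names and values below are illustrative assumptions, not from the original project):

import pymc3 as pm

with pm.Model():
    # one Beta per region, pooled around a common level of about 0.3
    Y, X = hierarchical_beta("frac", "sigma_frac",
                             pr_mean=0.3, pr_sigma=0.05, len_L2=5)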
Example #6
def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv",
                                np.r_[0.0],
                                np.c_[1.0],
                                shape=1,
                                observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv",
                                    np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv",
                                          0.1,
                                          0.1,
                                          10,
                                          observed=5)
        categorical_rv = pm.Categorical("categorical_rv",
                                        np.r_[0.5, 0.5],
                                        observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv",
                                        5,
                                        np.r_[0.5, 0.5],
                                        observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1]
        for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    assert pymc_rv_names == new_pymc_rv_names
Example #7
def ab_test(obs, return_p, dnase_sense, dnase_antisense, naoh_sense,
            naoh_antisense):
    '''
    Model the empirical beta distribution and use it as the prior,
    add new evidence (NaOH or DNase),
    sample the delta between strands, and
    calculate the Bayes factor for DNase > NaOH by at least 10%.
    '''
    with pm.Model() as model:

        # fit beta binom
        naoh_empirical_alpha = pm.Exponential('alpha', 1)
        naoh_empirical_beta = pm.Exponential('beta', 1)
        beta_binom_prior = pm.Beta('beta_prior',
                                   naoh_empirical_alpha,
                                   naoh_empirical_beta,
                                   observed=obs)
        alpha = pm.Normal('alpha1', mu=naoh_empirical_alpha, sd=1)
        beta = pm.Normal('beta1', mu=naoh_empirical_beta, sd=1)

        #inference
        dnase_sense = pm.Beta('dnase_sense',
                              alpha=alpha + dnase_sense,
                              beta=beta + dnase_antisense)
        naoh_sense = pm.Beta('naoh_sense',
                             alpha=alpha + naoh_sense,
                             beta=beta + naoh_antisense)
        diff = pm.Deterministic('delta', dnase_sense - naoh_sense)
        step = pm.NUTS()

        progressbar = not return_p
        trace = pm.sample(1000, step, tune=1000, progressbar=progressbar, cores=24)

    if return_p:
        delta = trace['delta']
        h1 = np.sum(delta >= 0.1)
        h0 = np.sum(delta <= 0)

        p_h0 = h0 / len(delta)
        p_h1 = h1 / len(delta)
        bf = p_h1 / p_h0 if p_h0 > 0 else EPSILON
        return bf, delta.mean()
    else:
        return trace
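A sketch of calling ab_test (the observed array and all counts are made-up placeholders; EPSILON is assumed to be a module-level constant, since the function references it):

import numpy as np

EPSILON = 1e-6  # assumed module-level constant
obs = np.random.beta(2, 5, size=200)  # empirical NaOH strand fractions in (0, 1)
bf, mean_delta = ab_test(obs, return_p=True,
                         dnase_sense=120, dnase_antisense=80,
                         naoh_sense=100, naoh_antisense=95)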
Example #8
def stickbreak_prior(name, a, shape):
    """truncated stick-breaking construction"""
    gamma = pm.Gamma('gamma_{}'.format(name), 1., 1.)
    delta = pm.Gamma('delta_{}'.format(name), 1., a)
    beta_prime = tt.stack([
        pm.Beta('beta_prime_{}_{}'.format(name, k), 1., gamma)
        for k in range(shape)
    ])
    beta = GEM(beta_prime)
    return (beta * delta)
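GEM is an external helper that is not shown in this example. A common truncated stick-breaking transform looks roughly like the sketch below (an assumption about its behavior, not the original repo's implementation): it maps the fractions v_k to weights w_k = v_k * prod_{j<k} (1 - v_j).

import theano.tensor as tt
from theano.tensor import extra_ops

def GEM(beta_prime):
    """Turn stick-breaking fractions into weights: w_k = v_k * prod_{j<k} (1 - v_j)."""
    remaining = tt.concatenate([tt.ones(1), extra_ops.cumprod(1.0 - beta_prime)[:-1]])
    return beta_prime * remaining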
Example #9
 def __init__(self, bandit, policy, ts=True):
     super(BetaAgent, self).__init__(bandit, policy)
     self.n = bandit.n
     self.ts = ts
     self.model = pm.Model()
     with self.model:
         self._prior = pm.Beta('prior', alpha=np.ones(self.k),
                               beta=np.ones(self.k), shape=(1, self.k),
                               transform=None)
     self._value_estimates = np.zeros(self.k)
Example #10
    def sample_posterior(self, t, T, n_samp, n_burnin=None):
        """
        Get samples from the posterior, e.g. for posterior inference or computing Bayesian credible intervals.
        This routine samples via the random walk Metropolis (RWM) algorithm using the ``pymc3`` library.

        The function returns a ``pymc3.MultiTrace`` object that can be used much like a ``numpy.array``.
        Furthermore, ``pymc3`` can be used to create "traceplots", for example via

        .. code-block:: python

            from matplotlib import pyplot as plt
            import pymc3

            trace = uvb.fit(t, T)
            pymc3.traceplot(trace["mu"])

            plt.plot(trace["mu"], trace["alpha"])

        :param numpy.array[float] t: Observation timestamps of the process up to time T. 1-d array of
            timestamps, sorted in ascending order.
        :param T: (optional) maximum time
        :type T: float or None
        :param int n_samp: number of posterior samples to take
        :param int n_burnin: number of samples to discard (as the burn-in samples)

        :rtype: pymc3.MultiTrace
        :return: the posterior samples for mu, alpha and theta as a trace object
        """

        t, T = self._prep_t_T(t, T)

        if n_burnin is None:
            n_burnin = int(n_samp / 5)

        with pm.Model() as model:
            mu = pm.Gamma("mu", alpha=self.mu_hyp[0], beta=1. / self.mu_hyp[1])
            theta = pm.Gamma("theta",
                             alpha=self.theta_hyp[0],
                             beta=1. / self.theta_hyp[1])
            alpha = pm.Beta("alpha",
                            alpha=self.alpha_hyp[0],
                            beta=self.alpha_hyp[1])

            op = HPLLOp(t, T)
            a = pm.Deterministic('a', op(mu, alpha, theta))
            llop = pm.Potential('ll', a)

            trace = pm.sample(n_samp,
                              step=pm.Metropolis(),
                              cores=1,
                              chains=1,
                              tune=n_burnin,
                              discard_tuned_samples=True)

        return trace[n_burnin:]
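A minimal usage sketch, assuming uvb is an instance of the class this method belongs to (the timestamps below are synthetic):

import numpy as np

t = np.sort(np.random.uniform(0.0, 100.0, size=500))  # ascending event times
trace = uvb.sample_posterior(t, T=100.0, n_samp=5000)
print(trace["mu"].mean(), trace["alpha"].mean(), trace["theta"].mean())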
Example #11
    def test_transformed(self):
        n = 18
        at_bats = 45 * np.ones(n, dtype=int)
        hits = np.random.randint(1, 40, size=n, dtype=int)
        draws = 50

        with pm.Model() as model:
            phi = pm.Beta("phi", alpha=1.0, beta=1.0)

            kappa_log = pm.Exponential("logkappa", lam=5.0)
            kappa = pm.Deterministic("kappa", aet.exp(kappa_log))

            thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=n)

            y = pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
            gen = pm.sample_prior_predictive(draws)

        assert gen["phi"].shape == (draws,)
        assert gen["y"].shape == (draws, n)
        assert "thetas_logodds__" in gen
Example #12
def build_model(X1, X2, timeV, conv0=0.1, confl=None, apop=None, dna=None):
    """ Builds then returns the PyMC model. """

    assert X1.shape == X2.shape

    M = pm.Model()

    with M:
        conversions = conversionPriors(conv0)
        d, apopfrac = deathPriors(1)

        # parameters for drug 1, 2; assumed to be the same for both phenotypes
        hill = pm.Lognormal("hill", shape=2)
        IC50 = pm.Lognormal("IC50", shape=2)
        EmaxGrowth = pm.Beta("EmaxGrowth", 1.0, 1.0, shape=2)
        EmaxDeath = pm.Lognormal("EmaxDeath", -2.0, 0.5, shape=2)

        # E_con values; first death then growth
        GrowthCon = pm.Lognormal("GrowthCon", np.log10(0.03), 0.1)

        # Calculate the death rate
        death_rates = blissInteract(X1, X2, hill, IC50, EmaxDeath, justAdd=True)  # pylint: disable=unsubscriptable-object

        # Calculate the growth rate
        growth_rates = GrowthCon * (1 - blissInteract(X1, X2, hill, IC50, EmaxGrowth))  # pylint: disable=unsubscriptable-object
        pm.Deterministic("EmaxGrowthEffect", GrowthCon * EmaxGrowth)

        # Test the dimension of growth_rates
        growth_rates = T.opt.Assert("growth_rates did not match X1 size")(growth_rates, T.eq(growth_rates.size, X1.size))

        lnum, eap, deadapop, deadnec = theanoCore(timeV, growth_rates, death_rates, apopfrac, d)

        # Test the size of lnum
        lnum = T.opt.Assert("lnum did not match X1*timeV size")(lnum, T.eq(lnum.size, X1.size * timeV.size))

        confl_exp, apop_exp, dna_exp = convSignal(lnum, eap, deadapop, deadnec, conversions)

        # Compare to experimental observation
        if confl is not None:
            confl_obs = T.flatten(confl_exp - confl)
            pm.Normal("confl_fit", sd=T.std(confl_obs), observed=confl_obs)
            conflmean = T.mean(confl, axis=1)
            confl_exp_mean = T.mean(confl_exp, axis=1)
            pm.Deterministic("conflResid", (confl_exp_mean - conflmean) / conflmean[0])

        if apop is not None:
            apop_obs = T.flatten(apop_exp - apop)
            pm.Normal("apop_fit", sd=T.std(apop_obs), observed=apop_obs)

        if dna is not None:
            dna_obs = T.flatten(dna_exp - dna)
            pm.Normal("dna_fit", sd=T.std(dna_obs), observed=dna_obs)

    return M
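A sketch of sampling from the returned model (the input arrays are placeholders; the only hard constraint visible in the function is X1.shape == X2.shape):

import numpy as np

X1 = np.linspace(0.0, 1.0, 6)
X2 = np.linspace(0.0, 1.0, 6)
timeV = np.linspace(0.0, 72.0, 10)

M = build_model(X1, X2, timeV)
with M:
    trace = pm.sample(1000, tune=1000)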
Example #13
  def test_1latent(self):
    with self.test_session():
      x_obs = theano.shared(np.zeros(1))
      with pm.Model() as pm_model:
        p = pm.Beta('p', 1, 1, transform=None)
        x = pm.Bernoulli('x', p, observed=x_obs)

      model = PyMC3Model(pm_model)
      data = {x_obs: np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])}
      zs = {'p': np.array(0.5)}
      _test(model, data, zs)
Example #14
def hierarchical_beta(name, name_sigma, pr_mean, pr_sigma, len_L2, model=None):

    model = modelcontext(model)

    if not model.is_hierarchical:  # not hierarchical
        Y = pm.Beta(name,
                    alpha=pr_mean / pr_sigma,
                    beta=1 / pr_sigma * (1 - pr_mean))
        X = None
    else:
        sigma_Y = pm.HalfCauchy(name_sigma + "_hc_L2", beta=pr_sigma)
        X = pm.Beta(name + "_hc_L1",
                    alpha=pr_mean / pr_sigma,
                    beta=1 / pr_sigma * (1 - pr_mean))
        Y = pm.Beta(name + "_hc_L2",
                    alpha=X / sigma_Y,
                    beta=1 / sigma_Y * (1 - X),
                    shape=len_L2)

    return Y, X
Example #15
 def build_model(self):
     with pm.Model() as model:
         # Estimated occupancy
         psi = pm.Beta('psi', 1, 1)
         # Latent variable for occupancy
         pm.Bernoulli('z', psi, shape=self.y.shape)
         # Estimated mean count
         theta = pm.Uniform('theta', 0, 100)
         # Poisson likelihood
         pm.ZeroInflatedPoisson('y', psi, theta, observed=self.y)
     return model
Example #16
def case_count_model_us_states(df):

    # Normalize inputs in a way that is sensible:

    # People per test: normalize to South Korea
    # assuming S.K. testing is "saturated"
    ppt_sk = np.log10(51500000. / 250000)
    df['people_per_test_normalized'] = (
        np.log10(df['people_per_test_7_days_ago']) - ppt_sk)

    n = len(df)

    # For each country, let:
    # c_obs = number of observed cases
    c_obs = df['num_pos_7_days_ago'].values
    # c_star = number of true cases

    # d_obs = number of observed deaths
    d_obs = df[['death', 'num_pos_7_days_ago']].min(axis=1).values
    # people per test
    people_per_test = df['people_per_test_normalized'].values

    covid_case_count_model = pm.Model()

    with covid_case_count_model:

        # Priors:
        mu_0 = pm.Beta('mu_0', alpha=1, beta=100, testval=0.01)
        # sig_0 = pm.Uniform('sig_0', lower=0.0, upper=mu_0 * (1 - mu_0))
        alpha = pm.Bound(pm.Normal, lower=0.0)(
            'alpha', mu=8, sigma=3, shape=1)
        beta = pm.Bound(pm.Normal, upper=0.0)(
            'beta', mu=-1, sigma=1, shape=1)
        # beta = pm.Normal('beta', mu=0, sigma=1, shape=3)
        sigma = pm.HalfNormal('sigma', sigma=0.5, testval=0.1)
        # sigma_1 = pm.HalfNormal('sigma_1', sigma=2, testval=0.1)

        # Model probability of case under-reporting as logistic regression:
        mu_model_logit = alpha + beta * people_per_test
        tau_logit = pm.Normal('tau_logit',
                              mu=mu_model_logit,
                              sigma=sigma,
                              shape=n)
        # np.exp cannot be applied to a Theano tensor; use pm.math instead
        tau = pm.math.exp(tau_logit) / (pm.math.exp(tau_logit) + 1)

        c_star = c_obs / tau

        # Binomial likelihood:
        d = pm.Binomial('d',
                        n=c_star,
                        p=mu_0,
                        observed=d_obs)

    return covid_case_count_model
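A sketch of running the model (it assumes df carries the columns the function reads: num_pos_7_days_ago, death, and people_per_test_7_days_ago; the sampler settings are illustrative):

model = case_count_model_us_states(df)
with model:
    trace = pm.sample(1000, tune=1000, target_accept=0.9)

# posterior reporting rate per state, recovered from the logit draws
tau_post = 1.0 / (1.0 + np.exp(-trace['tau_logit']))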
Example #17
 def build_model(self):
     with pm.Model() as model:
         # Estimated occupancy
         psi = pm.Beta("psi", 1, 1)
         # Latent variable for occupancy
         pm.Bernoulli("z", psi, shape=self.y.shape)
         # Estimated mean count
         theta = pm.Uniform("theta", 0, 100)
         # Poisson likelihood
         pm.ZeroInflatedPoisson("y", psi, theta, observed=self.y)
     return model
Example #18
 def test_sample_vp(self):
     n_samples = 100
     xs = np.random.binomial(n=1, p=0.2, size=n_samples)
     with pm.Model():
         p = pm.Beta('p', alpha=1, beta=1)
         pm.Binomial('xs', n=1, p=p, observed=xs)
         v_params = advi(n=1000)
         trace = sample_vp(v_params, draws=1, hide_transformed=True)
         self.assertListEqual(trace.varnames, ['p'])
         trace = sample_vp(v_params, draws=1, hide_transformed=False)
         self.assertListEqual(sorted(trace.varnames), ['p', 'p_logodds_'])
Example #19
    def test_transformed(self):
        n = 18
        at_bats = 45 * np.ones(n, dtype=int)
        hits = np.random.randint(1, 40, size=n, dtype=int)
        draws = 50

        with pm.Model() as model:
            phi = pm.Beta('phi', alpha=1., beta=1.)

            kappa_log = pm.Exponential('logkappa', lam=5.)
            kappa = pm.Deterministic('kappa', tt.exp(kappa_log))

            thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=n)

            y = pm.Binomial('y', n=at_bats, p=thetas, observed=hits)
            gen = pm.sample_prior_predictive(draws)

        assert gen['phi'].shape == (draws,)
        assert gen['y'].shape == (draws, n)
        assert 'thetas_logodds__' in gen
Example #20
def runModel(df_Train, df_Val, i, t, param, smpls, burns):

    dataTrn = df_Train[i]
    X = dataTrn[param].values     
    t_idx = dataTrn.Hour.astype(int).values
    
    dataVal = df_Val[i]
    validate = pd.DataFrame(np.transpose(np.array([dataVal.Hour, dataVal[param].values])), columns=['hr','y'])
    validate = validate.groupby('hr').mean()
    validate['int'] = np.round(validate.y.values)
    
    # define bernoulli hierarchical model
    with pm.Model() as model:
        # define the hyperparameters
        mu = pm.Beta('mu', 2, 2)
        #mu = pm.Beta('mu', 0.5, 0.5)
        kappa = pm.Gamma('kappa', 1, 0.1)
        # define the prior
        theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=t)
        # define the likelihood
        y_lik = pm.Bernoulli('y_like', p=theta[t_idx], observed=X)
    
        # generate an MCMC chain
        trace = pm.sample(smpls, chains=4, tune=burns, cores=1)
        ppc = pm.sample_posterior_predictive(trace)
        
    out_smry = pd.DataFrame(pm.summary(trace))   
    
    ppcMean = np.array((t_idx, np.mean(ppc['y_like'], axis=0)))
    ppcStd = np.array((t_idx, np.std(ppc['y_like'], axis=0)))
    ppc_all = np.append(np.reshape(t_idx, (-1, 1)), ppc['y_like'].T, axis=1)
    predVals = pd.DataFrame(np.transpose(ppcMean), columns=['hr', 'y'])
    predVals = predVals.groupby('hr').mean()
    predVals['int'] = np.round(predVals.y.values)
                               
    # Calculate SMAPE Error                       
    err_y = np.round(SMAPE(validate.y, predVals.y), 4)
    err_int = np.round(SMAPE(validate['int'], predVals['int']), 4)
    print('\n Error: ', (err_y, err_int), '\n')
    
    return trace, ppc_all, out_smry, [err_y, err_int]
Example #21
def gev0_shift_1(dataset):

    locm = dataset.mean()
    locs = dataset.std() / (np.sqrt(len(dataset)))
    scalem = dataset.std()
    scales = dataset.std() / (np.sqrt(2 * (len(dataset) - 1)))
    with pm.Model() as model:
        # Priors for unknown model parameters
        c1 = pm.Beta(
            'c1', alpha=6, beta=9
        )  # c = x - 0.5: the shift in gev_logp is required since the Beta domain is (0, 1)
        loc1 = pm.Normal('loc1', mu=locm, sd=locs)
        scale1 = pm.Normal('scale1', mu=scalem, sd=scales)

        c2 = pm.Beta('c2', alpha=6, beta=9)
        loc2 = pm.Normal('loc2', mu=locm, sd=locs)
        scale2 = pm.Normal('scale2', mu=scalem, sd=scales)

        def gev_logp(value):
            # `bound` (used below) comes from pymc3.distributions.dist_math
            scaled = (value - loc_) / scale_
            logp = -(tt.log(scale_) +
                     (((c_ - 0.5) + 1) / (c_ - 0.5) * tt.log1p(
                         (c_ - 0.5) * scaled) +
                      (1 + (c_ - 0.5) * scaled)**(-1 / (c_ - 0.5))))
            bound1 = loc_ - scale_ / (c_ - 0.5)
            bounds = tt.switch((c_ - 0.5) > 0, value > bound1, value < bound1)
            return bound(logp, bounds, c_ != 0)

        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)
        idx = np.arange(n_count_data)
        c_ = pm.math.switch(tau > idx, c1, c2)
        loc_ = pm.math.switch(tau > idx, loc1, loc2)
        scale_ = pm.math.switch(tau > idx, scale1, scale2)
        gev = pm.DensityDist('gev', gev_logp, observed=dataset)
        trace = pm.sample(1000, chains=1, progressbar=True)

    # geweke_plot = pm.geweke(trace, 0.05, 0.5, 20)
    # gelman_and_rubin = pm.diagnostics.gelman_rubin(trace)
    posterior = pm.trace_to_dataframe(trace)
    summary = pm.summary(trace)
    return summary, posterior
Example #22
def model_uncertainty(splits, stakes, actions, temp=1., sd=1.):
    with pm.Model() as repeated_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        p = pm.Gamma('p', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        # use pm.math.exp: numpy's exp does not accept Theano tensors
        odds_a = pm.math.exp(2 * r * splits + c * stakes**st)
        odds_r = pm.math.exp(p * (splits < 0.5 - t / 2))
        p = odds_a / (odds_r + odds_a)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_repeated = fitted.sample(2000)
        # trace_repeated = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    # with pm.Model() as simple_model:
    #   r = pm.Normal('r', mu=0, sd=1)
    #   p = np.exp(r*splits) / (1 + np.exp(r*splits))
    #   a = pm.Binomial('a', 1, p, observed=actions)
    #   trace_simple = pm.sample(2000, init='map')

    with pm.Model() as fairness_model:
        r = pm.Gamma('r', alpha=1, beta=1)
        t = pm.Beta('t', alpha=2, beta=5)
        f = pm.Normal('f', mu=0, sd=sd)
        st = pm.Beta('st', alpha=1, beta=1)
        c = pm.Gamma('c', alpha=1, beta=1)
        odds = pm.math.exp(c * stakes**st + splits * r - f * (splits < 0.5 - t / 2))
        p = odds / (1 + odds)
        a = pm.Binomial('a', 1, p, observed=actions)
        fitted = pm.fit(method='advi')
        trace_fairness = fitted.sample(2000)
        # trace_fairness = pm.sample(200000, step=pm.Slice(), chains=2, cores=4)

    fairness_model.name = 'fair'
    repeated_model.name = 'repeated'
    model_dict = dict(
        zip([fairness_model, repeated_model],
            [trace_fairness, trace_repeated]))
    comp = pm.compare(model_dict, ic='LOO', method='BB-pseudo-BMA')
    return trace_fairness, trace_repeated, comp
Example #23
    def test_bernoulli_process(self):
        """Testing the Bridge Sampler with a Beta-Bernoulli-Process model"""

        # prior parameters
        alpha = np.random.gamma(1.0, 2.0)
        beta = np.random.gamma(1.0, 2.0)
        n = 100

        draws = 10000
        tune = 1000

        print("Testing with alpha = ", alpha, "and beta = ", beta)

        # random data
        p0 = np.random.random()
        expected_error = np.sqrt(p0 * (1 - p0) / n)  # reasonable approximation

        observations = (np.random.random(n) <= p0).astype("int")

        with pm.Model() as BernoulliBeta:

            theta = pm.Beta('pspike', alpha=alpha, beta=beta)
            obs = pm.Categorical('obs',
                                 p=pm.math.stack([theta, 1.0 - theta]),
                                 observed=observations)
            trace = pm.sample(draws=draws, tune=tune)

        # calculate exact marginal likelihood
        n = len(observations)
        k = sum(observations)
        print(n, k)
        exact_log_marg_ll = spf.betaln(alpha + k, beta +
                                       (n - k)) - spf.betaln(alpha, beta)

        # estimate with bridge sampling
        logml_dict = marginal_llk(trace, model=BernoulliBeta, maxiter=10000)
        expected_p = 1.0 - trace["pspike"].mean()

        # should be true in 95% of the runs
        self.assertTrue(
            np.abs(expected_p - p0) < 2 * expected_error,
            msg=
            "Estimated probability is {0:5.3f}, exact is {1:5.3f}, estimated standard deviation is {2:5.3f}. Is this OK?"
            .format(expected_p, p0, expected_error))

        estimated_log_marg_ll = logml_dict["logml"]

        # 3.2 corresponds to a Bayes factor of 'Not worth more than a bare mention'
        self.assertTrue(
            np.abs(estimated_log_marg_ll - exact_log_marg_ll) < np.log(3.2),
            msg=
            "Estimated marginal log likelihood {0:2.5f}, exact marginal log likelihood {1:2.5f}. Is this OK?"
            .format(estimated_log_marg_ll, exact_log_marg_ll))
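For reference, the exact value being tested follows from Beta-Bernoulli conjugacy: for a particular 0/1 sequence with k successes in n trials and success probability drawn from Beta(alpha, beta), the marginal likelihood is B(alpha + k, beta + n - k) / B(alpha, beta), which is what the spf.betaln expression above computes in log space. A standalone numeric sketch with arbitrary values:

from scipy import special as spf

alpha, beta, n, k = 2.0, 3.0, 100, 37
log_marg_ll = spf.betaln(alpha + k, beta + (n - k)) - spf.betaln(alpha, beta)
print(log_marg_ll)  # log of B(alpha + k, beta + n - k) / B(alpha, beta)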
Example #24
def test_1d():
    x_obs = theano.shared(np.zeros(1))
    with pm.Model() as pm_model:
        beta = pm.Beta('beta', 1, 1, transform=None)
        x = pm.Bernoulli('x', beta, observed=x_obs)

    model = PyMC3Model(pm_model)
    data = {x_obs: np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])}
    zs = np.array([[0.5]])
    _test(model, data, zs)
    zs = np.array([[0.4], [0.2], [0.2351], [0.6213]])
    _test(model, data, zs)
Example #25
def fit_spindle_density_prior():
    # data from Purcell
    data = [[85, 177], [89, 148], [93, 115], [98, 71], [105, 42], [117, 20],
            [134, 17], [148, 27], [157, 39], [165, 53], [170, 68], [174, 84],
            [180, 102], [184, 123], [190, 143], [196, 156], [202, 165],
            [210, 173], [217, 176], [222, 177]]
    xscale = [0, 4]
    yscale = [0, 800]
    data_df = get_target_curve(data, xscale, yscale, scale=False)
    sample_data = np.random.choice(a=data_df['x'], p=data_df['y'], size=1000)
    with pm.Model() as model:
        a = pm.HalfNormal('a', 100 * 10)
        b = pm.HalfNormal('b', 100 * 10)
        pm.Beta('spindle_density', alpha=a, beta=b, observed=sample_data)
        trace = pm.sample(2000)
    summary_df = pm.summary(trace)
    a_est = summary_df.loc['a', 'mean']
    b_est = summary_df.loc['b', 'mean']

    n_samples = 10000
    with pm.Model() as model:
        pm.Beta('spindle_density_mean_params', alpha=a_est, beta=b_est)
        outcome = pm.sample(n_samples, cores=1, chains=1)
    # pm.traceplot(trace)
    # plt.show()
    samples = outcome['spindle_density_mean_params']
    sns.distplot(samples, kde=True)
    x = data_df['x']
    y = data_df['y'] * len(samples) * (x[1] - x[0])
    sns.lineplot(x, y)
    plt.show()
    print(summary_df)

    sp_per_epoch = xscale[1] * outcome['spindle_density_mean_params'] * 25 / 60
    counts, bins, patches = plt.hist(sp_per_epoch,
                                     np.arange(0, 8) - 0.5,
                                     density=True)
    sns.distplot(sp_per_epoch, kde=True, hist=False)
    plt.show()
    print(counts, bins)
Example #26
def bayesian_inference_SEIR(day_array, cluster_vel_cases_array, N_SAMPLES):
    # https://discourse.pymc.io/t/how-to-sample-efficiently-from-time-series-data/4928
    N_SAMPLES = 1000  # note: this overrides the N_SAMPLES argument
    s0, e0, i0 = 100., 50., 25.
    st0, et0, it0 = [theano.shared(x) for x in [s0, e0, i0]]

    C = np.array([3, 5, 8, 13, 21, 26, 10, 3], dtype=np.float64)
    D = np.array([1, 2, 3, 7, 9, 11, 5, 1], dtype=np.float64)

    def seir_one_step(st0, et0, it0, beta, gamma, delta):
        bt0 = st0 * beta
        ct0 = et0 * gamma
        dt0 = it0 * delta

        st1 = st0 - bt0
        et1 = et0 + bt0 - ct0
        it1 = it0 + ct0 - dt0
        return st1, et1, it1

    with pm.Model() as model:
        beta = pm.Beta('beta', 2, 10)
        gamma = pm.Beta('gamma', 2, 10)
        delta = pm.Beta('delta', 2, 10)

        (st, et, it), updates = theano.scan(fn=seir_one_step,
                                            outputs_info=[st0, et0, it0],
                                            non_sequences=[beta, gamma, delta],
                                            n_steps=len(C))

        ct = pm.Binomial('c_t', et, gamma, observed=C)
        dt = pm.Binomial('d_t', it, delta, observed=D)

        trace = pm.sample(N_SAMPLES)
        print(trace)
        visualize_trace(trace["beta"][:, None], trace["gamma"][:, None],
                        trace["delta"][:, None], N_SAMPLES)

    with model:
        bt = pm.Binomial('b_t', st, beta, shape=len(C))
        ppc_trace = pm.sample_posterior_predictive(trace, var_names=['b_t'])
Example #27
def make_model(Gd, Cd, *P):
    n, = Gd[0].shape
    with pm.Model() as model:
        # Beta requires alpha > 0; ones(n) here gives a uniform prior on each component
        theta1 = pm.Beta('theta1', alpha=ones(n), beta=ones(n), shape=n)

        theta2 = pm.Beta('theta2', alpha=ones(n), beta=ones(n), shape=n)

        Gamma = pm.Bernoulli('goal', p=theta1, shape=n, observed=Gd)

        C = pm.Bernoulli('context', p=theta2, shape=n, observed=Cd)

        # Results are degree of adjacency
        for i, Pi in enumerate(P):
            beta_i = DUMvNormal('beta {}'.format(i),
                                mu=zeros(m),
                                cov=identity(m),
                                shape=m)

            DUMvNormal('premise {}'.format(i), mu=beta_i[0] * Gamma + beta_i[1] * C, \
                        cov=identity(n), shape=n, observed=Pi)

        return model
Example #28
 def __init__(self, n_to_sample=1000, *args, **kwargs):
     super(BetaBayesianSolver, self).__init__(*args, **kwargs)
     self.n_to_sample = n_to_sample
     self.model = pm.Model()
     self.shared_data = theano.shared(np.ones(1) * 0.5, borrow=True)
     with self.model:
         self.alpha_dist = pm.Uniform('alpha', lower=1.0, upper=7.0)
         self.beta_dist = pm.Uniform('beta', lower=1.0, upper=7.0)
         observed = pm.Beta('obs',
                            alpha=self.alpha_dist,
                            beta=self.beta_dist,
                            observed=self.shared_data)
         self.step = pm.Metropolis()
Example #29
 def test_sample(self):
     n_samples = 100
     xs = np.random.binomial(n=1, p=0.2, size=n_samples)
     with pm.Model():
         p = pm.Beta('p', alpha=1, beta=1)
         pm.Binomial('xs', n=1, p=p, observed=xs)
         app = self.inference().approx
         trace = app.sample(draws=1, include_transformed=False)
         assert trace.varnames == ['p']
         assert len(trace) == 1
         trace = app.sample(draws=10, include_transformed=True)
         assert sorted(trace.varnames) == ['p', 'p_logodds__']
         assert len(trace) == 10
Example #30
def elicit(name, data):
    y = np.sort(data)
    width = y.max() - y.min()
    par = stats.beta.fit(y[1:-1], floc=y.min(), fscale=y.max())
    var = stats.beta(*par)

    scaled_mu = (var.mean() - y.min()) / width  # shift into the unit interval before scaling
    scaled_sd = var.std() / width
    scaled = mc.Beta(f"{name}_scaled__", mu=scaled_mu, sd=scaled_sd)
    dist = mc.Deterministic(name, y.min() + (scaled * width))
    dist.var = var

    return dist
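Here mc is assumed to alias pymc3 (and stats to alias scipy.stats), and elicit must run inside a model context, since mc.Beta and mc.Deterministic register variables on the current model. A usage sketch with made-up expert data:

import numpy as np
import pymc3 as mc

expert_guesses = np.array([0.5, 3.0, 4.5, 6.0, 9.5])
with mc.Model():
    duration = elicit("duration", expert_guesses)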