Example #1
def test_mixture_random_shape():
    # test the shape broadcasting in mixture random
    y = np.concatenate([nr.poisson(5, size=10), nr.poisson(9, size=10)])
    with pm.Model() as m:
        comp0 = pm.Poisson.dist(mu=np.ones(2))
        w0 = pm.Dirichlet('w0', a=np.ones(2))
        like0 = pm.Mixture('like0', w=w0, comp_dists=comp0, observed=y)

        comp1 = pm.Poisson.dist(mu=np.ones((20, 2)), shape=(20, 2))
        w1 = pm.Dirichlet('w1', a=np.ones(2))
        like1 = pm.Mixture('like1', w=w1, comp_dists=comp1, observed=y)

        comp2 = pm.Poisson.dist(mu=np.ones(2))
        w2 = pm.Dirichlet('w2', a=np.ones(2), shape=(20, 2))
        like2 = pm.Mixture('like2', w=w2, comp_dists=comp2, observed=y)

        comp3 = pm.Poisson.dist(mu=np.ones(2), shape=(20, 2))
        w3 = pm.Dirichlet('w3', a=np.ones(2), shape=(20, 2))
        like3 = pm.Mixture('like3', w=w3, comp_dists=comp3, observed=y)

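    # All four parameterizations should broadcast against the 20 observed points,
    # so every set of draws below has shape (size, 20).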
    rand0, rand1, rand2, rand3 = draw_values([like0, like1, like2, like3],
                                             point=m.test_point,
                                             size=100)
    assert rand0.shape == (100, 20)
    assert rand1.shape == (100, 20)
    assert rand2.shape == (100, 20)
    assert rand3.shape == (100, 20)

    with m:
        ppc = pm.sample_posterior_predictive([m.test_point], samples=200)
    assert ppc['like0'].shape == (200, 20)
    assert ppc['like1'].shape == (200, 20)
    assert ppc['like2'].shape == (200, 20)
    assert ppc['like3'].shape == (200, 20)
def v2_model(observations,
             nulls,
             null_sd,
             null_b,
             null_dispersed_prob,
             iter_count=2000,
             tune_iters=2000):
    with pm.Model() as model:
        # Probability of being a DE gene
        de_prob = pm.Beta('de_prob', alpha=1., beta=5.)

        # Probability of being downregulated
        down_prob = pm.Beta('down_prob', alpha=1., beta=1.)

        dispersed_prob = null_dispersed_prob

        mu_pos = pm.Lognormal('mu_pos', mu=-3, sd=1.)
        mu_neg = pm.Lognormal('mu_neg', mu=-3, sd=1.)
        sd_pos = pm.Gamma('sd_pos', alpha=0.01, beta=1.)
        sd_neg = pm.Gamma('sd_neg', alpha=0.01, beta=1.)
        nu_pos = pm.Gamma('nu_pos', alpha=5., beta=1.)
        nu_neg = pm.Gamma('nu_neg', alpha=5., beta=1.)

        spike_component = pm.Normal.dist(mu=0., sd=null_sd)
        slab_component = pm.Laplace.dist(mu=0., b=null_b)

        # Sample from Gaussian-Laplace mixture for null (spike-and-slab mixture)
        pm.Mixture('null',
                   comp_dists=[spike_component, slab_component],
                   w=tt.as_tensor([1. - dispersed_prob, dispersed_prob]),
                   observed=nulls)

        pos_component = pm.Bound(pm.StudentT, lower=0.).dist(mu=mu_pos,
                                                             sd=sd_pos,
                                                             nu=nu_pos)
        neg_component = pm.Bound(pm.StudentT, upper=0.).dist(mu=-mu_neg,
                                                             sd=sd_neg,
                                                             nu=nu_neg)

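        # Mixture weights factor as P(class) * P(component | class); the four terms
        # sum to one since (1 - de_prob)[(1 - dispersed_prob) + dispersed_prob]
        # + de_prob[(1 - down_prob) + down_prob] = 1.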
        pm.Mixture('obs',
                   w=tt.as_tensor([(1. - de_prob) * (1. - dispersed_prob),
                                   (1. - de_prob) * dispersed_prob,
                                   de_prob * (1. - down_prob),
                                   de_prob * down_prob]),
                   comp_dists=[
                       spike_component, slab_component, pos_component,
                       neg_component
                   ],
                   observed=observations)

        pm.Deterministic('log_prob', model.logpt)

        for RV in model.basic_RVs:
            print(RV.name, RV.logp(model.test_point))

        trace = pm.sample(iter_count, tune=tune_iters, chains=4)
        ppc = pm.sample_ppc(trace, samples=iter_count, model=model)

    return {'trace': trace, 'ppc': ppc}
Example #3
def test_mixture_random_shape():
    # test the shape broadcasting in mixture random
    y = np.concatenate([nr.poisson(5, size=10),
                        nr.poisson(9, size=10)])
    with pm.Model() as m:
        comp0 = pm.Poisson.dist(mu=np.ones(2))
        w0 = pm.Dirichlet('w0', a=np.ones(2))
        like0 = pm.Mixture('like0',
                           w=w0,
                           comp_dists=comp0,
                           shape=y.shape,
                           observed=y)

        comp1 = pm.Poisson.dist(mu=np.ones((20, 2)),
                                shape=(20, 2))
        w1 = pm.Dirichlet('w1', a=np.ones(2))
        like1 = pm.Mixture('like1',
                           w=w1,
                           comp_dists=comp1, observed=y)

        comp2 = pm.Poisson.dist(mu=np.ones(2))
        w2 = pm.Dirichlet('w2',
                          a=np.ones(2),
                          shape=(20, 2))
        like2 = pm.Mixture('like2',
                           w=w2,
                           comp_dists=comp2,
                           observed=y)

        comp3 = pm.Poisson.dist(mu=np.ones(2),
                                shape=(20, 2))
        w3 = pm.Dirichlet('w3',
                          a=np.ones(2),
                          shape=(20, 2))
        like3 = pm.Mixture('like3',
                           w=w3,
                           comp_dists=comp3,
                           observed=y)

    rand0 = like0.distribution.random(m.test_point, size=100)
    assert rand0.shape == (100, 20)

    rand1 = like1.distribution.random(m.test_point, size=100)
    assert rand1.shape == (100, 20)

    rand2 = like2.distribution.random(m.test_point, size=100)
    assert rand2.shape == (100, 20)

    rand3 = like3.distribution.random(m.test_point, size=100)
    assert rand3.shape == (100, 20)

    with m:
        ppc = pm.sample_ppc([m.test_point], samples=200)
    assert ppc['like0'].shape == (200, 20)
    assert ppc['like1'].shape == (200, 20)
    assert ppc['like2'].shape == (200, 20)
    assert ppc['like3'].shape == (200, 20)
Example #4
def main():
    # Hyperparameters
    n_flips = 125
    n_coins = 10
    n_draws = 5000
    n_init_steps = 10000
    n_burn_in_steps = 1000

    # Create Causal Distribution
    causal_probs = np.random.uniform(size=n_coins)

    # Create Observations
    X = np.array([
        np.random.choice(2, p=[1 - p_, p_], size=n_flips)
        for i, p_ in enumerate(causal_probs)
    ]).T

    # Create Model
    with pm.Model() as model:
        ps = pm.Beta('probs', alpha=1, beta=1, shape=n_coins)
        components = pm.Bernoulli.dist(p=ps, shape=n_coins)
        w = pm.Dirichlet('w', a=np.ones(n_coins))
        mix = pm.Mixture('mix', w=w, comp_dists=components, observed=X)

    # Train Model
    with model:
        trace = pm.sample(n_draws, n_init=n_init_steps, tune=n_burn_in_steps)

    # Display Results
    pm.plot_trace(trace, var_names=['w', 'probs'])
    plt.show()
    pm.plot_posterior(trace, var_names=['w', 'probs'])
    plt.show()
Example #5
    def nb_mixture(self, N=1000, tune=1000):
        dat = np.asarray(self.data)
        kwargs = self.kwargs
        if len(kwargs) < 1:
            print("Missing args for nb mixture model estimation")
            sys.exit(2)
        print(np.max(dat))
        with pm.Model() as model:
            mu1 = pm.Uniform('mu1', lower=1, upper=self.mu)
            mu2 = pm.Uniform('mu2', lower=1, upper=self.kwargs['mu2'])

            alpha1 = pm.Uniform('alpha1', lower=0, upper=self.alpha)
            alpha2 = pm.Uniform('alpha2', lower=0, upper=self.kwargs['alpha2'])

            w = pm.Dirichlet('w', a=np.array([1, 1]))

            nb1 = pm.NegativeBinomial.dist(mu=mu1, alpha=alpha1)
            nb2 = pm.NegativeBinomial.dist(mu=mu2, alpha=alpha2)

            like = pm.Mixture('like', w=w, comp_dists=[nb1, nb2], observed=dat)
            trace_n = pm.sample(N, tune=tune, cores=2)
        return trace_n
Example #6
    def create_model(self, Pm, pol_br_init, delta):
        with pm.Model() as model:
            #set priors for parameters
            etal = pm.Normal("eta_l", mu=0, sigma=10)
            etad = pm.HalfNormal("eta_d", sigma=2)
            etah = pm.Deterministic("eta_h", etal + etad)
            tauy = pm.HalfNormal("tau_y", sigma=2)
            prg = pm.Uniform("prg", lower=self.qhigh, upper=1)
            prb = pm.Uniform("prb", lower=0, upper=self.qlow)
            etam = pm.Normal("etam", mu=0, sigma=10, shape=self.nm)
            beta = pm.Normal("beta", mu=0, sigma=10)

            #eqvars is a deterministic variable that computes the equilibrium and returns the stuff I need for the likelihood
            eqvars = pm.Deterministic(
                "eqvars",
                self.eqinfo(etah, etal, tauy, prg, prb, etam, beta, Pm,
                            pol_br_init, delta))
            peff = eqvars[0]
            rvoteprob1 = eqvars[1]
            rvoteprob2 = eqvars[2]

            #compute initial voter beliefs
            zdata = self.data['party_1'] * self.data['zr_1'] + (
                1 - self.data['party_1']) * self.data['zd_1']
            mu_top = pipardata * (prg**zdata) * (1 - prg)**(1 - zdata)
            mu_bottom = mu_top + (1 - pipardata) * (prb**zdata) * (1 - prb)**(
                1 - zdata)
            mu = mu_top / mu_bottom

            #mixture weights: [good type, bad type high effort, bad type low effort]
            my_w = [mu, (1 - mu) * peff, (1 - mu) * (1 - peff)]

            #component distributions (2-dimensional multivariate normals)
            mvcomp1 = pm.MvNormal.dist(mu=[etah, etah],
                                       cov=np.identity(2) * (tauy**(-2)),
                                       shape=2)
            mvcomp2 = pm.MvNormal.dist(mu=[etah, etal],
                                       cov=np.identity(2) * (tauy**(-2)),
                                       shape=2)
            mvcomp3 = pm.MvNormal.dist(mu=[etal, etal],
                                       cov=np.identity(2) * (tauy**(-2)),
                                       shape=2)

            #likelihood
            Y_obs = pm.Mixture('Y',
                               w=my_w,
                               comp_dists=[mvcomp1, mvcomp2, mvcomp3],
                               observed=Y)
            rvotes1 = pm.Binomial('rvotes1',
                                  n=self.data['rvotes_1'] + self.data['dvotes_1'],
                                  p=rvoteprob1,
                                  observed=self.data['rvotes_1'])
            rvotes2 = pm.Binomial('rvotes2',
                                  n=self.data['rvotes_2'] + self.data['dvotes_2'],
                                  p=rvoteprob2,
                                  observed=self.data['rvotes_2'])
Example #7
    def get_model(self, vfield0, sigma_vfield0, rlim=1 * u.kpc):
        # Number of prior mixture components:
        with pm.Model() as model:

            # True distance:
            BoundedR = pm.Bound(UniformSpaceDensity,
                                lower=0,
                                upper=rlim.to_value(u.pc))
            r = BoundedR("r", rlim.to_value(u.pc), shape=(self.N, 1))

            # Milky Way velocity distribution
            K = vfield0.shape[0]
            w = pm.Dirichlet('w', a=np.ones(K))

            # Set up means and variances:
            meanvs = []
            sigvs = []
            for k in range(K):
                vtmp = pm.Normal(f'vmean{k}', vfield0[k], 10., shape=3)  # HACK

                BoundedNormal = pm.Bound(pm.Normal, lower=1.5, upper=5.3)
                lnstmp = BoundedNormal(f'lns{k}', np.log(sigma_vfield0[k]),
                                       0.2)
                stmp = pm.Deterministic(f'vsig{k}', tt.exp(lnstmp))

                meanvs.append(vtmp)
                sigvs.append(stmp)

            pvdists = []
            for k in range(K):
                pvtmp = pm.MvNormal.dist(meanvs[k],
                                         tau=np.eye(3) * 1 / sigvs[k]**2,
                                         shape=3)
                pvdists.append(pvtmp)
            vxyz = pm.Mixture('vxyz',
                              w=w,
                              comp_dists=pvdists,
                              shape=(self.N, 3))

            # Velocity in tangent plane coordinates
            vtan = tt.batched_dot(self.Ms, vxyz)

            model_pm = vtan[:, :2] / r * pc_mas_yr_per_km_s
            model_rv = vtan[:, 2:3]
            model_y = tt.concatenate((1000 / r, model_pm, model_rv), axis=1)

            pm.Deterministic('model_y', model_y)
            # val = pm.MvNormal('like', mu=model_y, tau=Cinv, observed=y)
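            # The Potential below adds the multivariate Gaussian log-likelihood as a
            # chi-squared term, equivalent to the commented MvNormal up to a constant.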
            dy = self.ys - model_y
            pm.Potential(
                'chisq',
                -0.5 * tt.batched_dot(dy, tt.batched_dot(self.Cinvs, dy)))

        return model
Example #8
def build_model(data, K):
    n_ppt = len(data)
    print('Building model with n=%d,K=%d' % (n_ppt, K))
    with pm.Model() as gmm:
        #Prior
        if K > 1:
            p = pm.Dirichlet('p',
                             a=pm.floatX(np.array([1.] * K)),
                             testval=pm.floatX(np.ones(K) / K))
        mus_p = [
            pm.MvNormal('mu_%s' % pid,
                        mu=pm.floatX(np.zeros(2)),
                        tau=pm.floatX(0.1 * np.eye(2)),
                        shape=(K, 2)) for pi, pid in enumerate(data.keys())
        ]

        packed_L = [[
            pm.LKJCholeskyCov('packed_L_%s_%d' % (pid, i),
                              n=2,
                              eta=pm.floatX(2.),
                              sd_dist=pm.HalfCauchy.dist(.01))
            for i in range(K)
        ] for pi, pid in enumerate(data.keys())]
        L = [[
            pm.expand_packed_triangular(2, packed_L[pi][i]) for i in range(K)
        ] for pi, pid in enumerate(data.keys())]

        sigma = [[
            pm.Deterministic('sigma_%s_%d' % (pid, i),
                             L[pi][i].dot(L[pi][i].T)) for i in range(K)
        ] for pi, pid in enumerate(data.keys())]

        if K > 1:
            mvnl = [[
                pm.MvNormal.dist(mu=mus_p[pi][i], chol=L[pi][i])
                for i in range(K)
            ] for pi in range(n_ppt)]
            Y_obs = [
                pm.Mixture('Y_obs_%s' % pid,
                           w=p,
                           comp_dists=mvnl[pi],
                           observed=data[pid])
                for pi, pid in enumerate(data.keys())
            ]
        else:
            Y_obs = [
                pm.MvNormal('Y_obs_%s' % pid,
                            mu=mus_p[pi][0],
                            chol=L[pi][0],
                            observed=data[pid])
                for pi, pid in enumerate(data.keys())
            ]

    return gmm
Example #9
    def test_sample_prior_and_posterior(self):
        def build_toy_dataset(N, K):
            pi = np.array([0.2, 0.5, 0.3])
            mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
            stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
            x = np.zeros((N, 3), dtype=np.float32)
            y = np.zeros((N, ), dtype=int)
            for n in range(N):
                k = np.argmax(np.random.multinomial(1, pi))
                x[n, :] = np.random.multivariate_normal(
                    mus[k], np.diag(stds[k]))
                y[n] = k
            return x, y

        N = 100  # number of data points
        K = 3  # number of mixture components
        D = 3  # dimensionality of the data

        X, y = build_toy_dataset(N, K)

        with pm.Model() as model:
            pi = pm.Dirichlet("pi", np.ones(K), shape=(K, ))

            comp_dist = []
            mu = []
            packed_chol = []
            chol = []
            for i in range(K):
                mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
                packed_chol.append(
                    pm.LKJCholeskyCov("chol_cov_%i" % i,
                                      eta=2,
                                      n=D,
                                      sd_dist=pm.HalfNormal.dist(2.5)))
                chol.append(
                    pm.expand_packed_triangular(D, packed_chol[i], lower=True))
                comp_dist.append(
                    pm.MvNormal.dist(mu=mu[i], chol=chol[i], shape=D))

            pm.Mixture("x_obs", pi, comp_dist, observed=X)
        with model:
            trace = pm.sample(30, tune=10, chains=1)

        n_samples = 20
        with model:
            ppc = pm.sample_posterior_predictive(trace, n_samples)
            prior = pm.sample_prior_predictive(samples=n_samples)
        assert ppc["x_obs"].shape == (n_samples, ) + X.shape
        assert prior["x_obs"].shape == (n_samples, ) + X.shape
        assert prior["mu0"].shape == (n_samples, D)
        assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
    def fit(self, y, yerr, x, gmm_params, sample_kwargs={}):
        """
        yerr :
            The uncertainties (standard deviations) in measured y.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        yerr = np.asarray(yerr)

        # Left merge dictionary
        default_sample_kwargs = dict(draws=1000, tune=1000, chains=2)
        sample_kwargs = {**default_sample_kwargs, **sample_kwargs}

        nsamples = y.shape[0]

        assert gmm_params.shape[1] % 3 == 0, "gmm_params.shape[1] must be a multiple of 3."
        k = gmm_params.shape[1] // 3
        mu_x = gmm_params[:, :k]
        sigma_x = gmm_params[:, k:2 * k]
        weights_x = gmm_params[:, 2 * k:3 * k]

        with pm.Model() as model:  # noqa
            # Priors
            intercept = pm.Uniform("intercept", -1, 1)
            slope = pm.Uniform("slope", -1, 0)
            scatter_sigma = pm.HalfNormal("scatter", 2)

            components = []
            for i in range(k):
                component = pm.Normal.dist(mu=mu_x[:, i],
                                           sigma=sigma_x[:, i],
                                           shape=nsamples)
                components.append(component)
            x = pm.Mixture("x",
                           w=weights_x,
                           comp_dists=components,
                           shape=nsamples)

            # Likelihoods
            # Constant value for true y which served as the mean value of the observed y
            y_true = pm.Deterministic("y_true", slope * x + intercept)
            # Standard deviation of observed y as Gaussian errors
            y_sigma = pm.Deterministic(
                "sigma", pm.math.sqrt(yerr**2 + scatter_sigma**2))
            # Altogether, observed y is normally distributed
            y_ = pm.Normal("y", mu=y_true, sigma=y_sigma, observed=y)

            trace = pm.sample(**sample_kwargs)

        return trace, model
Example #11
def level_model(y_old, y_new):
    # PyMC3 level changepoint model
    # level is modeled by Poisson RVs
    # y_old: older data points since the last changepoint
    # y_new: last win(10) datapoints
    mean_new = y_new.mean() if len(y_new) > 0 else None
    mean_old = y_old.mean() if len(y_old) > 0 else mean_new
    y_ = np.concatenate((y_old, y_new))
    y_obs = theano.shared(y_)
    with pm.Model() as model:
        w = pm.Dirichlet('w', a=np.ones(2))
        lambda_ = pm.Exponential('lambda', lam=np.array([1.0 / mean_old, 1.0 / mean_new]), shape=(2,))
        components = pm.Poisson.dist(mu=lambda_, shape=(2, ))
        diff = pm.Deterministic('diff', lambda_[0] - lambda_[1])
        obs = pm.Mixture('obs', w=w, comp_dists=components, observed=y_obs)
    return model
Example #12
def mixture_model(data):
    with pm.Model() as model:
        hyper_mean = pm.Uniform('hyper_mean', -100, 10)
        hyper_mean1 = pm.Uniform('hyper_mean1', 100, 300)
    
        hyper_sigma = pm.Uniform('hyper_sigma', 0, 100)
        hyper_sigma1 = pm.Uniform('hyper_sigma1', 0, 150)
    
        component = pm.Normal.dist(mu=hyper_mean, sd=hyper_sigma)
        component1 = pm.Normal.dist(mu=hyper_mean1, sd=hyper_sigma1)
    
        w = pm.Dirichlet('w', a=np.array([1, 1]))
        like = pm.Mixture('like', w=w, comp_dists=[component, component1], observed=data)

    with model:
        trace = pm.sample(5000, tune=2500, njobs=1)[1000:]
Example #13
def build_model(data, K):
    N = data.shape[0]
    d = data.shape[1]
    print('Building model with n=%d, d=%d, k=%d' % (N, d, K))
    with pm.Model() as gmm:
        #Prior over component weights
        if K > 1:
            p = pm.Dirichlet('p', a=np.array([1.] * K))

        #Prior over component means
        mus = [
            pm.MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(d)),
                        tau=pm.floatX(0.1 * np.eye(d)),
                        shape=(d, ))
            #testval = pm.floatX(np.ones(d)))
            for i in range(K)
        ]
        #Cholesky decomposed LKJ prior over component covariance matrices
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i,
                              n=d,
                              eta=2.,
                              sd_dist=pm.HalfCauchy.dist(1))
            #testval = pm.floatX(np.ones(int(d*(d-1)/2+d))))
            for i in range(K)
        ]
        #Unpack packed_L into full array
        L = [pm.expand_packed_triangular(d, packed_L[i]) for i in range(K)]
        #Convert L to sigma and tau for convenience
        sigma = [
            pm.Deterministic('sigma_%d' % i, L[i].dot(L[i].T))
            for i in range(K)
        ]
        tau = [
            pm.Deterministic('tau_%d' % i, matrix_inverse(sigma[i]))
            for i in range(K)
        ]

        #Specify the likelihood
        if K > 1:
            mvnl = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]
            Y_obs = pm.Mixture('Y_obs', w=p, comp_dists=mvnl, observed=data)
        else:
            Y_obs = pm.MvNormal('Y_obs', mu=mus[0], chol=L[0], observed=data)

    return gmm
Example #14
def get_model():

    conf = bayes_workshop.conf.get_conf()

    (cols, data) = bayes_workshop.data.get_data()

    demo_subj_id = 2
    demo_modality = "visual"

    i_demo_trials = np.logical_and(
        data[:, cols.index("i_subj")] == demo_subj_id,
        (data[:, cols.index("i_modality")]
         == conf.modalities.index(demo_modality)))

    demo_data = data[i_demo_trials, :]

    (n_trials, _) = demo_data.shape

    responses = demo_data[:, cols.index("target_longer")]
    cmp_dur = demo_data[:, cols.index("target_duration")]

    with pm.Model() as model:

        alpha = pm.Normal("alpha", mu=conf.standard_ms, sd=50.0)

        beta = pm.HalfNormal("beta", sd=100.0)

        lapse_bias = pm.Uniform("psi", lower=0.0, upper=1.0)

        # probability of lapsing on each trial
        lapse_p = pm.Uniform("phi", lower=0.0, upper=1.0)

        lapse_ind = pm.Bernoulli("lapse_ind", p=lapse_p, shape=n_trials)

        theta_pf = bayes_workshop.utils.logistic(x=cmp_dur,
                                                 alpha=alpha,
                                                 beta=beta)

        obs_pf = pm.Bernoulli.dist(p=theta_pf)
        obs_lapse = pm.Bernoulli.dist(p=lapse_bias, shape=n_trials)

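        # Per-trial mixture weights: when lapse_ind == 1 the response comes from the
        # lapse distribution, otherwise from the psychometric function.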
        mix = pm.Mixture("obs",
                         w=tt.stack([1.0 - lapse_ind, lapse_ind], axis=1),
                         comp_dists=[obs_pf, obs_lapse],
                         observed=responses)

    return model
	def run_non_sparse_initialization(self):
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			ALPHA = pm.HalfCauchy('ALPHA', beta=5, shape=(1, self.S))
			BETA = pm.Normal('BETA', mu=0, tau=(1.0/10.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			U = pm.Normal('U', mu=0, tau=(1.0/1.0), shape=(self.N, self.K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/1.0), shape=(self.S, self.K), testval=self.V_init)

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U,V.T) + A[self.Z,:])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)

			w = pm.Dirichlet('w', a=np.ones((self.S,2)))

			beta_null_mat = pm.math.dot(np.ones((self.N,1)), ALPHA)

			BB_GLM = pm.BetaBinomial.dist(alpha=(p*conc_mat), beta=((1.0-p)*conc_mat), n=self.total_counts)
			BB_NULL = pm.BetaBinomial.dist(alpha=(np.ones((self.N,self.S))), beta=7.0+beta_null_mat, n=self.total_counts)

			mixmod = pm.Mixture('mixmodel', w=w, comp_dists=[BB_GLM, BB_NULL], observed=self.allelic_counts, shape=2)
			approx = pm.fit(method='advi', n=2000)
		#pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
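		# Recover the mixture weights w from the unconstrained ADVI means by
		# inverting the stick-breaking transform (append the negative sum, then softmax).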
		y = means_dict['w_stickbreaking__'].T
		y = np.concatenate([y, -np.sum(y, 0, keepdims=True)])
		e_y = np.exp(y - np.max(y, 0, keepdims=True))
		w_learned = e_y / np.sum(e_y, 0, keepdims=True)
		self.conc_init = np.exp(means_dict['CONC_log__'])
		self.alpha_init = np.exp(means_dict['ALPHA_log__'])
		self.beta_init = means_dict['BETA']
		self.U_init = means_dict['U']
		self.V_init = means_dict['V']
		self.mu_a_init = means_dict['MU_A']
		self.sigma_a_init = np.exp(means_dict['SIGMA_A_log__'])
		self.A_init = means_dict['A']
		self.w_init = w_learned.T
	def run_factorization(self):
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			ALPHA = pm.HalfCauchy('ALPHA', beta=5, shape=(1, self.S), testval=self.alpha_init)
			BETA = pm.Normal('BETA', mu=0, tau=(1.0/10.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			gamma = pm.HalfCauchy('GAMMA', beta=5, shape=(self.N, self.K), testval=np.ones((self.N, self.K)))
			U = pm.Normal('U', mu=0, sigma=1.0/gamma, shape=(self.N, self.K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/1.0), shape=(self.S, self.K), testval=self.V_init)

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U,V.T) + A[self.Z,:])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)

			w = pm.Dirichlet('w', a=np.ones((self.S,2)), testval=self.w_init)

			beta_null_mat = pm.math.dot(np.ones((self.N,1)), ALPHA)

			BB_GLM = pm.BetaBinomial.dist(alpha=(p*conc_mat), beta=((1.0-p)*conc_mat), n=self.total_counts)
			BB_NULL = pm.BetaBinomial.dist(alpha=(np.ones((self.N,self.S))), beta=7.0+beta_null_mat, n=self.total_counts)

			mixmod = pm.Mixture('mixmodel', w=w, comp_dists=[BB_GLM, BB_NULL], observed=self.allelic_counts, shape=2)
			approx = pm.fit(method='advi', n=30000)
		#pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_ALPHA.txt', (np.exp(means_dict['ALPHA_log__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_GAMMA.txt', (np.exp(means_dict['GAMMA_log__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_CONC.txt', (np.exp(means_dict['CONC_log__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_w_stick_breaking.txt', (np.exp(means_dict['w_stickbreaking__'])), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_ELBO.txt', (approx.hist), fmt="%s", delimiter='\t')
def run_null_model(nulls, iter_count=2000, tune_iters=2000):
    with pm.Model() as model:
        sd_null = pm.Gamma('sd_null', alpha=.1, beta=1.)
        b_null = pm.Gamma('b_null', alpha=1., beta=.1)

        dispersed_prob = pm.Beta('dispersed_prob', alpha=1., beta=1.)

        pm.Mixture('null',
                   comp_dists=[
                       pm.Normal.dist(mu=0., sd=sd_null),
                       pm.Laplace.dist(mu=0., b=b_null)
                   ],
                   w=tt.as_tensor([1. - dispersed_prob, dispersed_prob]),
                   observed=nulls)

        pm.Deterministic('log_prob', model.logpt)

        trace = pm.sample(iter_count, tune=tune_iters, chains=4)
        ppc = pm.sample_ppc(trace, samples=iter_count, model=model)

    return {'trace': trace, 'ppc': ppc}
Example #18
    def _model_eval(self, X, K, n_samples):
        # setup model
        with pm.Model() as model:
            data_dim = X.shape[1]
            # prior of mixture ratio
            w = pm.Dirichlet('w', a=np.ones(K))
            # setup the likelihood
            init_mu = np.zeros(data_dim)
            components = [
                self._multivariate_normal_dist(init_mu, suffix=k)
                for k in range(K)
            ]
            like = pm.Mixture('like', w=w, comp_dists=components, observed=X)

        # fit model
        with model:
            trace = pm.sample(2000,
                              step=pm.NUTS(),
                              start=pm.find_MAP(),
                              tune=1000)

        # store the result
        self.result['K=' + str(K)] = trace
# Define the stick-breaking process
def stick_breaking(beta):
    portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])

    return beta * portion_remaining
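
# A minimal NumPy sanity check of what stick_breaking computes (illustrative sketch;
# the beta draws and truncation level here are arbitrary assumptions):
# w_k = beta_k * prod_{j<k}(1 - beta_j), so the weights are positive and sum to < 1.
import numpy as np
_beta_demo = np.random.beta(1., 3., size=10)
_w_demo = _beta_demo * np.concatenate([[1.], np.cumprod(1. - _beta_demo)[:-1]])
assert 0. < _w_demo.sum() < 1.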



with pm.Model() as model:
    M = pm.Gamma('M', 1., 1.)

    sigma = pm.Uniform('sigma', 0., 1.)
    mu1 = pm.Normal('mu', 0., 1.)
    xi = pm.InverseGamma('xi', 1., 1.)
    b = pm.Normal('b', 0., xi, shape=N)

    sigma_w = pm.Uniform('sigma_w', 0., 1., shape=K)
    rho = pm.Uniform('rho', 0., 1., shape=K)

    beta = pm.Beta('beta', 1., M, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))

    omega = pm.Mixture('omega', w,
                       pm.MvNormal.dist(mu=mu0,
                                        cov=sigma_w[:, np.newaxis, np.newaxis]**2 * H(rho)))

    obs = pm.MvNormal('obs', mu=(mu1 + b) * e + omega,
                      cov=sigma**2 * I, observed=dataSet1)
Example #20
        # Convert L to sigma for convenience
        sigma = [[
            pm.Deterministic('sigma_%d_%d' % (pid, i),
                             L[pi][i].dot(L[pi][i].T)) for i in range(k)
        ] for pi, pid in enumerate(data.keys())]

        # Specify the likelihood
        if k > 1:
            mvnl = [[
                pm.MvNormal.dist(mu=mus_p[pi][i], chol=L[pi][i])
                for i in range(k)
            ] for pi in range(n_ppt)]
            Y_obs = [
                pm.Mixture('Y_obs_%d' % pid,
                           w=p,
                           comp_dists=mvnl[pi],
                           observed=data[pid])
                for pi, pid in enumerate(data.keys())
            ]
        else:
            Y_obs = [
                pm.MvNormal('Y_obs_%d' % pid,
                            mu=mus_p[pi][0],
                            chol=L[pi][0],
                            observed=data[pid])
                for pi, pid in enumerate(data.keys())
            ]

## Run sampler
# Initialize list of traces
traces = []
Example #21
    def _multivariate_normal_prior(self, key):
        """
        Map the bilby MultivariateNormal prior to a PyMC3 style function.
        """

        # check that the prior is a MultivariateGaussian
        pymc3, STEP_METHODS, floatX = self._import_external_sampler()
        theano, tt, as_op = self._import_theano()
        if isinstance(self.priors[key], MultivariateGaussian):
            # get names of multivariate Gaussian parameters
            mvpars = self.priors[key].mvg.names

            # set the prior on multiple parameters if not present yet
            if not np.all([p in self.multivariate_normal_sets for p in mvpars]):
                mvg = self.priors[key].mvg

                # get bounds
                lower = [bound[0] for bound in mvg.bounds.values()]
                upper = [bound[1] for bound in mvg.bounds.values()]

                # test values required for mixture
                testvals = []
                for bound in mvg.bounds.values():
                    if np.isinf(bound[0]) and np.isinf(bound[1]):
                        testvals.append(0.)
                    elif np.isinf(bound[0]):
                        testvals.append(bound[1] - 1.)
                    elif np.isinf(bound[1]):
                        testvals.append(bound[0] + 1.)
                    else:
                        # half-way between the two bounds
                        testvals.append(bound[0] + (bound[1] - bound[0]) / 2.)

                # if bounds are at +/-infinity set to 100 sigmas as infinities
                # cause problems for the Bound class
                maxmu = np.max(mvg.mus, axis=0)
                minmu = np.min(mvg.mus, axis=0)
                maxsigma = np.max(mvg.sigmas, axis=0)
                for i in range(len(mvpars)):
                    if np.isinf(lower[i]):
                        lower[i] = minmu[i] - 100. * maxsigma[i]
                    if np.isinf(upper[i]):
                        upper[i] = maxmu[i] + 100. * maxsigma[i]

                # create a bounded MultivariateNormal distribution
                BoundedMvN = pymc3.Bound(pymc3.MvNormal, lower=lower, upper=upper)

                comp_dists = []  # list of any component modes
                for i in range(mvg.nmodes):
                    comp_dists.append(BoundedMvN('comp{}'.format(i), mu=mvg.mus[i],
                                                 cov=mvg.covs[i],
                                                 shape=len(mvpars)).distribution)

                # create a Mixture model
                setname = 'mixture{}'.format(self.multivariate_normal_num_sets)
                mix = pymc3.Mixture(setname, w=mvg.weights, comp_dists=comp_dists,
                                    shape=len(mvpars), testval=testvals)

                for i, p in enumerate(mvpars):
                    self.multivariate_normal_sets[p] = {}
                    self.multivariate_normal_sets[p]['prior'] = mix[i]
                    self.multivariate_normal_sets[p]['set'] = setname
                    self.multivariate_normal_sets[p]['index'] = i

                self.multivariate_normal_num_sets += 1

            # return required parameter
            return self.multivariate_normal_sets[key]['prior']

        else:
            raise ValueError("Prior for '{}' is not a MultivariateGaussian".format(key))
Example #22
    def sample(self, name_chain, noncentered=False):
        """
        Sample from the hierarchical model
        p(mu) ~ U(0,1)
        p(phi) ~ U(0,pi)
        p(muB) ~ U(0.0, 1.0)
        p(muC) ~ U(0.0, 1.0)
        p(sigmaB) ~ HN(sd=1)
        p(sigmaC) ~ HN(sd=1)
        p(B|muB,sigmaB) ~ BoundedNormal(mu=muB, sd=sigmaB, lower=0, upper=1)
        p(C|muC,sigmaC) ~ BoundedNormal(mu=muC, sd=sigmaC, lower=0, upper=1)
        qobs ~ N(mu=q, sd=noise)
        q = f(B,C,mu,phi)

        """

        self.name_chain = name_chain

        A = 1.0

        # Define the probabilistic model
        self.model = pm.Model()
        with self.model:

            # Priors for orientation
            mu = pm.Uniform('mu',
                            lower=0,
                            upper=1.0,
                            testval=0.5,
                            shape=self.n_galaxies)
            phi = pm.Uniform('phi',
                             lower=0,
                             upper=np.pi / 2.0,
                             testval=0.1,
                             shape=self.n_galaxies)

            # Priors for means and standard deviations. Perhaps one should play a little with the
            # priors for sdB and sdC because they are usually not very well constrained by data
            muCB_ = pm.Uniform('muCB_',
                               lower=0.0,
                               upper=1.0,
                               testval=[0.3, 0.8],
                               shape=2)
            muCB = pm.Deterministic('muCB', tt.sort(muCB_))

            muCB2_ = pm.Uniform('muCB2_',
                                lower=0.0,
                                upper=1.0,
                                testval=[0.3, 0.8],
                                shape=2)
            muCB2 = pm.Deterministic('muCB2', tt.sort(muCB2_))

            sdCB = pm.HalfNormal('sdCB', sd=0.05, shape=2)
            sdCB2 = pm.HalfNormal('sdCB2', sd=0.05, shape=2)

            w = pm.Dirichlet('w', np.ones(2))

            # Use a non-centered model (http://twiecki.github.io/blog/2017/02/08/bayesian-hierchical-non-centered/)
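            # Non-centered form: CB = muCB + sdCB * offset with offset ~ N(0, 1),
            # which typically samples better when sdCB is small.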
            if (noncentered):
                offset = pm.Normal('offset',
                                   mu=0,
                                   sd=1,
                                   shape=(self.n_galaxies, 2))
                CB_ = pm.Deterministic('CB_',
                                       tt.clip(muCB + offset * sdCB, 0.0, 1.0))
                CB = pm.Deterministic('CB', tt.sort(CB_, axis=1))
            else:

                bounded_normal = pm.Bound(pm.Normal, lower=0.0, upper=1.0)
                CB_ = bounded_normal.dist(mu=muCB,
                                          sd=sdCB,
                                          testval=np.array([0.3, 0.8]),
                                          shape=(self.n_galaxies, 2))
                CB2_ = bounded_normal.dist(mu=muCB2,
                                           sd=sdCB2,
                                           testval=np.array([0.3, 0.8]),
                                           shape=(self.n_galaxies, 2))
                comp_dists = [CB_, CB2_]

                mix = pm.Mixture('mix',
                                 w=w,
                                 comp_dists=comp_dists,
                                 testval=np.array([0.3, 0.8]),
                                 shape=(self.n_galaxies, 2))

                CB = pm.Deterministic('CB', tt.sort(mix, axis=1))


# Now that we have all ingredients, compute q
#             sin_theta = tt.sqrt(1.0 - mu**2)
#             f = ( A*CB[:,0]*sin_theta*tt.cos(phi) )**2 + ( CB[:,1]*CB[:,0]*sin_theta*tt.sin(phi) )**2 + ( A*CB[:,1]*mu )**2
#             g = A*A * (tt.cos(phi)**2 + mu**2 * tt.sin(phi)**2) + \
#                 CB[:,1]*CB[:,1] * (tt.sin(phi)**2 + mu**2 * tt.cos(phi)**2) + CB[:,0]*CB[:,0] * sin_theta**2

#             h = tt.sqrt(  (g - 2 * tt.sqrt(f)) / (g + 2 * tt.sqrt(f))  )
#             q = (1 - h) / (1 + h)

# # And define the normal likelihood
#             qobs = pm.Normal('qobs', mu=q, sd=self.sigmaq, observed=self.qobs, shape=self.n_galaxies)

# Finally sample from the posterior and use a CSV backend for later plots
            db = pm.backends.Text(self.name_chain)
            self.trace = pm.sample(chains=4, trace=db)
            self.ppc = pm.sample_ppc(self.trace,
                                     samples=500,
                                     model=self.model,
                                     size=100)
Example #23
        testval=0) - 1

    # --- Raters ability to detect true spindles --- #
    # rater_expertise = pm.Bound(pm.Normal, lower=0.)('rater_expertise',
    #                                                 mu=expected_std_for_accuracy,
    #                                                 sd=0.3,
    #                                                 shape=n_raters)

    # --- Observed behaviour --- #
    # Spindle start when marker is real
    mapping = mapping_marker_to_true_spindle[data['t']]
    spindle_real = pm.Normal.dist(mu=tss[mapping],
                                  sd=1)  #rater_expertise[data['rater_i']])
    contaminate_spindle_start.mean = 12.5  # hack, https://discourse.pymc.io/t/how-to-use-a-densitydist-in-a-mixture/1371/2
    spindle_real.mean = 12.5
    obs_start = pm.Mixture(
        'marker_start',
        w=marker_is_from_real_spindle_stacked,
        comp_dists=[spindle_real, contaminate_spindle_start],
        observed=data['s'])

with model:
    trace = pm.sample(tune=1000,
                      init="adapt_diag",
                      nuts_kwargs={
                          'target_accept': 0.99
                      })  # turn off jitter so we dont break ordering gss

pm.traceplot(trace)
plt.show()
print(pm.summary(trace))
def splatter_model(observations,
                   nulls,
                   null_sd,
                   null_b,
                   null_dispersed_prob,
                   iter_count=2000,
                   tune_iters=2000):
    with pm.Model() as model:
        # Probability of being a DE gene
        de_prob = pm.Uniform('de_prob', lower=0., upper=1.)

        # Probability of being downregulated
        down_prob = pm.Beta('down_prob', alpha=1., beta=1.)

        # Mean and sd for Gaussian for DE genes
        mu_pos = pm.Lognormal('mu_pos', mu=0., sd=1.)
        mu_neg = pm.Lognormal('mu_neg', mu=0., sd=1.)

        sd_pos = pm.Gamma('sd_pos', alpha=1., beta=1.)
        sd_neg = pm.Gamma('sd_neg', alpha=1., beta=1.)

        dispersed_prob = null_dispersed_prob
        spike_component = pm.Normal.dist(mu=0., sd=null_sd)
        slab_component = pm.Laplace.dist(mu=0., b=null_b)

        # Sample from Gaussian-Laplace mixture for null (spike-and-slab mixture)
        pm.Mixture('null',
                   comp_dists=[spike_component, slab_component],
                   w=tt.as_tensor([1. - dispersed_prob, dispersed_prob]),
                   observed=nulls)

        pos_component = pm.Bound(pm.Normal, lower=0.).dist(mu=mu_pos,
                                                           sd=sd_pos)
        neg_component = pm.Bound(pm.Normal, upper=0.).dist(mu=-1 * mu_neg,
                                                           sd=sd_neg)
        pos_component_abs = pm.Bound(pm.Normal, lower=0.).dist(mu=-1 * mu_pos,
                                                               sd=sd_pos)
        neg_component_abs = pm.Bound(pm.Normal, upper=0.).dist(mu=mu_neg,
                                                               sd=sd_neg)

        cdf_pos = cdf(mu=mu_pos, sd=sd_pos, value=0.)
        cdf_neg = cdf(mu=-1 * mu_neg, sd=sd_neg, value=0.)

        pm.Mixture('obs',
                   w=tt.as_tensor([(1. - de_prob) * (1. - dispersed_prob),
                                   (1. - de_prob) * dispersed_prob,
                                   de_prob * (1. - down_prob) * (1. - cdf_pos),
                                   de_prob * down_prob * cdf_neg,
                                   de_prob * (1. - down_prob) * cdf_pos,
                                   de_prob * down_prob * (1. - cdf_neg)]),
                   comp_dists=[
                       spike_component, slab_component, pos_component,
                       neg_component, pos_component_abs, neg_component_abs
                   ],
                   observed=observations)

        pm.Deterministic('log_prob', model.logpt)

        trace = pm.sample(iter_count, tune=tune_iters, chains=4)
        ppc = pm.sample_ppc(trace, samples=iter_count, model=model)

    return {'trace': trace, 'ppc': ppc}
Example #25
    comp_dist = []
    mu = []
    packed_chol = []
    chol = []
    for i in range(K):
        temp_mean = np.random.randint(low=50, high=200, size=D)
        mu.append(pm.Normal('mu%i' % i, temp_mean, 20, shape=D))
        packed_chol.append(
            pm.LKJCholeskyCov('chol_cov_%i' % i,
                              eta=2,
                              n=D,
                              sd_dist=pm.HalfNormal.dist(10)))
        chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True))
        comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))

    xobs = pm.Mixture('x_obs', pi, comp_dist, observed=X_shared)

print("making inference...")
# Inference
with model:
    advi_mf = pm.ADVI()
    advi_mf.fit(10000,
                more_replacements={X_shared: X_minibatch},
                obj_optimizer=pm.adagrad(learning_rate=1e-2))

fig = plt.figure()
plt.plot(advi_mf.hist)
plt.title("loss function")
plt.savefig(out_path + "/" + "lossPlot.jpg")

print("making prediction...")
Example #26
def main():
    dsNum, dType = 1, "isNat"
    inDir = os.environ['LATDIR'] + '/data/MCMC'
    df = pd.read_hdf('{}/DS{}_Spectrum_{}.h5'.format(inDir, dsNum, dType))
    dfTrit = pd.read_hdf('{}/TritSpec.h5'.format(inDir))

    Y = df['Energy'].values
    # Some values are negative (because Steve) so we need to fix it
    # Also skip some values because who needs them?
    Tritium = dfTrit['Tritium'].values[1::2]
    Tritium = np.array([x if x >= 0. else 0. for x in Tritium])
    Energy = dfTrit['Energy'].values[1::2]

    # Efficiencies -- not implemented yet
    # dfEff = pd.read_hdf('{}/DS{}_{}_Efficiency.h5'.format(inDir, dsNum, dType))
    # XEff = dfEff['Energy'].values
    # YEff = dfEff['Efficiency'].values

    # Peak means
    meansList = [6.54, 8.98, 10.37, 46.54]
    sdList = [GetSigma(mean, dsNum) for mean in meansList]

    with pm.Model() as model:

        Fe55 = pm.Normal.dist(mu=meansList[0], sd=sdList[0])
        Zn65 = pm.Normal.dist(mu=meansList[1], sd=sdList[1])
        Ge68 = pm.Normal.dist(mu=meansList[2], sd=sdList[2])
        Pb210 = pm.Normal.dist(mu=meansList[3], sd=sdList[3])
        Bkg = pm.Uniform.dist(lower=2., upper=50.)

        Tritium = pm.Interpolated("Tritium",
                                  x_points=Energy,
                                  pdf_points=Tritium,
                                  testval=5.)
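        # Six mixture weights: one per photopeak (Fe55, Zn65, Ge68, Pb210), plus the
        # tritium spectrum and a flat background.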
        weights = pm.Dirichlet('weights', a=np.ones(len(meansList) + 2))

        Mu = pm.Normal('Mu', mu=0.77, sd=0.1)
        Sig = pm.Normal('Sig', mu=0.56, sd=0.1)
        eff = LogisticFunc("Efficiency", mu=Mu, sd=Sig, testval=5.)

        TritEff = Tritium * eff

        mix = pm.Mixture('mix',
                         w=weights,
                         comp_dists=[Fe55, Zn65, Ge68, Pb210, TritEff, Bkg],
                         testval=5.)

        # mix = pm.Mixture('mix', w=weights, comp_dists=[Fe55, Zn65, Ge68, Pb210, Tritium, Bkg], testval=5., observed=Y)

    with model:
        trace = pm.sample(draws=10000, n_init=2000, tune=2000)

    pm.traceplot(trace)
    print(pm.summary(trace))
    # ppc = pm.sample_ppc(trace, samples=500, model=model, size=100)
    # print(np.asarray(ppc['weights']).shape)

    # _, ax = plt.subplots(figsize=(12, 6))
    # ax.hist([n.mean() for n in ppc['n']], bins=19, alpha=0.5)
    # ax.axvline(df['Energy'].mean())
    # ax.set(title='Posterior predictive of the mean', xlabel='mean(x)', ylabel='Frequency');
    plt.show()
Example #27
def contaminate_mixture(data, fit_for='z', fit_data=None): #stickbreaking problems
    steps = []
    # shapes and sizes
    n_epochs = data['epoch_i'].max() + 1  # each epoch indexed by epoch_i
    n_raters = data['rater_i'].max() + 1
    n_obs = data.shape[0]  # each spindle marker indexed by t

    # static priors vars
    trust_purcell = 0.1  # crank up to give more weight to purcell et al, 2017
    purcell = np.array([0.3587, 0.6387, 0.0026, 0., 0., 0.]) + (1 - trust_purcell)
    s_number_prior = purcell / purcell.sum()
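    # Adding (1 - trust_purcell) to every entry before renormalizing shrinks the
    # Purcell proportions toward a uniform prior; trust_purcell = 1 would keep
    # them unchanged.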
    max_s = len(s_number_prior) - 1
    gss_spindle_testvals = [1., 5., 10., 15., 20.]
    with pm.Model() as model:

        # True s
        gss = pm.Uniform('gss', lower=0., upper=25., shape=(n_epochs, max_s),
                         testval=np.tile(np.array(gss_spindle_testvals).T, reps=(n_epochs, 1),))  # Real spindles
        gss_per_obs = gss[data['epoch_i'], :]

        # The number of spindles per epoch:
        if fit_for == 'z':
            gss_prior = pm.Dirichlet('gss_prior', a=s_number_prior)
            if n_epochs > 1:
                z = pm.Categorical('z', p=gss_prior,
                                   shape=n_epochs)
            else:
                z = pm.Categorical('z', p=gss_prior)
        else:
            z = fit_data['z']
        z_rs = z.reshape((n_epochs, 1))

        if fit_for in ['w', 'z']:  # when we are finding z or w
            w_prior_possibilities = tt.tril(tt.ones((max_s + 1, max_s + 1)))
            w = pm.Categorical('w', p=w_prior_possibilities[z_rs[data['epoch_i'], 0], :], shape=n_obs)
        else:  # fit for gss
            w = fit_data['w']

        # --- Raters ability to detect markers --- #
        r_E = pm.Bound(pm.Normal, lower=0.)('r_E', mu=0.5, sd=0.5, shape=n_raters)
        r_E_per_obs = r_E[data['rater_i']]
        #r_E = pm.Bound(pm.Normal, lower=0.)('r_E', mu=0.5, sd=0.5)

        # --- Behaviour --- #
        contaminate_dist_s = pm.Uniform.dist(lower=0., upper=25., shape=n_obs)
        contaminate_dist_s.mean = 12.5
        possible_dists = [contaminate_dist_s]
        for i in range(0, 5):
            dist = pm.Normal.dist(mu=gss_per_obs[:, i], sd=r_E_per_obs)
            dist.mean = gss_spindle_testvals[i]
            possible_dists.append(dist)

        w_array = tt.extra_ops.to_one_hot(w, nb_class=max_s + 1)
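        # The one-hot w gives each marker a hard assignment per draw: component 0 is
        # the contaminate (uniform) distribution, components 1-5 the latent spindle starts.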
        s = pm.Mixture('s', w=w_array,
                       comp_dists=possible_dists,
                       observed=data['s'])

        #STEP methods for vars:
        if fit_for == 'z':
            steps = [pm.CategoricalGibbsMetropolis([z, w]),
                     pm.NUTS([gss_prior, gss, r_E], target_accept=0.9)]
        if fit_for == 'w':
            steps = [pm.CategoricalGibbsMetropolis([w]),
                     pm.NUTS([gss, r_E], target_accept=0.9)]
        #else, everything NUTS

    return model, steps
    def _make_model(self):
        pca = self.pca
        mCounts = np.int_(self.counts * self.seq_depth_factor)
        n_dim = pca.n_components_
        n_modes = self.n_modes
        n_samp = mCounts.shape[1]
        n_features = mCounts.shape[0]
        if self.kmeansInit:
            sd_factor = 2 / n_modes
        else:
            sd_factor = 2

        print("Defining model constants...")
        if pca.whiten:
            rot = np.sqrt(pca.explained_variance_[:, None]) * pca.components_
            rot = theano.shared(floatX(rot))
            cSd = floatX(1)
            tcov = np.eye(n_dim)[np.tril_indices(n_dim)] * sd_factor
        else:
            rot = theano.shared(floatX(pca.components_))
            cSd = floatX(np.sqrt(pca.explained_variance_))
            tcov = (np.diag(pca.explained_variance_)[np.tril_indices(n_dim)] *
                    sd_factor)
        shift = theano.shared(floatX(pca.mean_[None, :]),
                              broadcastable=(True, False))

        multiNn = np.sum(mCounts, axis=0)
        print("Counts shape:")
        print(mCounts.shape)
        lcounts = floatX(self.pca.transform(self.tau_log_E_p))
        print("Latent counts shape:")
        print(lcounts.shape)
        high_tumor = self.pheno["tcEst"] > 0.8
        low_tumor = self.pheno["tcEst"] < 0.2
        if self.kmeansInit:
            km = KMeans(n_clusters=n_modes,
                        random_state=0,
                        tol=1e-10,
                        max_iter=100)
            mus_tumor = km.fit(lcounts[high_tumor, :]).cluster_centers_
            mus_free = km.fit(lcounts[low_tumor, :]).cluster_centers_
        else:
            mus_tumor = np.repeat(np.mean(lcounts[high_tumor, :],
                                          axis=0)[None, :],
                                  10,
                                  axis=0)
            mus_free = np.repeat(np.mean(lcounts[low_tumor, :],
                                         axis=0)[None, :],
                                 10,
                                 axis=0)
        mus_tumor = floatX(mus_tumor)
        mus_free = floatX(mus_free)
        try:
            chol_tumor = floatX(
                np.linalg.cholesky(np.cov(lcounts[high_tumor, :].T)))
            chol_tumor = chol_tumor[np.tril_indices(n_dim)] * sd_factor
        except np.linalg.LinAlgError:
            print(
                "Seems we have to few HIGH tumor content samples to infer a starting covariance."
            )
            chol_tumor = tcov
        try:
            chol_free = floatX(
                np.linalg.cholesky(np.cov(lcounts[low_tumor, :].T)))
            chol_free = chol_free[np.tril_indices(n_dim)] * sd_factor
        except np.linalg.LinAlgError:
            print(
                "Seems we have to few LOW tumor content samples to infer a starting covariance."
            )
            chol_free = tcov
        md = self.tau_log_E_p - pca.mean_[None, :]
        dev = md - np.dot(np.dot(md, pca.components_.T), pca.components_)
        dev_std = np.std(dev, axis=0)
        dev_mean = np.mean(dev, axis=0)
        if self.no_deviations is True:
            dev_f = dev_t = None
        else:
            dev_f = dev_t = theano.shared(floatX(dev))

        p_f = floatX(self.p_f)
        p_t = floatX(self.p_t)
        sparsity = floatX(1)
        n = floatX(self.pheno["tcRes"].values[:, None] * self.res_scale)
        tc = floatX(self.pheno["tcEst"].values[:, None])
        lb = floatX(1 - p_f)
        ub = floatX(p_t)
        padding = 1e-1 * (ub - lb)
        pa_start = ((n * tc) + 1) / (n + 2)
        pa_start = np.where(pa_start < lb, lb + padding, pa_start)
        pa_start = np.where(pa_start > ub, ub - padding, pa_start)
        pa_start = floatX(pa_start)

        def inverse_pca(X):
            return pm.math.dot(X, rot) + shift

        def pa2alpha(p_a):
            return (p_a + p_f - 1) / (p_t + p_f - 1)

        def alpha2pa(alpha):
            return (alpha * (p_t + p_f - 1)) - p_f + 1

        def mixSep(x_f, x_t, alpha, dev_f, dev_t):
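            # Map latent PCA coordinates back to expression space, apply a softmax to
            # get per-feature proportions, then mix tumor-free (x_f) and tumor (x_t)
            # profiles as a convex combination weighted by alpha.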
            exp_f = inverse_pca(x_f)
            exp_t = inverse_pca(x_t)
            if dev_f is not None:
                exp_f += dev_f
            if dev_t is not None:
                exp_t += dev_t
            exp_f = tt.nnet.softmax(exp_f)
            exp_t = tt.nnet.softmax(exp_t)
            result = ((1 - alpha) * exp_f) + (alpha * exp_t)
            return result

        print("Making model...")
        with pm.Model() as model:
            # bounds with nummerical padding
            p_a = pm.Beta(
                "p_a",
                alpha=floatX((n * tc) + 1),
                beta=floatX((n * (1 - tc)) + 1),
                transform=pm.distributions.transforms.Interval(lb, ub),
                shape=(n_samp, 1),
                testval=pa_start,
            )
            alpha = pm.Deterministic("alpha", pa2alpha(p_a))
            sdd = pm.HalfNormal.dist(sd=cSd * self.relax_prior)

            x_f_comps = list()
            for i in range(n_modes):
                mus_f = pm.Normal(
                    "mus_f_{}".format(i),
                    mu=0,
                    sd=cSd * self.relax_prior,
                    shape=n_dim,
                    testval=mus_free[i, :],
                )
                packed_L_f = pm.LKJCholeskyCov(
                    "packed_L_f_{}".format(i),
                    n=n_dim,
                    eta=sparsity,
                    sd_dist=sdd,
                    testval=chol_free,
                )
                chol_f = pm.expand_packed_triangular(n_dim,
                                                     packed_L_f,
                                                     lower=True)
                x_f_comps.append(
                    pm.MvNormal.dist(mu=mus_f,
                                     chol=chol_f,
                                     shape=(n_samp, n_dim)))
            if n_modes > 1:
                w_f = pm.Dirichlet("w_f",
                                   a=np.ones(n_modes) * self.dirichlet_prior)
                x_f = pm.Mixture(
                    "x_f",
                    w=w_f,
                    comp_dists=x_f_comps,
                    shape=(n_samp, n_dim),
                    testval=lcounts,
                )
            else:
                x_f = pm.MvNormal("x_f",
                                  mu=mus_f,
                                  chol=chol_f,
                                  shape=(n_samp, n_dim))

            if self.same_kernels:
                x_t_comps = x_f_comps
            else:
                x_t_comps = list()
                for i in range(n_modes):
                    mus_t = pm.Normal(
                        "mus_t_{}".format(i),
                        mu=0,
                        sd=cSd * self.relax_prior,
                        shape=n_dim,
                        testval=mus_tumor[i, :],
                    )
                    packed_L_t = pm.LKJCholeskyCov(
                        "packed_L_t_{}".format(i),
                        n=n_dim,
                        eta=sparsity,
                        sd_dist=sdd,
                        testval=chol_tumor,
                    )
                    chol_t = pm.expand_packed_triangular(n_dim,
                                                         packed_L_t,
                                                         lower=True)
                    x_t_comps.append(
                        pm.MvNormal.dist(mu=mus_t,
                                         chol=chol_t,
                                         shape=(n_samp, n_dim)))
            if n_modes > 1:
                w_t = pm.Dirichlet("w_t",
                                   a=np.ones(n_modes) * self.dirichlet_prior)
                x_t = pm.Mixture(
                    "x_t",
                    w=w_t,
                    comp_dists=x_t_comps,
                    shape=(n_samp, n_dim),
                    testval=lcounts,
                )
            else:
                x_t = pm.MvNormal("x_t",
                                  mu=mus_t,
                                  chol=chol_t,
                                  shape=(n_samp, n_dim))

            if self.sample_deviation is True:
                dev_f = pm.Normal(
                    "dev_f",
                    mu=dev_mean,
                    sigma=dev_std,
                    shape=(n_samp, n_features),
                    testval=dev,
                )
                dev_t = pm.Normal(
                    "dev_t",
                    mu=dev_mean,
                    sigma=dev_std,
                    shape=(n_samp, n_features),
                    testval=dev,
                )

            if self.hazard_model == "cox":
                b = pm.Normal("logHR", mu=0, sigma=1, shape=(2 * n_dim, 1))
                for ev in self.events:
                    ind = ev["mask"].values
                    obs = np.array(ev["index_among"])
                    expressions = tt.concatenate([x_t[ind, :], x_f[ind, :]],
                                                 axis=1)
                    hazard = tt.exp(tt.dot(expressions, b)).T
                    evp = pm.Categorical("event_{}".format(ev["sample"]),
                                         hazard,
                                         observed=obs)
            elif self.hazard_model == "mk":
                # This is not implemented yet; it aims to model the hazard with a Gaussian mixture.
                b = pm.Normal("kernel_weights", mu=0, sigma=1, shape=(10, ))
                pass

            x = pm.Deterministic("x", mixSep(x_f, x_t, alpha, dev_f, dev_t))
            if self.use_multinomial:
                obs = pm.Multinomial("obs",
                                     n=multiNn,
                                     p=x,
                                     observed=mCounts.T,
                                     dtype="int64")
            else:
                dist = pm.Dirichlet.dist(mCounts.T + 1)
                pot = pm.Potential("obs", dist.logp(x))
        return model
Example #29
File: chpt_.py  Project: josepm/Bayes
K = len(cpf)   # max breakpoints
P = 1  #len(cpf)
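# stick_breaking() is used below but not defined in this snippet; a minimal sketch,
# assuming the usual truncated Dirichlet-process construction (tt = theano.tensor):
def stick_breaking(beta):
    # w_k = beta_k * prod_{j<k} (1 - beta_j)
    portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
    return beta * portion_remaining
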
with pm.Model() as model:      # The DP priors to obtain w, the cluster weights
    alpha = pm.Gamma('alpha', 1.0, 1.0, shape=1)
    beta = pm.Beta('beta', 1, alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))

    # psi = pm.Uniform('psi', shape=(P, 1))
    # prob = pm.Uniform('prob', shape=(P, 1))
    # zb = pm.ZeroInflatedBinomial('zb', psi=psi, n=P, p=prob, shape=(P, K))
    # obs = pm.Mixture('obs', w, zb, shape=(P, 1), observed=cpf['w_trend'][:, None])

    # Prior on the Bernoulli parameters; the commented-out line is the Jeffreys conjugate prior Beta(0.5, 0.5)
    # theta = pm.Beta('theta', 0.5, 0.5, shape=(P, K))
    theta = pm.Beta('theta', alpha=10.0, beta=0.5, shape=(P, K))
    obs = pm.Mixture('obs', w, pm.Bernoulli.dist(theta, shape=(P, K)), shape=(P, 1), observed=cpf['w_trend'][:, None])

    step_method = pm.NUTS(target_accept=0.90, max_treedepth=15)
    cpt_trace = pm.sample(1000, chains=None, step=step_method, tune=1000)
    cpt_smry = pm.summary(cpt_trace)
    pm.traceplot(cpt_trace)
    spp = pm.sample_posterior_predictive(cpt_trace, samples=1000, progressbar=False, var_names=['w', 'theta', 'alpha'])  #, 'alpha', 'obs'])
    # spp = pm.sample_posterior_predictive(cpt_trace, samples=1000, progressbar=False, var_names=['w', 'zb', 'prob', 'psi'])

K = 30
P =  len(cpf)  #
with pm.Model() as model:
    # The DP priors to obtain w, the cluster weights
    alpha = pm.Gamma('alpha', 1., 1.)
    beta = pm.Beta('beta', 1, alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))
Example #30
    def __init__(self,
                 dimension,
                 mu_data,
                 tau_data,
                 prior="Gaussian",
                 parameters={
                     "location": None,
                     "scale": None,
                     "corr": False
                 },
                 hyper_alpha=None,
                 hyper_beta=None,
                 hyper_gamma=None,
                 hyper_delta=None,
                 transformation=None,
                 parametrization="non-central",
                 name='',
                 model=None):

        assert isinstance(dimension, int), "dimension must be integer!"
        assert dimension in [3, 5, 6], "Not a valid dimension!"

        D = dimension

        # 2) Call super's __init__ first, passing model and name to it; the name will be
        # the prefix for all variables defined here. If no name is specified for the
        # model, there will be no prefix.
        super().__init__(str(D) + "D", model)
        # Now you are in the context of the instance; `modelcontext` will return self.
        # You can define variables in several ways; note that all variables will get
        # the model's name prefix.

        #------------------- Data ------------------------------------------------------
        N = int(len(mu_data) / D)
        if N == 0:
            sys.exit(
                "Data has length zero! You must provide at least one data point."
            )
        #-------------------------------------------------------------------------------

        #============= Transformations ====================================

        if transformation is "mas":
            Transformation = Iden

        elif transformation is "pc":
            if D is 3:
                Transformation = cartesianToSpherical
            elif D is 6:
                Transformation = phaseSpaceToAstrometry_and_RV
            elif D is 5:
                Transformation = phaseSpaceToAstrometry
                D = 6

        else:
            sys.exit("Transformation is not accepted")
        #==================================================================

        #================ Hyper-parameters =====================================
        if hyper_delta is None:
            shape = 1
        else:
            shape = len(hyper_delta)
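        # 'shape' is the number of mixture components: 1 for the single-Gaussian prior,
        # len(hyper_delta) when the GMM prior is used.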

        #--------- Location ----------------------------------
        if parameters["location"] is None:

            location = [
                pm.Normal("loc_{0}".format(i),
                          mu=hyper_alpha[i][0],
                          sigma=hyper_alpha[i][1],
                          shape=shape) for i in range(D)
            ]

            #--------- Join variables --------------
            mu = pm.math.stack(location, axis=1)

        else:
            mu = parameters["location"]
        #------------------------------------------------------

        #------------- Scale --------------------------
        if parameters["scale"] is None:
            scale = [
                pm.Gamma("scl_{0}".format(i),
                         alpha=2.0,
                         beta=2.0 / hyper_beta[i][0],
                         shape=shape) for i in range(D)
            ]

        else:
            scale = parameters["scale"]
        #--------------------------------------------------

        #----------------------- Correlation -----------------------------------------
        if parameters["corr"]:
            pm.LKJCorr('chol_corr', eta=hyper_gamma, n=D)
            C = tt.fill_diagonal(
                self.chol_corr[np.zeros((D, D), dtype=np.int64)], 1.)
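            # Note: chol_corr is the packed vector of off-diagonal correlations; indexing it
            # with an all-zeros (D, D) index matrix broadcasts its first element to every
            # off-diagonal entry, giving an equicorrelation matrix C with unit diagonal.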
            # print_ = tt.printing.Print('C')(C)
        else:
            C = np.eye(D)
        #-----------------------------------------------------------------------------

        #-------------------- Covariance -------------------------
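        # Each component covariance is assembled as Sigma_i = diag(scale_i) . C . diag(scale_i),
        # i.e. the shared correlation matrix scaled by the per-dimension standard deviations.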
        sigma_diag = pm.math.stack(scale, axis=1)
        cov = theano.shared(np.zeros((shape, D, D)))

        for i in range(shape):
            sigma = tt.nlinalg.diag(sigma_diag[i])
            covi = tt.nlinalg.matrix_dot(sigma, C, sigma)
            cov = tt.set_subtensor(cov[i], covi)
        #---------------------------------------------------------
        #========================================================================

        #===================== True values ============================================
        if prior is "Gaussian":
            pm.MvNormal("source", mu=mu, cov=cov[0], shape=(N, D))

        elif prior is "GMM":
            pm.Dirichlet("weights", a=hyper_delta, shape=shape)

            comps = [
                pm.MvNormal.dist(mu=mu[i], cov=cov[i]) for i in range(shape)
            ]

            pm.Mixture("source",
                       w=self.weights,
                       comp_dists=comps,
                       shape=(N, D))

        else:
            sys.exit("The specified prior is not supported")
        #=================================================================================

        #----------------------- Transformation---------------------------------------
        transformed = Transformation(self.source)
        #-----------------------------------------------------------------------------

        #------------ Flatten --------------------------------------------------------
        true = pm.math.flatten(transformed)
        #----------------------------------------------------------------------------

        #----------------------- Likelihood ----------------------------------------
        pm.MvNormal('obs', mu=true, tau=tau_data, observed=mu_data)
        #------------------------------------------------------------------------------