def test_multivariate_observations(self):
    """Check the InferenceData produced for an observed Multinomial with dims.

    The model observes 20 multinomial count vectors over three categories and
    labels the observation axes ("experiment", "direction"). After sampling,
    the expected groups and variables must be present, the log likelihood
    must not carry the "direction" dimension, and the observed data must.
    """
    # Same RNG call as before: 20 rows of counts, 20 trials each.
    observed_counts = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20)
    model_coords = {
        "direction": ["x", "y", "z"],
        "experiment": np.arange(20),
    }

    with pm.Model(coords=model_coords):
        category_probs = pm.Beta("p", 1, 1, size=(3, ))
        pm.Multinomial(
            "y",
            20,
            category_probs,
            dims=("experiment", "direction"),
            observed=observed_counts,
        )
        idata = pm.sample(
            draws=50, chains=2, tune=100, return_inferencedata=True
        )

    expected_groups = {
        "posterior": ["p"],
        "sample_stats": ["lp"],
        "log_likelihood": ["y"],
        "observed_data": ["y"],
    }
    assert not check_multiple_attrs(expected_groups, idata)

    log_likelihood = idata.log_likelihood
    assert "direction" not in log_likelihood.dims
    assert "direction" in idata.observed_data.dims
    # (chains, draws, experiments)
    assert log_likelihood["y"].shape == (2, 50, 20)
Example #2
0
def mv_simple_discrete():
    """Build a two-category Multinomial model with its analytic moments.

    Returns:
        tuple: ``(initial_point, model, (mu, C))`` where ``initial_point`` is
        ``model.compute_initial_point()``, ``mu = n * p`` and ``C`` is the
        ``d x d`` matrix filled in below.
    """
    d, n = 2, 5
    p = floatX_array([0.15, 0.85])

    with pm.Model() as model:
        pm.Multinomial("x", n, at.constant(p), initval=np.array([1, 4]))

    mu = n * p

    # Covariance matrix: n*p_i*(1-p_i) on the diagonal, -n*p_i*p_j elsewhere.
    C = np.zeros((d, d))
    for i in range(d):
        for j in range(d):
            C[i, j] = n * p[i] * (1 - p[i]) if i == j else -n * p[i] * p[j]

    return model.compute_initial_point(), model, (mu, C)
Example #3
0
 def test_multivariate2(self):
     """Regression test for issue #3271.

     Fits a Dirichlet-Multinomial model to simulated counts and checks the
     shapes of the prior predictive and posterior predictive samples.
     """
     n_draws = 20
     mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)

     with pm.Model() as dm_model:
         probs = pm.Dirichlet("probs", a=np.ones(6))
         pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
         burned_trace = pm.sample(
             n_draws,
             tune=10,
             cores=1,
             return_inferencedata=False,
             compute_convergence_checks=False,
         )

     sim_priors = pm.sample_prior_predictive(
         return_inferencedata=False, samples=n_draws, model=dm_model
     )
     sim_ppc = pm.sample_posterior_predictive(
         burned_trace, return_inferencedata=False, samples=n_draws, model=dm_model
     )

     # One leading sample axis in front of the observed data's own shape.
     expected_obs_shape = (n_draws,) + mn_data.shape
     assert sim_priors["probs"].shape == (n_draws, 6)
     assert sim_priors["obs"].shape == expected_obs_shape
     assert sim_ppc["obs"].shape == expected_obs_shape
Example #4
0
def mv_simple_discrete():
    """Build a two-category Multinomial model with its analytic moments.

    Returns:
        tuple: ``(test_point, model, (mu, C))`` where ``test_point`` is
        ``model.test_point``, ``mu = n * p`` and ``C`` is the ``d x d``
        matrix filled in below.
    """
    d, n = 2, 5
    p = np.array([.15, .85])

    with pm.Model() as model:
        pm.Multinomial(
            'x', n, pm.constant(p), shape=d, testval=np.array([1, 4])
        )

    mu = n * p

    # Covariance matrix: n*p_i*(1-p_i) on the diagonal, -n*p_i*p_j elsewhere.
    C = np.zeros((d, d))
    for i in range(d):
        for j in range(d):
            C[i, j] = n * p[i] * (1 - p[i]) if i == j else -n * p[i] * p[j]

    return model.test_point, model, (mu, C)
Example #5
0
    def test_multivariate(self):
        """Prior predictive draws of a 4-category Multinomial have shape (1, 10, 4)."""
        uniform_probs = np.full(4, 0.25)

        with pm.Model():
            pm.Multinomial("m", n=5, p=uniform_probs)
            prior_idata = pm.sample_prior_predictive(10)

        assert prior_idata.prior["m"].shape == (1, 10, 4)
Example #6
0
# Wrap every entry of pi_list in its own CompletedDirichlet node.
completed_pi_list = [pymc.CompletedDirichlet('completed_pi_%d' % i, dist)
                     for i, dist in enumerate(pi_list)]

# Indicator variables of whether the pth person is in a group or not
# DIMENSIONS: 1 x (num_people^2) for each list, where each element is Kx1
# DOMAIN : {0,1}, only one element of vector is 1, all else 0
# DISTRIBUTION: Categorical (using Multinomial with 1 observation)
z_pTq_matrix = np.empty((num_people, num_people), dtype=object)
z_pFq_matrix = np.empty((num_people, num_people), dtype=object)

# np.ndindex walks the (p, q) pairs in row-major order, the same order as two
# nested range() loops, so nodes are created in the same sequence.
for p_person, q_person in np.ndindex(num_people, num_people):
    # "T" indicator uses person p's membership vector ...
    z_pTq_matrix[p_person, q_person] = pymc.Multinomial(
        'z_%dT%d_vector' % (p_person, q_person),
        n=1,
        p=pi_list[p_person],
        trace=False)
    # ... and the "F" indicator uses person q's.
    z_pFq_matrix[p_person, q_person] = pymc.Multinomial(
        'z_%dF%d_vector' % (p_person, q_person),
        n=1,
        p=pi_list[q_person],
        trace=False)


#---------------------------- Data Level ---------------------------------#
# Combination of Priors to build the scalar parameter for y~Bernoulli
@pymc.deterministic
def bernoulli_parameters(z_pTq=z_pTq_matrix, z_pFq=z_pFq_matrix, B=B_matrix):
    """
    Takes in the two z_lists of Categorical Stochastic Objects
Example #7
0
                           theta=numpy.repeat(1.0, d.shape[2]),
                           trace=True)

    ddiff = pymc.Lambda(
        "ddiff",
        lambda dir_1=pymc.CompletedDirichlet(
            "cdir_1", dir_1, trace=False), dir_2=pymc.CompletedDirichlet(
                "cdir_2", dir_2, trace=False): dir_2[0] - dir_1[0],
        trace=True)

    vals_1 = ds[anno[ctrl], igene]
    vals_2 = ds[anno[cond], igene]

    mn_1 = pymc.Multinomial("mn_1",
                            value=vals_1,
                            n=vals_1.sum(axis=1),
                            p=dir_1,
                            observed=True,
                            trace=False)
    mn_2 = pymc.Multinomial("mn_2",
                            value=vals_2,
                            n=vals_2.sum(axis=1),
                            p=dir_2,
                            observed=True,
                            trace=False)

    #mp = pymc.MAP([dir_1, dir_2, mn_1, mn_2])
    #mp.fit()
    dir_1.value = array([0.25] * 3)
    dir_2.value = array([0.25] * 3)

    mcmc = pymc.MCMC([dir_1, dir_2, mn_1, mn_2, ddiff])
Example #8
0
# Number of trials in each simulated multinomial observation.
NUM_DRAWS = 100
# Number of simulated observations.
NUM_SAMPLES = 50
# Category probabilities used for the simulation. They must sum to 1; the
# previous last entry (1.0) made the list sum to 1.9, and NumPy silently
# replaced it with the remainder 1 - 0.6 - 0.3 = 0.1 when drawing.
TRUE_PROPS = [0.6, 0.3, 0.1]


def generate_data():
    """Simulate the observed multinomial counts.

    Returns:
        list: ``NUM_SAMPLES`` arrays, each holding the per-category counts of
        ``NUM_DRAWS`` trials drawn with probabilities ``TRUE_PROPS``.
    """
    return [
        numpy.random.multinomial(NUM_DRAWS, TRUE_PROPS)
        for _ in range(NUM_SAMPLES)
    ]


##############################################################################
# model

# Dirichlet prior over the category proportions, all concentrations 1.0.
props = pymc.Dirichlet(name="props", theta=[1.0, 1.0, 1.0])

# Observed multinomial counts, simulated by generate_data().
draws = pymc.Multinomial(
    name="draws",
    value=generate_data(),
    n=NUM_DRAWS,
    p=props,
    observed=True,
)

mcmc = pymc.MCMC([props, draws])
# For a quick smoke run use: mcmc.sample(iter=1000, burn=100, thin=1)
mcmc.sample(iter=100000, burn=10000, thin=100)
summarize(mcmc, "props")
Example #9
0
def create_model(data_matrix, num_people, num_groups, alpha, B):
    """
    Build the PyMC nodes of a group-membership model for pairwise responses.

    Creates one Dirichlet membership vector per person, two Multinomial (n=1)
    indicator nodes per ordered pair of people, a deterministic node that
    combines each pair of indicators with ``B``, and an observed Bernoulli
    node holding the flattened ``data_matrix``.

    Parameters:
        data_matrix: array that is reshaped to ``num_people * num_people``
            elements and used as the observed value of ``y_vector``.
        num_people: number of people; sizes every per-person and per-pair
            container.
        num_groups: not referenced by the code in this function (it only
            appears in comments); it is still part of the returned dict.
        alpha: passed as ``theta`` to every per-person Dirichlet.
        B: matrix used in the ``z' B z`` product inside
            ``bernoulli_parameters``.

    Returns:
        dict: ``locals()`` of this function, i.e. every argument, every node
        and every intermediate container keyed by its local variable name.
        Local names are therefore part of the returned interface.

    Heavily commented for readability's sake.
    """
    #---------------------------- Data Transform -----------------------------#
    #indices = np.indices(dimesions = np.shape(data_matrix))
    #people_indices = indices[0].reshape(num_people*num_people)
    # Flatten the responses to one vector of length num_people * num_people.
    data_vector = data_matrix.reshape(num_people * num_people).T

    #---------------------------- Hyperparameters ----------------------------#
    # Average probability distribution of being in groups
    # DIMENSIONS: 1 x num_groups
    # SUPPORT: (0,inf)
    # DISTRIBUTION: None
    alpha_vector = alpha

    # Matrix of inter-group correlations
    # DIMENSIONS: num_groups x num_groups
    # SUPPORT: [0,1]
    # DISTRIBUTION: None
    B_matrix = B

    #---------------------------- Prior Parameters ---------------------------#
    # Actual group membership probabilities for each person
    # DIMENSIONS: 1 x (num_people * num_groups)
    # SUPPORT: (0,1], Elements of each vector should sum to 1 for each person
    # DISTRIBUTION: Dirichlet(alpha)
    # Object array so that each slot can hold a PyMC node.
    pi_list = np.empty(num_people, dtype=object)
    for person in range(num_people):
        person_pi = pymc.Dirichlet('pi_%i' % person, theta=alpha_vector)
        pi_list[person] = person_pi

    # Indicator variables of whether the pth person is in a group or not
    # DIMENSIONS: 1 x (num_people^2) for each list, where each element is Kx1
    # DOMAIN : {0,1}, only one element of vector is 1, all else 0
    # DISTRIBUTION: Categorical (using Multinomial with 1 observation)
    z_pTq_matrix = np.empty([num_people, num_people], dtype=object)
    z_pFq_matrix = np.empty([num_people, num_people], dtype=object)
    for p_person in range(num_people):
        for q_person in range(num_people):
            # "T" indicator: drawn from person p's membership vector.
            z_pTq_matrix[p_person, q_person] = pymc.Multinomial(
                'z_%dT%d_vector' % (p_person, q_person),
                n=1,
                p=pi_list[p_person])
            # "F" indicator: drawn from person q's membership vector.
            z_pFq_matrix[p_person, q_person] = pymc.Multinomial(
                'z_%dF%d_vector' % (p_person, q_person),
                n=1,
                p=pi_list[q_person])

    #---------------------------- Data Level ---------------------------------#
    # Combination of Priors to build the scalar parameter for y~Bernoulli
    @pymc.deterministic
    def bernoulli_parameters(z_pTq=z_pTq_matrix,
                             z_pFq=z_pFq_matrix,
                             B=B_matrix):
        """
        Takes in the two z matrices of Categorical Stochastic Objects,
        takes their values (using the Deterministic class) and, for every
        pair (p, q), computes the product z_pTq[p, q]' B z_pFq[p, q].

        Returns the results reshaped to 1 x (num_people * num_people).
        """
        # NOTE(review): this local shadows the enclosing function's name and
        # uses dtype=object; presumably each entry is a scalar - confirm.
        bernoulli_parameters = np.empty([num_people, num_people], dtype=object)
        for p in range(num_people):
            for q in range(num_people):
                bernoulli_parameters[p, q] = np.dot(np.dot(z_pTq[p, q], B),
                                                    z_pFq[p, q])
        return bernoulli_parameters.reshape(1, num_people * num_people)

    # Observed response when person p is asked whether q is "connected"
    # Reshaped such that each person is asked sequentially about all others,
    # then the next person's vector, etc.
    #
    # Includes information about both p and q's group membership
    # y = Bern(z_p2p * B * z_q2p)
    # y in {0,1}
    # DIMENSIONS: 1 x (num_people * num_people)
    y_vector = pymc.Bernoulli('y_vector',
                              p=bernoulli_parameters,
                              value=data_vector,
                              observed=True)

    #---------------------------- Return all MCMC Objects --------------------#
    # Every local defined above (arguments included) ends up in this dict.
    return locals()
Example #10
0
#multi = pm.Multinomial('multi', n=poisson, p=dirich, value=grid1d.values(), observed=True) #want to do this
#multi = pm.Multinomial('multi', n=poisson, p=dirich) #works
#multi = pm.Multinomial('multi', n=poisson, p=dirich, value=[0,0,0,0,0,0,1,1,1], observed=True) #works when n == sum(value)
"""
multi = pm.Multinomial('multi', p=dirich, observed=True, 
                        n = [ np.sum(grid1d.values()[i]) for i in range( 0, len(grid1d.values()) ) ] , 
                        value = [ grid1d.values()[i] for i in range( 0, len(grid1d.values()) ) ] )
model = pm.Model([multi, dirich], name = 'model')
"""

# Similar to the poisson's 'value =' except that each grid square gets its own
# daily count, instead of adding all of the grid squares together (no np.sum()
# here).
#
# list(...) takes a single snapshot of the counts. The previous form called
# grid1d.values() once per element and indexed the result, which is quadratic
# and, if grid1d is a dict, fails on Python 3 where dict views cannot be
# indexed.
multi = pm.Multinomial(
    'multi',
    p=dirich,
    observed=True,
    n=poisson,
    value=list(grid1d.values()))
model = pm.Model([multi, dirich, poisson, expon], name='model')

# <codecell>

mcmc = pm.MCMC(model)
# Positional arguments are (iter, burn, thin) in PyMC 2's MCMC.sample.
mcmc.sample(200, 100, 1)

# <codecell>

dirich_samples = mcmc.trace('dirich')[:]
expon_sapmples = mcmc.trace('expon')[:]
# Correctly spelled alias; the misspelled name above is kept because later
# cells may still refer to it.
expon_samples = expon_sapmples
# No samples from these last two because they're observed.
#poisson_samples = mcmc.trace('poisson1')[:]
Example #11
0
# One Dirichlet node and one observed Multinomial node per dataset.
S = []
S_steps = []
data_list = []
for name in datasets.iterkeys():
    this_dataset = datasets[name]

    # Defaults bind this iteration's slices and dataset to the node.
    @pm.deterministic
    def this_asc_slice(asc=asc, slices=age_slices[name], dataset=this_dataset):
        return array([sum(asc[slices[i]]) for i in xrange(len(dataset))])

    # Dots in the dataset name are replaced by underscores in the node name.
    S_now = pm.Dirichlet(('S_%s' % name).replace('.', '_'), this_asc_slice)
    S.append(S_now)
    data_list.append(
        pm.Multinomial('data',
                       n=sum(this_dataset.N),
                       p=S_now,
                       value=this_dataset.N,
                       isdata=True))

S_pred = pm.Dirichlet('S_pred', asc)

M = pm.MCMC(
    {
        'variables': [sc, scg, alph, asc, S, data_list, S_pred],
        '__name__': 'age_dist_model',
        'step_methods': S_steps,
    },
    db='hdf5',
    comp_level=5)

M.use_step_method(pm.Metropolis, alph, sig=.05)

# Assign the DirichletMultinomial step method to every per-dataset node.
for i in xrange(len(datasets)):
    M.use_step_method(pm.DirichletMultinomial, S[i])
Example #12
0
# Beta(1, 1) prior on p with a single observed Binomial value (5 out of n=19).
p = pm.Beta("p",alpha=1,beta=1)
n = pm.Binomial("Bino",n=19,p=p,value=5,observed=True)
mcmc = pm.MCMC([n,p])
mcmc.sample(25000)

# IPython magic: the next line is only valid inside IPython/Jupyter, not in a
# plain Python module.
%matplotlib inline
from pymc.Matplot import plot as mcplot
mcplot(mcmc.trace("p"),common_scale=False)

# A simple demo of Dirichlet-Multinomial conjugacy
N = 5 # dimension
beta = np.ones(N)
mu=pm.Dirichlet("mu", theta=beta)
cmu = pm.CompletedDirichlet("cmu", D=mu)

# NOTE(review): `n` is rebound here and no longer refers to the Binomial node
# above. `D` and `n_class` are not defined in the visible snippet - confirm
# where they come from. This call passes the completed vector (cmu) as p while
# the container below passes the raw Dirichlet (theta[i]) - confirm which is
# intended.
n = pm.Multinomial('n', n=D, p=cmu, value=n_class, observed=True)

alpha = np.ones(N)

# N Dirichlet nodes, their completed counterparts, and one observed
# Multinomial per index; `data` is not defined in the visible snippet.
theta = pm.Container([pm.Dirichlet("theta_%s" % i,theta=alpha) \
                      for i in range(N)])
ctheta = pm.Container([pm.CompletedDirichlet("ctheta_%s" % i, D=theta[i]) for i in range(N)])
c = pm.Container([pm.Multinomial("c_%s" % i, n=n_class[i], p=theta[i]\
                                ,value = data[i], observed=True)\
                 for i in range(N)])

# Sum over i of mu[0][i] * theta[i][0][i], evaluated on the completed nodes.
@pm.deterministic
def precision(mu=cmu, theta=ctheta):
    return np.sum([mu[0][i]*theta[i][0][i] for i in range(N)])

Example #13
0
 cur_obs = np.array([bpheb[where_bphe], bphe0[where_bphe]]).T
 n = np.sum(cur_obs, axis=1)
 # Need to have (b and not 0) on either chromosome
 p_bphe = pm.Lambda('p_bphe', lambda pb=pb[where_bphe], p0=p0[where_bphe], p1=p1: 1-(1-pb*(1-p0))**2, trace=False)
 data_bphe = pm.Binomial('data_bphe', p=p_bphe, n=n, value=bpheb[where_bphe], observed=True)
     
 where_phe = np.where(datatype=='phe')
 cur_obs = np.array([pheab[where_phe],phea[where_phe],pheb[where_phe],phe0[where_phe]]).T
 n = np.sum(cur_obs, axis=1)
 p_phe = pm.Lambda('p_%i'%i, lambda pb=pb[where_phe], p0=p0[where_phe], p1=p1: np.array([\
     g_freqs['ab'](pb,p0,p1),
     g_freqs['a0'](pb,p0,p1)+g_freqs['a1'](pb,p0,p1)+g_freqs['aa'](pb,p0,p1),
     g_freqs['b0'](pb,p0,p1)+g_freqs['b1'](pb,p0,p1)+g_freqs['bb'](pb,p0,p1),
     g_freqs['00'](pb,p0,p1)+g_freqs['01'](pb,p0,p1)+g_freqs['11'](pb,p0,p1)]).T, trace=False)
 np.testing.assert_almost_equal(p_phe.value.sum(axis=1), 1)
 data_phe = pm.Multinomial('data_phe', p=p_phe, n=n, value=cur_obs, observed=True)    
 
 where_gen = np.where(datatype=='gen')
 cur_obs = np.array([genaa[where_gen],genab[where_gen],gena0[where_gen],gena1[where_gen],genbb[where_gen],genb0[where_gen],genb1[where_gen],gen00[where_gen],gen01[where_gen],gen11[where_gen]]).T
 n = np.sum(cur_obs,axis=1)
 p_gen = pm.Lambda('p_gen', lambda pb=pb[where_gen], p0=p0[where_gen], p1=p1, g_freqs=g_freqs: \
     np.array([g_freqs[key](pb,p0,p1) for key in ['aa','ab','a0','a1','bb','b0','b1','00','01','11']]).T, trace=False)
 np.testing.assert_almost_equal(p_gen.value.sum(axis=1), 1)
 data_gen = pm.Multinomial('data_gen', p=p_gen, n=n, value=cur_obs, observed=True)
 
 # Now vivax.
 cur_obs = np.array([vivax_pos[where_vivax], vivax_neg[where_vivax]]).T
 pphe0 = pm.Lambda('pphe0_%i'%i, lambda pb=pb[where_vivax], p0=p0[where_vivax], p1=p1: (g_freqs['00'](pb,p0,p1)+g_freqs['01'](pb,p0,p1)+g_freqs['11'](pb,p0,p1)), trace=False)
 p_vivax = pm.Lambda('p_vivax', lambda pphe0=pphe0, pv=pv: pv*(1-pphe0), trace=False)
 try:
     warnings.warn('Not using age correction')
Example #14
0
File: model.py Project: zkxshg/pymc
def make_model(lon, lat, africa, n, datatype, genaa, genab, genbb, gen00,
               gena0, genb0, gena1, genb1, gen01, gen11, pheab, phea, pheb,
               phe0, prom0, promab, aphea, aphe0, bpheb, bphe0):
    """Build the PyMC nodes of the spatial allele-frequency model.

    Two spatial submodels ('b' and '0') are created with ``make_gp_submodel``.
    For every data cluster a nuggeted copy of each field is drawn, mapped
    through ``pm.invlogit`` to the frequencies ``pb`` and ``p0``, and a
    likelihood matching the cluster's ``datatype`` is attached.

    Parameters:
        lon, lat: cluster coordinates; stacked as two columns and scaled by
            pi/180 (assumes degrees - TODO confirm).
        africa: covariate forwarded to the 'b' spatial submodel.
        n: only ``len(n)`` is used, as the number of clusters. The name is
            rebound inside the loop to the current cluster's total count.
        datatype: per-cluster tag selecting the likelihood; one of 'prom',
            'aphe', 'bphe', 'phe' or 'gen'. Any other value adds no
            likelihood for that cluster.
        genaa ... gen11: per-cluster counts used by the 'gen' likelihood.
        pheab, phea, pheb, phe0: per-cluster counts for the 'phe' likelihood.
        prom0, promab: per-cluster counts for the 'prom' likelihood.
        aphea, aphe0: per-cluster counts for the 'aphe' likelihood.
        bpheb, bphe0: per-cluster counts for the 'bphe' likelihood.

    Returns:
        dict: ``locals()`` of this function - arguments, submodel variables,
        the per-cluster node lists (``tilde_fb_d``, ``tilde_fs_d``,
        ``data_d``), the concatenated ``tilde_fb``/``tilde_fs`` nodes, and
        the loop temporaries left over from the last cluster.
    """
    logp_mesh = np.vstack((lon, lat)).T * np.pi / 180.

    # Probability of mutation in the promoter region, given that the other thing is a.
    p1 = pm.Uniform('p1', 0, .04, value=.01)

    # Spatial submodels
    spatial_b_vars = make_gp_submodel('b',
                                      logp_mesh,
                                      africa,
                                      with_africa_covariate=True)
    spatial_s_vars = make_gp_submodel('0', logp_mesh)
    sp_sub_b = spatial_b_vars['sp_sub']
    sp_sub_s = spatial_s_vars['sp_sub']

    # Loop over data clusters, adding nugget and applying link function.
    tilde_fs_d = []
    p0_d = []  # never filled below; kept because it is part of locals()
    tilde_fb_d = []
    pb_d = []  # never filled below; kept because it is part of locals()
    V_b = spatial_b_vars['V']
    V_s = spatial_s_vars['V']
    data_d = []

    # range() evaluates len(n) once, so rebinding n inside the loop does not
    # change the number of iterations.
    for i in range(len(n)):
        this_fb = sp_sub_b.f_eval[i]
        this_fs = sp_sub_s.f_eval[i]

        # Nuggeted field in this cluster. PyMC 2's Normal is parameterized by
        # precision, hence 1/V.
        tilde_fb_d.append(
            pm.Normal('tilde_fb_%i' % i,
                      this_fb,
                      1. / V_b,
                      value=np.random.normal(),
                      trace=False))
        tilde_fs_d.append(
            pm.Normal('tilde_fs_%i' % i,
                      this_fs,
                      1. / V_s,
                      value=np.random.normal(),
                      trace=False))

        # The frequencies. pb comes from the 'b' field and p0 from the '0'
        # field; the Python names used to be swapped relative to the node
        # names, so every likelihood below received the wrong frequency.
        pb = pm.Lambda('pb_%i' % i,
                       lambda lt=tilde_fb_d[-1]: pm.invlogit(lt),
                       trace=False)
        p0 = pm.Lambda('p0_%i' % i,
                       lambda lt=tilde_fs_d[-1]: pm.invlogit(lt),
                       trace=False)

        # The likelihoods
        if datatype[i] == 'prom':
            cur_obs = [prom0[i], promab[i]]
            # Need to have either b and 0 or a and 1 on both chromosomes
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: (pb * p0 +
                                                       (1 - pb) * p1)**2,
                          trace=False)
            n = np.sum(cur_obs)
            data_d.append(
                pm.Binomial('data_%i' % i,
                            p=p,
                            n=n,
                            value=prom0[i],
                            observed=True))

        elif datatype[i] == 'aphe':
            cur_obs = [aphea[i], aphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (a and not 1) on either chromosome, or not (not (a and not 1) on both chromosomes)
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - (1 - pb) *
                                                           (1 - p1))**2,
                          trace=False)
            data_d.append(
                pm.Binomial('data_%i' % i,
                            p=p,
                            n=n,
                            value=aphea[i],
                            observed=True))

        elif datatype[i] == 'bphe':
            cur_obs = [bpheb[i], bphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (b and not 0) on either chromosome
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - pb *
                                                           (1 - p0))**2,
                          trace=False)
            # The observed count is the 'b' phenotype count of this branch
            # (it used to be aphea[i], copied from the 'aphe' branch).
            data_d.append(
                pm.Binomial('data_%i' % i,
                            p=p,
                            n=n,
                            value=bpheb[i],
                            observed=True))

        elif datatype[i] == 'phe':
            cur_obs = np.array([pheab[i], phea[i], pheb[i], phe0[i]])
            n = np.sum(cur_obs)
            # Phenotype probabilities in the order ab, a, b, 0, each the sum
            # of the genotype frequencies grouped on that line.
            p = pm.Lambda(
                'p_%i' % i,
                lambda pb=pb, p0=p0, p1=p1: np.array([
                    g_freqs['ab'](pb, p0, p1),
                    g_freqs['a0'](pb, p0, p1) + g_freqs['a1'](pb, p0, p1) + g_freqs['aa'](pb, p0, p1),
                    g_freqs['b0'](pb, p0, p1) + g_freqs['b1'](pb, p0, p1) + g_freqs['bb'](pb, p0, p1),
                    g_freqs['00'](pb, p0, p1) + g_freqs['01'](pb, p0, p1) + g_freqs['11'](pb, p0, p1),
                ]),
                trace=False)
            # Sanity check: the probabilities must sum to one.
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(
                pm.Multinomial('data_%i' % i,
                               p=p,
                               n=n,
                               value=cur_obs,
                               observed=True))

        elif datatype[i] == 'gen':
            cur_obs = np.array([
                genaa[i], genab[i], gena0[i], gena1[i], genbb[i], genb0[i],
                genb1[i], gen00[i], gen01[i], gen11[i]
            ])
            n = np.sum(cur_obs)
            # Genotype probabilities, in the same order as cur_obs.
            p = pm.Lambda(
                'p_%i' % i,
                lambda pb=pb, p0=p0, p1=p1, g_freqs=g_freqs: np.array([
                    g_freqs[key](pb, p0, p1)
                    for key in ['aa', 'ab', 'a0', 'a1', 'bb', 'b0', 'b1',
                                '00', '01', '11']
                ]),
                trace=False)
            # Sanity check: the probabilities must sum to one.
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(
                pm.Multinomial('data_%i' % i,
                               p=p,
                               n=n,
                               value=cur_obs,
                               observed=True))

    # The fields plus the nugget, in convenient vector form
    @pm.deterministic
    def tilde_fb(tilde_fb_d=tilde_fb_d):
        """Concatenated version of tilde_fb, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fb_d)

    @pm.deterministic
    def tilde_fs(tilde_fs_d=tilde_fs_d):
        """Concatenated version of tilde_fs, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fs_d)

    return locals()
Example #15
0
from pylab import *

import pymc
from pymc import Matplot
import numpy as np
from scipy.misc import factorial
import spacepy.plot as spp

# Observed counts for three categories.
data=np.array([33,66,1])
# Four Uniform(0, 100) variables with explicit starting values.
rates=pymc.Uniform('rates',0,100,size=4,value=[0.01,2,10,1])

# NOTE(review): `rates` is declared as a parent, but the returned
# probabilities are constant and do not depend on it - confirm intent.
@pymc.deterministic(plot=True)
def prob(rates=rates):
    return np.array([0.33,0.66,0.01])

# Observed Multinomial: n is the total of the observed counts.
likelihood=pymc.Multinomial('likelihood',n=sum(data),p=prob,value=data,observed=True)
M = pymc.MCMC(likelihood)

M.sample(100000)

Matplot.summary_plot(M)

#
# @pymc.observed
# def y(value=1):
#     pymc.categorical_like()
#
#     return 10**value * np.exp(-10)/ factorial(value)
#
# M = pymc.MCMC(y)