Example #1
import numpy as nmp
import pymc3 as pmc
import theano.tensor as tns
# aux is a project-local helper module providing corr_vector_to_matrix_indices,
# stick_breaking_log and binmixND_logp_fcn.

def get_model(data, K, alpha, sigma, sigma2, eta, *args, **kwargs):
    # (sigma2 is accepted but unused in this variant.)
    r = data.pivot(index='MUTID', columns='SAMPLEID', values='r').values
    R = data.pivot(index='MUTID', columns='SAMPLEID', values='R').values
    VAF0 = data.pivot(index='MUTID', columns='SAMPLEID', values='VAF0').values
    r, R, VAF0 = r[:, :, None], R[:, :, None], VAF0[:, :, None]

    nsamples = data.SAMPLEID.nunique()

    # Indices that expand LKJCorr's packed upper triangle into a full matrix.
    idxs = aux.corr_vector_to_matrix_indices(nsamples)
    D = tns.eye(nsamples) * sigma**2
    with pmc.Model() as model:
        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        # Truncated stick-breaking construction of the mixture log-weights.
        u = pmc.Beta('u', 1.0, alpha, shape=K - 1)
        lw = pmc.Deterministic('lw', aux.stick_breaking_log(u))

        # Expand the packed correlations, reset the diagonal to 1, and scale
        # by sigma**2 to obtain the covariance across samples.
        C_ = pmc.LKJCorr('C', eta=eta, n=nsamples)
        C = tns.fill_diagonal(C_[idxs], 1.0)
        Sigma = D.dot(C)
        psi = pmc.MvNormal('psi',
                           mu=nmp.zeros(nsamples),
                           cov=Sigma,
                           shape=(K, nsamples))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi.T))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(nsamples), cov=D, shape=(K, nsamples))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi.T))

        theta = pmc.Deterministic('theta', VAF0 * phi[None, :, :])

        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
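The helper aux.stick_breaking_log above is project-local, not part of PyMC3. A minimal sketch of what such a helper typically computes, assuming the standard truncated stick-breaking construction (the name and the tns alias come from the example; the body is a guess):

import theano.tensor as tns

def stick_breaking_log(u):
    # log w_k = log u_k + sum_{j<k} log(1 - u_j) for k < K; the K-th weight
    # is the remaining stick, sum_j log(1 - u_j).
    log_rest = tns.concatenate([tns.zeros(1), tns.cumsum(tns.log1p(-u))])
    return tns.concatenate([tns.log(u), tns.zeros(1)]) + log_rest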
Example #2
def get_model(x, r, R, vaf0, K=10):
    nsamples = r.shape[1]
    r, R, vaf0 = r[:, :, None], R[:, :, None], vaf0[:, :, None]
    idxs = aux.corr_vector_to_matrix_indices(K)
    with pmc.Model() as model:
        w = pmc.Dirichlet('w', nmp.ones(K))
        lw = tns.log(w)

        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        # u = pmc.Beta('u', 1.0, alpha, shape=K-1)
        # lw = aux.stick_breaking_log(u)

        # Smooth variation across the inputs x (rows) is combined with an
        # LKJ-correlated structure across the K clusters (columns).
        rho = pmc.Gamma('rho', 1.0, 1.0)
        Cc = tns.fill_diagonal(pmc.LKJCorr('C', eta=2.0, n=K)[idxs], 1.0)
        Cr = aux.cov_quad_exp(x, 1.0, rho)
        mu_psi = pmc.MatrixNormal('mu_psi',
                                  mu=nmp.zeros((nsamples, K)),
                                  rowcov=Cr,
                                  colcov=Cc,
                                  shape=(nsamples, K))
        psi = pmc.Normal('psi', mu=mu_psi, sd=0.1, shape=(nsamples, K))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(K), tau=nmp.eye(K), shape=(nsamples, K))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        theta = pmc.Deterministic('theta', vaf0 * phi[None, :, :])
        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
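aux.cov_quad_exp is likewise project-local; a plausible sketch, assuming it is the usual squared-exponential (RBF) covariance over one-dimensional inputs:

import theano.tensor as tns

def cov_quad_exp(x, s, rho):
    # k(x_i, x_j) = s**2 * exp(-0.5 * ((x_i - x_j) / rho)**2)
    d = x[:, None] - x[None, :]
    return s**2 * tns.exp(-0.5 * (d / rho)**2)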
Example #3
def multivariatenormal(init_mean,
                       init_sigma,
                       init_corr,
                       suffix="",
                       dist=False):
    if not isinstance(suffix, str):
        suffix = str(suffix)
    D = len(init_sigma)

    sigma = pm.Lognormal('sigma' + suffix,
                         np.zeros(D),
                         np.ones(D),
                         shape=D,
                         testval=init_sigma)
    nu = pm.Uniform('nu' + suffix, 0, 5)
    C_triu = pm.LKJCorr('C_triu' + suffix, nu, D, testval=init_corr)
    cov = pm.Deterministic('cov' + suffix,
                           make_cov_matrix(sigma, C_triu, module=tt))

    mu = pm.MvNormal('mu' + suffix, np.zeros(D), cov, shape=D, testval=init_mean)

    # NB: `data` below comes from the enclosing scope, not the arguments.
    return pm.MvNormal.dist(mu, cov) if dist else pm.MvNormal(
        'mvn' + suffix, mu, cov, shape=data.shape)
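make_cov_matrix is defined elsewhere in this source file. A hedged sketch of the construction it presumably performs, mirroring the fill_diagonal pattern used in the other examples (the D=2 default matches the shape above; the signature is a guess):

import numpy as np
import theano.tensor as tt

def make_cov_matrix(sigma, C_triu, D=2, module=tt):
    # Expand LKJCorr's packed upper triangle into a full correlation matrix,
    # then scale rows and columns by the standard deviations.
    n_elem = D * (D - 1) // 2
    tri = np.zeros((D, D), dtype=int)
    tri[np.triu_indices(D, k=1)] = np.arange(n_elem)
    tri[np.triu_indices(D, k=1)[::-1]] = np.arange(n_elem)
    corr = module.fill_diagonal(C_triu[tri], 1.0)
    return module.diag(sigma).dot(corr).dot(module.diag(sigma))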
Example #4
    def __init__(self,
                 dimension,
                 mu_data,
                 tau_data,
                 prior="Gaussian",
                 parameters={
                     "location": None,
                     "scale": None,
                     "corr": False
                 },
                 hyper_alpha=None,
                 hyper_beta=None,
                 hyper_gamma=None,
                 hyper_delta=None,
                 transformation=None,
                 parametrization="non-central",
                 name='',
                 model=None):

        assert isinstance(dimension, int), "dimension must be integer!"
        assert dimension in [3, 5, 6], "Not a valid dimension!"

        D = dimension

        # 2) call super's init first, passing model and name
        # to it name will be prefix for all variables here if
        # no name specified for model there will be no prefix
        super().__init__(str(D) + "D", model)
        # now you are in the context of instance,
        # `modelcontext` will return self you can define
        # variables in several ways note, that all variables
        # will get model's name prefix

        #------------------- Data ------------------------------------------------------
        N = int(len(mu_data) / D)
        if N == 0:
            sys.exit(
                "Data has length zero! You must provide at least one data point."
            )
        #-------------------------------------------------------------------------------

        #============= Transformations ====================================

        if transformation is "mas":
            Transformation = Iden

        elif transformation is "pc":
            if D is 3:
                Transformation = cartesianToSpherical
            elif D is 6:
                Transformation = phaseSpaceToAstrometry_and_RV
            elif D is 5:
                Transformation = phaseSpaceToAstrometry
                D = 6

        else:
            sys.exit("Transformation is not accepted")
        #==================================================================

        #================ Hyper-parameters =====================================
        if hyper_delta is None:
            shape = 1
        else:
            shape = len(hyper_delta)

        #--------- Location ----------------------------------
        if parameters["location"] is None:

            location = [
                pm.Normal("loc_{0}".format(i),
                          mu=hyper_alpha[i][0],
                          sigma=hyper_alpha[i][1],
                          shape=shape) for i in range(D)
            ]

            #--------- Join variables --------------
            mu = pm.math.stack(location, axis=1)

        else:
            mu = parameters["location"]
        #------------------------------------------------------

        #------------- Scale --------------------------
        if parameters["scale"] is None:
            scale = [
                pm.Gamma("scl_{0}".format(i),
                         alpha=2.0,
                         beta=2.0 / hyper_beta[i][0],
                         shape=shape) for i in range(D)
            ]

        else:
            scale = parameters["scale"]
        #--------------------------------------------------

        #----------------------- Correlation -----------------------------------------
        if parameters["corr"]:
            pm.LKJCorr('chol_corr', eta=hyper_gamma, n=D)
            C = tt.fill_diagonal(
                self.chol_corr[np.zeros((D, D), dtype=np.int64)], 1.)
            # print_ = tt.printing.Print('C')(C)
        else:
            C = np.eye(D)
        #-----------------------------------------------------------------------------

        #-------------------- Covariance -------------------------
        sigma_diag = pm.math.stack(scale, axis=1)
        cov = theano.shared(np.zeros((shape, D, D)))

        for i in range(shape):
            sigma = tt.nlinalg.diag(sigma_diag[i])
            covi = tt.nlinalg.matrix_dot(sigma, C, sigma)
            cov = tt.set_subtensor(cov[i], covi)
        #---------------------------------------------------------
        #========================================================================

        #===================== True values ============================================
        if prior is "Gaussian":
            pm.MvNormal("source", mu=mu, cov=cov[0], shape=(N, D))

        elif prior is "GMM":
            pm.Dirichlet("weights", a=hyper_delta, shape=shape)

            comps = [
                pm.MvNormal.dist(mu=mu[i], cov=cov[i]) for i in range(shape)
            ]

            pm.Mixture("source",
                       w=self.weights,
                       comp_dists=comps,
                       shape=(N, D))

        else:
            sys.exit("The specified prior is not supported")
        #=================================================================================

        #----------------------- Transformation---------------------------------------
        transformed = Transformation(self.source)
        #-----------------------------------------------------------------------------

        #------------ Flatten --------------------------------------------------------
        true = pm.math.flatten(transformed)
        #----------------------------------------------------------------------------

        #----------------------- Likelihood ----------------------------------------
        pm.MvNormal('obs', mu=true, tau=tau_data, observed=mu_data)
        #------------------------------------------------------------------------------
Example #5
def build_mod_bpmf_model(train, alpha=2, dim=10, std=0.01):
    """Build the modified BPMF model using pymc3. The original model uses
    Wishart priors on the covariance matrices. Unfortunately, the Wishart
    distribution in pymc3 is currently not suitable for sampling. This
    version decomposes the covariance matrix into:

        diag(sigma) * corr_matrix * diag(sigma).

    We use uniform priors on the standard deviations (sigma) and LKJCorr
    priors on the correlation matrices (corr_matrix):

        sigma ~ Uniform
        corr_matrix ~ LKJCorr(n=1, p=dim)

    """
    n, m = train.shape
    beta_0 = 1  # scaling factor on the precision of the feature means (beta_0 in the BPMF paper)

    # Mean value imputation on training data.
    train = train.copy()
    nan_mask = np.isnan(train)
    train[nan_mask] = train[~nan_mask].mean()

    # We will use separate priors for sigma and correlation matrix.
    # In order to convert the upper triangular correlation values to a
    # complete correlation matrix, we need to construct an index matrix:
    n_elem = dim * (dim - 1) // 2
    tri_index = np.zeros([dim, dim], dtype=int)
    tri_index[np.triu_indices(dim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(dim, k=1)[::-1]] = np.arange(n_elem)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        sigma_u = pm.Uniform('sigma_u', shape=dim)
        corr_triangle_u = pm.LKJCorr('corr_u',
                                     n=1,
                                     p=dim,
                                     testval=np.random.randn(n_elem) * std)

        corr_matrix_u = corr_triangle_u[tri_index]
        corr_matrix_u = t.fill_diagonal(corr_matrix_u, 1)
        cov_matrix_u = t.diag(sigma_u).dot(corr_matrix_u.dot(t.diag(sigma_u)))
        lambda_u = t.nlinalg.matrix_inverse(cov_matrix_u)

        mu_u = pm.Normal('mu_u',
                         mu=0,
                         tau=beta_0 * t.diag(lambda_u),
                         shape=dim,
                         testval=np.random.randn(dim) * std)
        U = pm.MvNormal('U',
                        mu=mu_u,
                        tau=lambda_u,
                        shape=(n, dim),
                        testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        sigma_v = pm.Uniform('sigma_v', shape=dim)
        corr_triangle_v = pm.LKJCorr('corr_v',
                                     n=1,
                                     p=dim,
                                     testval=np.random.randn(n_elem) * std)

        corr_matrix_v = corr_triangle_v[tri_index]
        corr_matrix_v = t.fill_diagonal(corr_matrix_v, 1)
        cov_matrix_v = t.diag(sigma_v).dot(corr_matrix_v.dot(t.diag(sigma_v)))
        lambda_v = t.nlinalg.matrix_inverse(cov_matrix_v)

        mu_v = pm.Normal('mu_v',
                         mu=0,
                         tau=beta_0 * t.diag(lambda_v),
                         shape=dim,
                         testval=np.random.randn(dim) * std)
        V = pm.MvNormal('V',
                        mu=mu_v,
                        tau=lambda_v,
                        shape=(m, dim),
                        testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal('R',
                      mu=t.dot(U, V.T),
                      tau=alpha * np.ones((n, m)),
                      observed=train)

    logging.info('done building the BPMF model')
    return bpmf
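Later PyMC3 releases provide pm.LKJCholeskyCov, which samples a Cholesky factor of the covariance directly and removes the tri_index bookkeeping used above. A minimal sketch of the replacement pattern (not part of the original example; n_rows is illustrative):

import numpy as np
import pymc3 as pm

dim, n_rows = 10, 100
with pm.Model():
    packed_chol = pm.LKJCholeskyCov('packed_chol', n=dim, eta=2.0,
                                    sd_dist=pm.HalfCauchy.dist(2.5))
    chol = pm.expand_packed_triangular(dim, packed_chol, lower=True)
    U = pm.MvNormal('U', mu=np.zeros(dim), chol=chol, shape=(n_rows, dim))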
Example #6
#%% PyMC3 model - directly on latent - v2
# In order to convert the upper triangular correlation values to a complete
# correlation matrix, we need to construct an index matrix:
n_elem = int(Nd * (Nd - 1) / 2)
tri_index = np.zeros([Nd, Nd], dtype=int)
tri_index[np.triu_indices(Nd, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(Nd, k=1)[::-1]] = np.arange(n_elem)

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0, sd=1, shape=Nd)

    # We can specify separate priors for sigma and the correlation matrix:
    sd = pm.Uniform('sigma', shape=Nd)
    lam = pm.Deterministic('lambda', 1 / tt.sqr(sd))

    corr_triangle = pm.LKJCorr('r', eta=2, n=Nd)
    corr_matrix = corr_triangle[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)

    cov = tt.diag(sd).dot(corr_matrix.dot(tt.diag(sd)))
    # chol = qr_chol(cov)

    lat_factor = pm.MvNormal('latent', mu=mu, cov=cov, observed=latent)

    trace_lat = pm.sample(2000, njobs=4)
#    start = pm.find_MAP()
#    trace_lat = pm.sample(2000, start=start, njobs=2)
#%% PyMC3 model - directly on latent - v3
with pm.Model() as model:
    mu = pm.Normal('mu', mu=0, sd=1, shape=Nd)
Example #7
def gen_random_corr(num_models, eta=1):
    model = pm.Model()
    with model:
        packed = pm.LKJCorr('packed_L', n=num_models, eta=eta, transform=None)
        packed_array = packed.random(size=1)
    return unpack_upper_triangle(packed_array, num_models)
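unpack_upper_triangle is not shown in the listing; a plausible sketch (an assumption) that rebuilds the symmetric correlation matrix from the packed draw:

import numpy as np

def unpack_upper_triangle(packed, n):
    # packed has shape (draws, n*(n-1)//2); use the first draw.
    vals = np.asarray(packed).reshape(-1)[:n * (n - 1) // 2]
    out = np.eye(n)
    iu = np.triu_indices(n, k=1)
    out[iu] = vals
    out[(iu[1], iu[0])] = vals  # mirror into the lower triangle
    return out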
Example #8
dataset = multivariate_normal(mu, cov_matrix, size=n_obs)  # numpy.random.multivariate_normal

# In order to convert the upper triangular correlation values to a complete
# correlation matrix, we need to construct an index matrix:
n_elem = int(n_var * (n_var - 1) / 2)
tri_index = np.zeros([n_var, n_var], dtype=int)
tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)

with pm.Model() as model:

    mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)

    # We can specify separate priors for sigma and the correlation matrix:
    sigma = pm.Uniform('sigma', shape=n_var)
    corr_triangle = pm.LKJCorr('corr', n=1, p=n_var)
    corr_matrix = corr_triangle[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)

    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))

    like = pm.MvNormal('likelihood', mu=mu, cov=cov_matrix, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n, step=step, start=start)
Example #9
def analyze(X, y_, initNum, stepNum=1000, outputFile=None, plot=False, v=0.5,
            multi=False, chains=4, C_sigma=2, name=None, indices=None,
            features=None):
    #Preprocess data for Modeling
    #betaCovars=['{}_beta'.format(i) for i in X.columns]
    #gammaCovars=['{}_gamma'.format(i) for i in X.columns]
    #covarNames=gammaCovars+betaCovars+['alpha']
    #features=X.columns[1:] 
    #X=np.matrix(X)[:,1:]
    #print X
    m=X.shape[1]
    if indices is None:
        indices = range(m)  # indices of the analyzed features, used in the summary report
    sha_m = shared(m)
    y_ = np.array(y_)
    initNum = np.array(initNum)
    shA_X = shared(X)
    #print "data created"
    #Generate Model
    #graphMatrix=featureListToGraphMatrix(features)
    linear_model = pm.Model()
    #print m
    with linear_model:
        # Priors for unknown evoModel parameters
        #print y_.mean(),y_.std()
        obs = np.floor(y_ * initNum)
        # defining the mean
        alpha = pm.Normal("alpha", mu=sc.special.logit(y_.mean()), sd=sc.special.logit(y_.std()))
        #gamma=gp.GammaaPrior("gamma",v=v,graphMatrix=graphMatrix)
        gamma=pm.Bernoulli("gamma",p=v,shape=m)
        #defining the covaricnce matrix
        sha_C=shared(C_sigma)
        sigma =pm.Uniform('sigma',lower=0.2,upper=1.5)
        nu = pm.Uniform('nu', 0, 5)
        C_triu = pm.LKJCorr('C_triu', nu, 2) 
        C = pm.Deterministic('C', T.fill_diagonal(C_triu[np.zeros((m, m), dtype=np.int64)], 1.))
        #print type(gamma),type(sigma),type(X.shape[1])
        GammaPriorVariance=GammaCovariance(gamma, sigma,sha_m,sha_C)
        sigma_diag = pm.Deterministic('sigma_mat', T.nlinalg.diag(GammaPriorVariance))
        cov = pm.Deterministic('cov', T.nlinalg.matrix_dot(sigma_diag, C, sigma_diag))            
        #sigma = pm.InverseGamma("sigma",alpha=0.05,beta=0.05)#http://www.stat.columbia.edu/~gelman/research/published/taumain.pdf
        betas = pm.MvNormal("betas",0,
                                  cov=cov,shape=m)
        # Expected value of outcome
        mu = exp_it(alpha+np.array([betas[j]*gamma[j]*shA_X[:,j] for j in range(m)]).sum())   
        likelihood =pm.Binomial("likelihood", n=initNum, p=mu, observed=obs)

        stepC = Metropolis([betas, gamma, nu, C_triu, sigma, alpha], sigmaFactor=2, scaling=0.5)  # custom Metropolis subclass; sigmaFactor is not a stock PyMC3 argument
        #stepD=pm.BinaryGibbsMetropolis([gamma])#,scalingFunc=coolDownFunc
        #step=
        # MCMC
        tic = time.time()
        #trace = merge_traces([pm.sample(stepNum/5,tune=stepNum/25.,step=[stepC],progressbar=True,njobs=1,chain=i) for i in range(5)])
        trace = pm.sample(stepNum, tune=stepNum // 5, step=[stepC], progressbar=True, njobs=1)
        tac = time.time()
        print(tac - tic)
        summ = pm.stats.df_summary(trace)
    if features is not None:
        # Make the summary more readable by replacing each beta's numeric
        # index with the corresponding feature name.
        print(summ.index.values)
        variables = summ.index.values  # the variable names in the summary
        for i in range(len(variables)):
            if "betas" in variables[i]:  # a beta coefficient
                num = int(variables[i].split("__")[1])  # extract its index
                variables[i] = features[indices[num]]  # map through `indices` to the relevant feature
        summ.index = variables  # put the renamed index back
    #dfSummary=pm.stats.df_summary(trace)
    #Traceplot
    if plot:
        pm.traceplot(trace, varnames=['alpha', 'betas', 'sigma'], combined=True)
        plt.title(name)
        plt.show()
    else:
        pm.traceplot(trace, varnames=['alpha', 'betas', 'sigma'], combined=True)
        plt.title(name)
        plt.savefig('/sternadi/home/volume1/guyling/MCMC/dataSimulations/plots/{}.png'.format(name))
    #print summ
    return summ
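exp_it and GammaCovariance are helpers local to this script. exp_it is presumably the inverse-logit link implied by the Binomial likelihood; a one-line sketch under that assumption:

import theano.tensor as T

def exp_it(x):
    # inverse logit: maps the linear predictor to a probability in (0, 1)
    return 1.0 / (1.0 + T.exp(-x))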
Example #10
    
    # Fragment of pymc3.distributions.distribution.generate_samples (the code
    # computing dist_shape, broadcast_shape, repeat_shape, prefix_shape and
    # size is not shown), with a debug print added:
    print(repeat_shape)
    if broadcast_shape == (1,) and prefix_shape == ():
        if size is not None:
            samples = generator(size=size, *args, **kwargs)
        else:
            samples = generator(size=1, *args, **kwargs)
    else:
        if size is not None:
            samples = replicate_samples(generator,
                                        broadcast_shape,
                                        repeat_shape + prefix_shape,
                                        *args, **kwargs)
        else:
            samples = replicate_samples(generator,
                                        broadcast_shape,
                                        prefix_shape,
                                        *args, **kwargs)
    return reshape_sampled(samples, size, dist_shape)

#%%
import pymc3 as pm
from pymc3.distributions.distribution import draw_values
with pm.Model() as model:
    lkj = pm.LKJCorr('lkj', n=5, eta=1.)

n, eta = draw_values([lkj.distribution.n, lkj.distribution.eta], point=model.test_point)
testlkj = lkj.distribution
size = 100
samples = generate_samples(testlkj._random, n, eta,
                           broadcast_shape=(size,))
Example #11
# Only the final line of pm_make_cov survived in this listing; a plausible
# reconstruction of the helper (everything above `return` is an assumption):
def pm_make_cov(sigma_priors, corr_coeffs, ndim):
    sigma_matrix = tt.nlinalg.diag(sigma_priors)
    n_elem = ndim * (ndim - 1) // 2
    tri_index = np.zeros([ndim, ndim], dtype=int)
    tri_index[np.triu_indices(ndim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(ndim, k=1)[::-1]] = np.arange(n_elem)
    corr_matrix = tt.fill_diagonal(corr_coeffs[tri_index], 1.0)
    return tt.nlinalg.matrix_dot(sigma_matrix, corr_matrix, sigma_matrix)

# Define a varying slopes model incorporating a beta_urban term
with pm.Model() as model_vs:  
    # Set the prior for the overall intercept
    alpha = pm.Normal(name='alpha', mu=0.0, sd=10.0)
    # Set the prior for the overall intercept on urban, beta
    beta = pm.Normal(name='beta', mu=0.0, sd=10.0)
    
    # Citation: http://am207.info/wiki/corr.html for code controlling correlation structure
    # The parameter nu (the LKJ eta) sets the prior on the correlation matrix:
    # 1 is uniform; larger values concentrate mass near zero correlation
    nu = pm.Uniform('nu', 1.0, 5.0)
    # The number of dimensions here is 2: correlation structure is between alpha and beta by district
    num_factors: int = 2
    # Sample the correlation coefficients using the LKJ distribution
    corr_coeffs = pm.LKJCorr('corr_coeffs', nu, num_factors)

    # Sample the variances of the single factors
    sigma_priors = tt.stack([pm.Lognormal('sigma_prior_alpha', mu=0.0, tau=1.0),
                             pm.Lognormal('sigma_prior_beta', mu=0.0, tau=1.0)])

    # Make the covariance matrix as a Theano tensor
    cov = pm.Deterministic('cov', pm_make_cov(sigma_priors, corr_coeffs, num_factors))
    # The multivariate Gaussian of (alpha, beta) by district
    theta_district = pm.MvNormal('theta_district', mu=[0.0, 0.0], cov=cov, shape=(num_districts, num_factors))   

    # The vector of standard deviations for each variable; size num_factors x num_factors
    # Citation: efficient generation of sigmas and rhos from cov
    # https://github.com/aloctavodia/Statistical-Rethinking-with-Python-and-PyMC3/blob/master/Chp_13.ipynb
    sigmas = pm.Deterministic('sigmas', tt.sqrt(tt.diag(cov)))
    # correlation matrix (num_factors x num_factors)