Example 1
 def make_model(cls):
     with pm.Model() as model:
         sd_mu = np.array([1, 2, 3, 4, 5])
         sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, shape=5)
         chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist)
         chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
         cov = tt.dot(chol, chol.T)
         stds = tt.sqrt(tt.diag(cov))
         pm.Deterministic("log_stds", tt.log(stds))
         corr = cov / stds[None, :] / stds[:, None]
         corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
         pm.Deterministic("corr_entries_unit", corr_entries_unit)
     return model
Example 2
    def test_sample_prior_and_posterior(self):
        def build_toy_dataset(N, K):
            pi = np.array([0.2, 0.5, 0.3])
            mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
            stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
            x = np.zeros((N, 3), dtype=np.float32)
            y = np.zeros((N, ), dtype=int)
            for n in range(N):
                k = np.argmax(np.random.multinomial(1, pi))
                x[n, :] = np.random.multivariate_normal(
                    mus[k], np.diag(stds[k]))
                y[n] = k
            return x, y

        N = 100  # number of data points
        K = 3  # number of mixture components
        D = 3  # dimensionality of the data

        X, y = build_toy_dataset(N, K)

        with pm.Model() as model:
            pi = pm.Dirichlet("pi", np.ones(K), shape=(K, ))

            comp_dist = []
            mu = []
            packed_chol = []
            chol = []
            for i in range(K):
                mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
                packed_chol.append(
                    pm.LKJCholeskyCov("chol_cov_%i" % i,
                                      eta=2,
                                      n=D,
                                      sd_dist=pm.HalfNormal.dist(2.5)))
                chol.append(
                    pm.expand_packed_triangular(D, packed_chol[i], lower=True))
                comp_dist.append(
                    pm.MvNormal.dist(mu=mu[i], chol=chol[i], shape=D))

            pm.Mixture("x_obs", pi, comp_dist, observed=X)
        with model:
            trace = pm.sample(30, tune=10, chains=1)

        n_samples = 20
        with model:
            ppc = pm.sample_posterior_predictive(trace, n_samples)
            prior = pm.sample_prior_predictive(samples=n_samples)
        assert ppc["x_obs"].shape == (n_samples, ) + X.shape
        assert prior["x_obs"].shape == (n_samples, ) + X.shape
        assert prior["mu0"].shape == (n_samples, D)
        assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
Example 3
def Arodz(x0, x1):
    """Takes in two sample sets, one from each class, and
    returns the MAP estimates of the means and covariance
    """
    numberOfFeatures = len(x0[0])

    # instantiate an empty PyMC3 model
    basic_model = pm.Model()

    # fill the model with details:
    with basic_model:
        # parameters for priors for gaussian means
        mu_prior_cov = 100 * np.eye(numberOfFeatures)
        mu_prior_mu = np.zeros((numberOfFeatures, ))

        # Priors for gaussian means (Gaussian prior): mu1 ~ N(mu_prior_mu, mu_prior_cov), mu0 ~ N(mu_prior_mu, mu_prior_cov)
        mu1 = pm.MvNormal('estimated_mu1',
                          mu=mu_prior_mu,
                          cov=mu_prior_cov,
                          shape=numberOfFeatures)
        mu0 = pm.MvNormal('estimated_mu0',
                          mu=mu_prior_mu,
                          cov=mu_prior_cov,
                          shape=numberOfFeatures)

        # Prior for gaussian covariance matrix (LKJ prior):
        # see here for details: http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
        # and here: http://docs.pymc.io/notebooks/LKJ.html
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=numberOfFeatures)
        chol_packed = pm.LKJCholeskyCov('chol_packed',
                                        n=numberOfFeatures,
                                        eta=2,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed)
        cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T))

        # observations x1, x0 are supposed to be P(x|y=class1)=N(mu1,cov_both), P(x|y=class0)=N(mu0,cov_both)
        # here is where the Dataset (x1,x0) comes to influence the choice of parameters (mu1,mu0, cov_both)
        # this is done through the "observed = ..." argument; note that above we didn't have that
        x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1)
        x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0)
    # done with setting up the model

    # now perform maximum likelihood (actually, maximum a posteriori (MAP), since we have priors) estimation
    # map_estimate1 is a dictionary: "parameter name" -> "its estimated value"
    map_estimate1 = pm.find_MAP(model=basic_model)
    # print(map_estimate1)

    return map_estimate1['estimated_mu0'], map_estimate1[
        'estimated_mu1'], map_estimate1['estimated_cov']
Example 4
def covariation():
    with pm.Model(coords=coords) as m_covariation: 

        # Inputs
        idx_ = pm.Data('idx_shared', idx_train, dims = ('idx', 't'))
        t_ = pm.Data('t_shared', t_train, dims = ('idx', 't'))


        # prior stddev in intercepts & slopes (variation across counties):
        sd_dist = pm.HalfNormal.dist(0.5) # distribution. 

        # get back standard deviations and rho:
        ## eta = 1: uniform (higher --> more weight on low cor.)
        ## n = 2: number of predictors
        chol, corr, stds = pm.LKJCholeskyCov(
            "chol", 
            n=2, 
            eta=2, 
            sd_dist=sd_dist, 
            compute_corr = True) 

        # priors for mean effects
        alpha = pm.Normal("alpha", mu = 1.5, sigma = 0.5)
        beta = pm.Normal("beta", mu = 0, sigma = 0.5)

        # population of varying effects
        alpha_beta = pm.MvNormal(
            "alpha_beta", 
            mu = tt.stack([alpha, beta]), 
            chol = chol, 
            dims=("idx", "param"))

        # expected value per participant at each time-step
        mu = alpha_beta[idx_, 0] + alpha_beta[idx_, 1] * t_

        # model error
        sigma = pm.HalfNormal("sigma", sigma = 0.5)

        # likelihood
        y_pred = pm.Normal(
            "y_pred", 
            mu = mu, 
            sigma = sigma, 
            observed = y_train, 
            dims = ('idx', 't'))

        # return the model
        return m_covariation
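This snippet assumes that coords, idx_train, t_train, and y_train are defined elsewhere. A minimal sketch of the kind of setup the dims expect (illustrative assumptions, not the original data):

import numpy as np

n_id, n_t = 20, 10                                   # participants and time steps (assumed)
coords = {
    "idx": np.arange(n_id),                          # participant index
    "t": np.arange(n_t),                             # time step
    "param": ["alpha", "beta"],                      # intercept and slope
}
idx_train = np.repeat(np.arange(n_id), n_t).reshape(n_id, n_t)  # participant id per cell
t_train = np.tile(np.arange(n_t), (n_id, 1))                    # time value per cell
y_train = np.random.normal(1.5, 0.5, size=(n_id, n_t))          # toy outcomes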
Example 5
def build_model(data, K):
    N = data.shape[0]
    d = data.shape[1]
    print('Building model with n=%d, d=%d, k=%d' % (N, d, K))
    with pm.Model() as gmm:
        #Prior over component weights
        if K > 1:
            p = pm.Dirichlet('p', a=np.array([1.] * K))

        #Prior over component means
        mus = [
            pm.MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(d)),
                        tau=pm.floatX(0.1 * np.eye(d)),
                        shape=(d, ))
            #testval = pm.floatX(np.ones(d)))
            for i in range(K)
        ]
        #Cholesky decomposed LKJ prior over component covariance matrices
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i,
                              n=d,
                              eta=2.,
                              sd_dist=pm.HalfCauchy.dist(1))
            #testval = pm.floatX(np.ones(int(d*(d-1)/2+d))))
            for i in range(K)
        ]
        #Unpack packed_L into full array
        L = [pm.expand_packed_triangular(d, packed_L[i]) for i in range(K)]
        #Convert L to sigma and tau for convenience
        sigma = [
            pm.Deterministic('sigma_%d' % i, L[i].dot(L[i].T))
            for i in range(K)
        ]
        tau = [
            pm.Deterministic('tau_%d' % i, matrix_inverse(sigma[i]))
            for i in range(K)
        ]

        #Specify the likelihood
        if K > 1:
            mvnl = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]
            Y_obs = pm.Mixture('Y_obs', w=p, comp_dists=mvnl, observed=data)
        else:
            Y_obs = pm.MvNormal('Y_obs', mu=mus[0], chol=L[0], observed=data)

    return gmm
Example 6
    def _multivariate_normal_dist(self, init_mu, suffix=""):
        if not isinstance(suffix, str):
            suffix = str(suffix)
        data_dim = len(init_mu)

        # prior of covariance
        sd_dist = pm.HalfCauchy.dist(beta=2.5)
        packed_chol = pm.LKJCholeskyCov('cov' + suffix,
                                        eta=2,
                                        n=data_dim,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(data_dim, packed_chol, lower=True)
        # prior of mean
        mu = pm.MvNormal('mu' + suffix,
                         mu=0,
                         cov=np.eye(data_dim),
                         shape=data_dim)
        return pm.MvNormal.dist(mu, chol=chol)
Example 7
    def test_mv_missing_data_model(self):
        data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)

        model = pm.Model()
        with model:
            mu = pm.Normal("mu", 0, 1, size=2)
            sd_dist = pm.HalfNormal.dist(1.0)
            chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
            y = pm.MvNormal("y", mu=mu, chol=chol, observed=data)
            inference_data = pm.sample(100, chains=2, return_inferencedata=True)

        # make sure that data is really missing
        assert isinstance(y.owner.op, AdvancedIncSubtensor)

        test_dict = {
            "posterior": ["mu", "chol_cov"],
            "observed_data": ["y"],
            "log_likelihood": ["y"],
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Example 8
    def test_mv_missing_data_model(self):
        data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)

        model = pm.Model()
        with model:
            mu = pm.Normal("mu", 0, 1, shape=2)
            sd_dist = pm.HalfNormal.dist(1.0)
            chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
            pm.MvNormal("y", mu=mu, chol=chol, observed=data)
            trace = pm.sample(100, chains=2)

        # make sure that data is really missing
        (y_missing,) = model.missing_values
        assert y_missing.tag.test_value.shape == (4,)
        inference_data = from_pymc3(trace=trace, model=model)
        test_dict = {
            "posterior": ["mu", "chol_cov"],
            "observed_data": ["y"],
            "log_likelihood": ["y"],
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Example 9
    def __init__(self, data, sigma, mu_prior=[0.0,1000.], sigma_prior=200.):

        self.fitted=False
        if np.any(sigma <=0.):
            raise ValueError("Uncertainties must be positive real numbers!")
        self.plot_trace_vars = ['mu', "chol_corr"]
        if data is None:
            raise ValueError("Either data must be given as input, or x and y")
        else:
            self.ndim = data.shape[1]
            self.npoints = data.shape[0]
            self.data = data
            if data.shape != sigma.shape:
                raise RuntimeError("data and sigma must have the same shape!")
            self.sigma = sigma

        self.model = pm.Model()
        with self.model:
            #we put weakly informative hyperpriors on the means and standard deviations of the multivariate normal distribution
            mu = pm.Normal("mu", mu=mu_prior[0], sigma=mu_prior[1], shape=self.ndim)
            sigma = pm.HalfCauchy.dist(sigma_prior)
            #and a hyperprior on the covariance matrix which weakly penalises strong correlations
            chol, corr, stds = pm.LKJCholeskyCov("chol", n=self.ndim, eta=2.0, sd_dist=sigma, compute_corr=True)
            #the hyperprior gives us the Cholesky Decomposition of the covariance matrix, so for completeness we can calculate that deterministically
            cov = pm.Deterministic("cov", chol.dot(chol.T))

            #and now we can construct our multivariate normals to complete the prior
            prior = pm.MvNormal('vals', mu=mu, chol=chol, shape=(self.npoints,self.ndim)) #, observed=self.data)
            #print(prior)
            #help(prior)
            mu1s = prior[:,0]

            datavars = []
            datavars = pm.Normal("data", mu = prior, sigma = self.sigma, observed = self.data)
            #Finally, we need to define our data
            #for i in range(self.ndim):
            #    datavars.append(pm.Normal("data_"+str(i), mu=prior[:,i], sigma = self.sigma[:,i], observed=self.data[:,i]))

            print(datavars)
Example 10
def sample_LKJ_prior(nu=2, shape=2, n_samples=200000):
    """
    Sample LKJ prior

    Parameters
    ----------
    nu : float
        LKJ prior shape parameter (passed as ``eta``).

    shape : int
        dimensionality of the covariance matrix.

    n_samples : int
        Number of samples drawn from the prior.

    Returns
    -------
    r: numpy-array, shape (n_samples, )
        MCMC samples of the correlation coefficient.
    """

    with pm.Model() as model_correlation:
        # prior for the standard deviations
        sd_dist = pm.Gamma.dist(alpha=2, beta=1, shape=2)
        chol_packed = pm.LKJCholeskyCov('chol_packed', n=shape, eta=nu, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(shape, chol_packed)
        vals = pm.MvNormal('true_quantities', mu=0.0, chol=chol, shape=(1, shape))

    with model_correlation:
        # draw MCMC samples from the prior
        trace = pm.sample(n_samples, chains=2)

    r = []
    for chol_p in trace['chol_packed']:
        cov = make_cov_mtx_from_chol_vec(chol_p, ndim=shape)
        r += [cov[1, 0] / np.sqrt(cov[0, 0] * cov[1, 1])]

    return r
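make_cov_mtx_from_chol_vec is not defined in this snippet. A minimal sketch of what it presumably does, assuming the packed vector follows PyMC3's row-wise lower-triangular ordering (a hypothetical reconstruction, not the original helper):

import numpy as np

def make_cov_mtx_from_chol_vec(chol_vec, ndim):
    # rebuild the lower-triangular Cholesky factor from the packed vector
    chol = np.zeros((ndim, ndim))
    chol[np.tril_indices(ndim)] = np.asarray(chol_vec).ravel()
    # the covariance matrix is L L^T
    return chol.dot(chol.T)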
Example 11
def run_normal_mv_model(data, K=3, mus=None, mc_samples=10000, jobs=1):

    with pm.Model() as model:
        n_samples, n_feats = data.shape
        #print n_samples,n_feats
        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=n_feats,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        #mus = pm.Normal('mus', mu = [[10,10], [55,55], [105,105], [155,155], [205,205]], sd = 10, shape=(K,n_feats))
        mus = pm.Normal('mus',
                        mu=mus,
                        sd=10.,
                        shape=(K, n_feats),
                        testval=data.mean(axis=0))

        pi = pm.Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        #TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x', mu=mus[category], chol=L, observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = pm.sample(mc_samples, step=step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model')

    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    #if chains > 1:
    #   print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
Example 12
 def __init__(self, n_to_sample=2000, *args, **kwargs):
     super(MvStudentTBayesianSolver, self).__init__(*args, **kwargs)
     self.n_to_sample = n_to_sample
     self.model = pm.Model()
     self.shared_data = theano.shared(np.zeros((5, 5)) * 0.5, borrow=True)
     with self.model:
         sd_dist = pm.Gamma.dist(alpha=3.0, beta=1.0)
         #sd_dist = pm.HalfCauchy.dist(beta=2.5)
         packed_chol = pm.LKJCholeskyCov('chol_cov',
                                         eta=2,
                                         n=5,
                                         sd_dist=sd_dist)
         chol = pm.expand_packed_triangular(5, packed_chol, lower=True)
         cov = pm.Deterministic('cov', theano.dot(chol, chol.T))
         self.mu_dist = pm.MvNormal("mu",
                                    mu=np.zeros(5),
                                    chol=chol,
                                    shape=5)
         observed = pm.MvStudentT('obs',
                                  nu=3.5,
                                  mu=self.mu_dist,
                                  chol=chol,
                                  observed=self.shared_data)
         self.step = pm.Metropolis()
Example 13
        [0.0, -0.06, 1.0, -0.04],
        [0.15, 0.19, -0.04, 1.0],
    ]
)
cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

with pm.Model() as model:

    mu = pm.Normal("mu", mu=0, sigma=1, shape=n_var)

    # Note that we access the distribution for the standard
    # deviations, and do not create a new random variable.
    sd_dist = pm.HalfCauchy.dist(beta=2.5)
    packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1, sd_dist=sd_dist)
    # compute the covariance matrix
    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
    cov = tt.dot(chol, chol.T)

    # Extract the standard deviations etc
    sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1)))
    r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)])

    like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
Example 14
    # Priors for gaussian means (Gaussian prior): mu1 ~ N(mu_prior_mu, mu_prior_cov), mu0 ~ N(mu_prior_mu, mu_prior_cov)
    mu1 = pm.MvNormal('estimated_mu1',
                      mu=mu_prior_mu,
                      cov=mu_prior_cov,
                      shape=numberOfFeatures)
    mu0 = pm.MvNormal('estimated_mu0',
                      mu=mu_prior_mu,
                      cov=mu_prior_cov,
                      shape=numberOfFeatures)

    # Prior for gaussian covariance matrix (LKJ prior):
    # see here for details: http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
    # and here: http://docs.pymc.io/notebooks/LKJ.html
    sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=numberOfFeatures)
    chol_packed = pm.LKJCholeskyCov('chol_packed',
                                    n=numberOfFeatures,
                                    eta=2,
                                    sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed)
    cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T))

    # observations x1, x0 are supposed to be P(x|y=class1)=N(mu1,cov_both), P(x|y=class0)=N(mu0,cov_both)
    # here is where the Dataset (x1,x0) comes to influence the choice of parameters (mu1,mu0, cov_both)
    # this is done through the "observed = ..." argument; note that above we didn't have that
    x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1)
    x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0)

# done with setting up the model

# now perform maximum likelihood (actually, maximum a posteriori (MAP), since we have priors) estimation
# map_estimate1 is a dictionary: "parameter name" -> "its estimated value"
map_estimate1 = pm.find_MAP(model=basic_model)
Example 15
    def build(self):
        with pm.Model() as env_model:

            # Generate region weights
            w_r = pm.MvNormal('w_r',
                              mu=self.prior.loc_w_r,
                              tau=self.prior.scale_w_r,
                              shape=self.n_regions)

            # Generate Product weights
            #packed_L_p = pm.LKJCholeskyCov('packed_L_p', n=self.n_products,
            #                               eta=2., sd_dist=pm.HalfCauchy.dist(2.5))

            #L_p = pm.expand_packed_triangular(self.n_products, packed_L_p)

            mu_p = pm.MvNormal("mu_p",
                               mu=self.prior.loc_w_p,
                               cov=np.eye(self.n_products),
                               shape=self.n_products)

            #w_p = pm.MvNormal('w_p', mu=mu_p, chol=L_p,
            #                  shape=self.n_products)

            w_p = pm.MvNormal('w_p',
                              mu=mu_p,
                              cov=self.prior.scale_w_p,
                              shape=self.n_products)

            # Generate previous sales weight
            loc_w_s = pm.HalfCauchy('loc_w_s', 1.0)
            scale_w_s = pm.HalfCauchy('scale_w_s', 2.5)

            w_s = pm.TruncatedNormal('w_s',
                                     mu=loc_w_s,
                                     sigma=scale_w_s,
                                     lower=0.0)

            # Generate temporal weights
            packed_L_t = pm.LKJCholeskyCov('packed_L_t',
                                           n=self.n_temporal_features,
                                           eta=2.,
                                           sd_dist=pm.HalfCauchy.dist(2.5))
            L_t = pm.expand_packed_triangular(self.n_temporal_features,
                                              packed_L_t)
            mu_t = pm.MvNormal("mu_t",
                               mu=self.prior.loc_w_t,
                               cov=self.prior.scale_w_t,
                               shape=self.n_temporal_features)

            w_t = pm.MvNormal('w_t',
                              mu=mu_t,
                              chol=L_t,
                              shape=self.n_temporal_features)

            lambda_c_t = pm.math.dot(self.X_temporal, w_t.T)

            bias_q_loc = pm.Normal('bias_q_loc', mu=0.0, sigma=1.0)
            bias_q_scale = pm.HalfCauchy('bias_q_scale', 5.0)

            bias_q = pm.Normal("bias_q", mu=bias_q_loc, sigma=bias_q_scale)

            if self.log_linear:
                lambda_q = pm.math.exp(bias_q + lambda_c_t[self.time_stamps] +
                                       pm.math.dot(self.X_region, w_r.T) +
                                       pm.math.dot(self.X_product, w_p.T) +
                                       w_s * self.X_lagged)
            else:
                lambda_q = bias_q + lambda_c_t[self.time_stamps] + pm.math.dot(
                    self.X_region, w_r.T) + pm.math.dot(
                        self.X_product, w_p.T) + w_s * self.X_lagged

            sigma_q_ij = pm.InverseGamma("sigma_q_ij",
                                         alpha=self.prior.loc_sigma_q_ij,
                                         beta=self.prior.scale_sigma_q_ij)
            q_ij = pm.TruncatedNormal('quantity_ij',
                                      mu=lambda_q,
                                      sigma=sigma_q_ij,
                                      lower=0.0,
                                      observed=self.y)

        return env_model
Example 16
X = X.apply(standardize, axis=0)

# mask NA
X_masked = np.ma.masked_invalid(X)

# model
with pm.Model() as model:
    # priors
    intercept = pm.Normal('intercept', mu=0, sigma=100)
    beta = pm.Normal('beta', mu=0, sigma=100, shape=X_masked.shape[1])
    kappa = pm.HalfCauchy('kappa', beta=5)

    # impute missing X
    chol, corr, stds = pm.LKJCholeskyCov('chol',
                                         n=X_masked.shape[1],
                                         eta=2,
                                         sd_dist=pm.Exponential.dist(1),
                                         compute_corr=True)
    cov = pm.Deterministic('cov', chol.dot(chol.T))
    X_mu = pm.Normal('X_mu',
                     mu=0,
                     sigma=100,
                     shape=X_masked.shape[1],
                     testval=X_masked.mean(axis=0))
    X_modeled = pm.MvNormal('X', mu=X_mu, chol=chol, observed=X_masked)

    # observation
    mu_ = intercept + tt.dot(X_modeled, beta)

    # likelihood
    mu = pm.math.invlogit(mu_)
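The standardize function applied column-wise to X above is not shown; a plausible definition is plain z-scoring (an assumption, not the original helper):

def standardize(col):
    # center a column on its mean and scale by its standard deviation
    return (col - col.mean()) / col.std()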
Example 17
def MultiOutput_Bayesian_Calibration(n_y,DataComp,DataField,DataPred,output_folder):
    # This is data preprocessing part
    n = np.shape(DataField)[0] # number of measured data
    m = np.shape(DataComp)[0] # number of simulation data

    p = np.shape(DataField)[1] - n_y # number of input x
    q = np.shape(DataComp)[1] - p - n_y # number of calibration parameters t

    xc = DataComp[:,n_y:] # simulation input x + calibration parameters t
    xf = DataField[:,n_y:] # observed input

    yc = DataComp[:,:n_y] # simulation output
    yf = DataField[:,:n_y] # observed output

    x_pred = DataPred[:,n_y:] # design points for predictions
    y_true = DataPred[:,:n_y] # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0] # number of predictions
    N = n+m+n_pred

    # Put points xc, xf, and x_pred on [0,1] 
    for i in range(p):
        x_min = min(min(xc[:,i]),min(xf[:,i]))
        x_max = max(max(xc[:,i]),max(xf[:,i]))
        xc[:,i] = (xc[:,i]-x_min)/(x_max-x_min)
        xf[:,i] = (xf[:,i]-x_min)/(x_max-x_min)
        x_pred[:,i] = (x_pred[:,i]-x_min)/(x_max-x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p,(p+q)):
        t_min = min(xc[:,i])
        t_max = max(xc[:,i])
        xc[:,i] = (xc[:,i]-t_min)/(t_max-t_min)

    # store mean and std of yc for future scale back use
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of output yf and yc
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:,i])
        yc_sd[i] = np.std(yc[:,i])
        yc[:,i] = (yc[:,i]-yc_mean[i])/yc_sd[i]
        yf[:,i] = (yf[:,i]-yc_mean[i])/yc_sd[i]

    # This is modeling part
    with pm.Model() as model:
        # Specify the priors
        eta1 = pm.HalfCauchy("eta1", beta=5) # for eta of gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p+q)) # for lengthscale of gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q) # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5) # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred,n_y)) # for y prediction

        # Setup prior of right cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2,sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n,q]), tf)], axis = 1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred,q]), tf)], axis = 1)
        X = tt.concatenate([xf1, xc, x_pred1], axis = 0)
        # Concatenate data into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis = 0)

        # Covariance function of the Gaussian process
        cov_z = eta1**2 * pm.gp.cov.ExpQuad((p+q), ls=lengthscale)
        # Gaussian process with covariance function cov_z
        gp = MultiMarginal(cov_func = cov_z)

        # Bayesian inference
        matrix_shape = [n+m+n_pred,n_y]
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol, noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250,cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    print(pm.summary(trace))

    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        for j in range(n_y):
            name_columns.append('y'+str(j+1)+'_pred'+str(i+1))
    y_prediction = pd.DataFrame(np.array(trace['y_pred']).reshape(500,n_pred*n_y),columns=name_columns)

    #Draw Picture of cvrmse_dist and calculate index
    for i in range(n_y):
        index = list(range(0+i,n_pred*n_y+i,n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:,index])
        y_prediction1 = y_prediction1*yc_sd[i]+yc_mean[i] # Scale y_prediction back
        y_prediction1.to_csv(output_folder + '/y_pred'+str(i+1)+'.csv') # Store y_prediction

        # Calculate the distribution of cvrmse
        cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction1-y_true[:,i]),axis=1)/n_pred)/np.mean(y_true[:,i])
        # Calculate the index and store it into csv
        index_cal(y_prediction1,y_true[:,i]).to_csv(output_folder + '/index'+str(i+1)+'.csv')
        # Draw picture of cvrmse distribution of each y
        plt.subplot(n_y, 1, i+1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    #Draw Picture of Prediction_Plot
    for i in range(n_y):
        index = list(range(0+i,n_pred*n_y+i,n_y))

        y_prediction_mean = np.array(pm.summary(trace)['mean'][index])*yc_sd[i]+yc_mean[i]
        y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][index])*yc_sd[i]+yc_mean[i]
        y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][index])*yc_sd[i]+yc_mean[i]

        plt.subplot(n_y, 1, i+1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred),
                   y=y_true[:,i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # Print the cvrmse of the posterior mean to check whether the outcome is good
        if i == 0:
            cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction_mean-y_true[:,0]))/len(y_prediction_mean-y_true[:,0]))/np.mean(y_true[:,0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
Example 18
import numpy as np
import pymc3 as pm
from matplotlib import pyplot as plt

if __name__ == "__main__":

    with pm.Model():
        mu = np.zeros(3)
        true_cov = np.array([[1.0, 0.5, 0.1], [0.5, 2.0, 0.2], [0.1, 0.2,
                                                                1.0]])
        data = np.random.multivariate_normal(mu, true_cov, 100)
        print(data)

        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=3)
        chol_packed = pm.LKJCholeskyCov("chol_packed",
                                        n=3,
                                        eta=2,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(3, chol_packed)
        vals = pm.MvNormal("vals", mu=mu, chol=chol, observed=data)

        trace = pm.sample()
        pm.traceplot(trace)
        plt.savefig("tmp.png")
Example 19

def print_rv(a, b):
    tt.printing.Print(a)(b)


with pm.Model() as model:
    # Generalization of beta prior
    p = pm.Dirichlet('p', a=np.ones(4), shape=4)

    # Generalization of Binomial prior
    cc = pm.Categorical('c', p=p, shape=2)

    # prior for covariance matrix
    packed_L1 = pm.LKJCholeskyCov('packed_L1',
                                  n=2,
                                  eta=2.,
                                  sd_dist=pm.HalfCauchy.dist(6))

    # Expand the packed vector into a lower-triangular matrix
    L1 = pm.expand_packed_triangular(2, packed_L1)

    # Calculate covariance
    cov1 = pm.Deterministic('cov1', L1.dot(L1.T))

    # print distribution outputs
    print_rv('p', p)
    print_rv('cc', cc)
    print_rv('packed_L1', packed_L1)
'''

Model 1 Output:
Example 20
        for i in range(3):
            betas = pm.Normal(name=f"betas_{i}", sd=2.5, shape=1, testval=0)

            pi = pm.math.sigmoid(pm.math.matrix_dot(X, betas))
            pm.Bernoulli(name=f"Y_{i}", p=pi, observed=Y[:, i])

        trace = pm.sample(12000, tune=2000)
        print(pm.summary(trace))

    # Bayesian multivariate LVM
    with pm.Model():

        B = pm.Normal(name=f"B", sd=2.5, shape=B.shape, testval=0)
        Mu = pm.math.matrix_dot(X, B)

        # Prior on the correlation matrix ----------------------------------------------
        f = pm.Lognormal.dist(sd=1)
        L = pm.LKJCholeskyCov(name="L", eta=1, n=3, sd_dist=f)
        ch = pm.expand_packed_triangular(3, L, lower=True)
        cov = pm.math.matrix_dot(ch, ch.T)
        sd = tt.sqrt(tt.diag(cov))
        Theta = pm.Deterministic("Theta", cov / sd[:, None] / sd[None, :])
        # ------------------------------------------------------------------------------

        Psi = pm.MvNormal(name="Psi", mu=Mu, cov=Theta, shape=Y.shape)
        Pi = pm.math.sigmoid(Psi)
        pm.Bernoulli(name="Y", p=Pi, observed=Y)

        trace = pm.sample(15000, tune=5000)
        print(pm.summary(trace, var_names=["B", "Theta"]))
Example 21
def bsem(
    items,
    factors,
    paths,
    beta=0,
    nu_sd=2.5,
    alpha_sd=2.5,
    d_beta=2.5,
    corr_items=False,
    corr_factors=False,
    g_eta=100,
    l_eta=1,
    beta_beta=1,
):
    r"""Constructs Bayesian SEM.

    Args:
        items (np.array): Array of item data.
        factors (np.array): Factor design.
        paths (np.array): Array of directed factor paths.
        beta (:obj:`float` or `'estimate'`, optional): Standard deviation of normal
            prior on cross loadings. If `'estimate'`,  beta is estimated from the data.
        nu_sd (:obj:`float`, optional): Standard deviation of normal prior on item
            intercepts.
        alpha_sd (:obj:`float`, optional): Standard deviation of normal prior on factor
            intercepts.
        d_beta (:obj:`float`, optional): Scale parameter of half-Cauchy prior on factor
            standard deviation.
        corr_factors (:obj:`bool`, optional): Allow correlated factors.
        corr_items (:obj:`bool`, optional): Allow correlated items.
        g_eta (:obj:`float`, optional): Shape parameter of LKJ prior on residual item
            correlation matrix.
        l_eta (:obj:`float`, optional): Shape parameter of LKJ prior on factor
            correlation matrix.
        beta_beta (:obj:`float`, optional): Beta parameter of beta prior on beta.

    Returns:

        None: Places model in context.

    """
    # get numbers of cases, items, and factors
    n, p = items.shape
    p_, m = factors.shape
    assert p == p_, "Mismatch between data and factor-loading matrices"
    assert paths.shape == (m, m), "Paths matrix has wrong shape"
    I = tt.eye(m, m)

    # place priors on item and factor intercepts
    nu = pm.Normal(name=r"$\nu$",
                   mu=0,
                   sd=nu_sd,
                   shape=p,
                   testval=items.mean(axis=0))
    alpha = pm.Normal(name=r"$\alpha$",
                      mu=0,
                      sd=alpha_sd,
                      shape=m,
                      testval=np.zeros(m))

    # place priors on unscaled factor loadings
    Phi = pm.Normal(name=r"$\Phi$",
                    mu=0,
                    sd=1,
                    shape=factors.shape,
                    testval=factors)

    # place priors on paths
    B = tt.zeros(paths.shape)
    npths = np.sum(paths, axis=None)
    print(npths)
    if npths > 0:
        b = pm.Normal(name=r"$b$",
                      mu=0,
                      sd=1,
                      shape=npths,
                      testval=np.ones(npths))
        # create the paths matrix
        k = 0
        for i in range(m):
            for j in range(m):
                if paths[i, j] == 1:
                    B = tt.set_subtensor(B[i, j], b[k])
                    k += 1
    Gamma = pm.Deterministic("$\Gamma$", B)

    # create masking matrix for factor loadings
    if isinstance(beta, str):
        assert beta == "estimate", f"Don't know what to do with '{beta}'"
        beta = pm.Beta(name=r"$\beta$", alpha=1, beta=beta_beta, testval=0.1)
    M = (1 - np.asarray(factors)) * beta + np.asarray(factors)

    # create scaled factor loadings
    Lambda = pm.Deterministic(r"$\Lambda$", Phi * M)

    # determine item means
    mu = nu + matrix_dot(Lambda, alpha)

    # place priors on item standard deviations
    D = pm.HalfCauchy(name=r"$D$",
                      beta=d_beta,
                      shape=p,
                      testval=items.std(axis=0))

    # place priors on item correlations
    f = pm.Lognormal.dist(sd=0.25)
    if not corr_items:
        Omega = np.eye(p)
    else:
        G = pm.LKJCholeskyCov(name=r"$G$", eta=g_eta, n=p, sd_dist=f)
        ch1 = pm.expand_packed_triangular(p, G, lower=True)
        K = tt.dot(ch1, ch1.T)
        sd1 = tt.sqrt(tt.diag(K))
        Omega = pm.Deterministic(r"$\Omega$", K / sd1[:, None] / sd1[None, :])

    # determine residual item variances and covariances
    Theta = pm.Deterministic(r"$\Theta$", D[None, :] * Omega * D[:, None])

    # place priors on factor correlations
    if not corr_factors:
        Psi = np.eye(m)
    else:
        L = pm.LKJCholeskyCov(name=r"$L$", eta=l_eta, n=m, sd_dist=f)
        ch = pm.expand_packed_triangular(m, L, lower=True)
        Gamma = tt.dot(ch, ch.T)
        sd = tt.sqrt(tt.diag(Gamma))
        Psi = pm.Deterministic(r"$\Psi$", Gamma / sd[:, None] / sd[None, :])

    # determine variances and covariances of items
    A = matrix_inverse(I - Gamma)
    C = matrix_inverse(I - Gamma.T)
    Sigma = matrix_dot(Lambda, A, Psi, C, Lambda.T) + Theta

    # place priors on observations
    pm.MvNormal(name="$Y$",
                mu=mu,
                cov=Sigma,
                observed=items,
                shape=items.shape)
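Since bsem only defines random variables and, per its docstring, places them in the current model context, it is presumably called inside an open pm.Model() block. A minimal usage sketch under that assumption, with toy inputs (illustrative only):

import numpy as np
import pymc3 as pm

items = np.random.normal(size=(100, 6))         # toy item data (assumed)
factors = np.kron(np.eye(2), np.ones((3, 1)))   # 6 items loading on 2 factors
paths = np.array([[0, 0], [1, 0]])              # factor 1 -> factor 2

with pm.Model():
    bsem(items, factors, paths)
    trace = pm.sample(1000, tune=1000)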
Example 22
_, ax = plt.subplots(1, 1, figsize=(5, 5))
textloc = [[0, 0.5], [0, 0.8], [0.5, 0.9]]
for eta, loc in zip([1, 2, 4], textloc):
    R = pm.LKJCorr.dist(n=2, eta=eta).random(size=10000)
    az.plot_kde(R)
    ax.text(loc[0], loc[1], "eta = %s" % (eta), horizontalalignment="center")

ax.set_ylim(0, 1.1)
ax.set_xlabel("correlation")
ax.set_ylabel("Density")

# %%
cafe_idx = d["cafe"].values
with pm.Model() as m_13_1:
    sd_dist = pm.HalfCauchy.dist(beta=2)
    packed_chol = pm.LKJCholeskyCov("chol_cov", eta=2, n=2, sd_dist=sd_dist)

    chol = pm.expand_packed_triangular(2, packed_chol, lower=True)
    cov = pm.math.dot(chol, chol.T)

    sigma_ab = pm.Deterministic("sigma_cafe", tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sigma_ab**-1).dot(cov.dot(tt.diag(sigma_ab**-1)))
    r = pm.Deterministic("Rho", corr[np.triu_indices(2, k=1)])

    ab = pm.Normal("ab", mu=0, sd=10, shape=2)
    ab_cafe = pm.MvNormal("ab_cafe", mu=ab, chol=chol, shape=(N_cafes, 2))

    mu = ab_cafe[:, 0][cafe_idx] + ab_cafe[:, 1][cafe_idx] * d["afternoon"].values
    sd = pm.HalfCauchy("sigma", beta=2)
    wait = pm.Normal("wait", mu=mu, sd=sd, observed=d["wait"])
Example 23
        if k > 1:
            # Prior over component weights (only applicable with k>1)
            p = pm.Dirichlet('p', a=np.array([1.] * k), testval=np.ones(k) / k)

        # Prior over component means
        mus_p = [
            pm.MvNormal('mu_%d' % pid,
                        mu=pm.floatX(np.zeros(2)),
                        tau=pm.floatX(0.1 * np.eye(2)),
                        shape=(k, 2)) for pi, pid in enumerate(data.keys())
        ]

        # Cholesky decomposed LKJ prior over component covariance matrices
        packed_L = [[
            pm.LKJCholeskyCov('packed_L_%d_%d' % (pid, i),
                              n=2,
                              eta=2.,
                              sd_dist=pm.HalfCauchy.dist(.01))
            for i in range(k)
        ] for pi, pid in enumerate(data.keys())]

        # Unpack packed_L into full array
        L = [[
            pm.expand_packed_triangular(2, packed_L[pi][i]) for i in range(k)
        ] for pi, pid in enumerate(data.keys())]

        # Convert L to sigma for convenience
        sigma = [[
            pm.Deterministic('sigma_%d_%d' % (pid, i),
                             L[pi][i].dot(L[pi][i].T)) for i in range(k)
        ] for pi, pid in enumerate(data.keys())]
Example 24
                        'paziente8': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
                        'paziente9': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
                        'paziente10': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
                        'paziente11': [0, 1, 1, 1, 0, 0, 1, 1, 1, 0],
                        'paziente12': [1, 0, 1, 1, 0, 0, 1, 1, 0, 1],
                        'paziente13': [0, 1, 1, 1, 0, 0, 1, 1, 1, 0]
                    })
#%%

model = pm.Model()

sigma0 = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])

SEED = 12345

with model:
    packed_L = pm.LKJCholeskyCov('Packed_L',
                                 n=len(dati),
                                 eta=2.,
                                 sd_dist=pm.HalfCauchy.dist(1))
    L = pm.expand_packed_triangular(len(dati), packed_L)
    #    sigma = pm.Wishart("Sigma", nu = 1, V = sigma0, shape = (3,3))
    sigma = pm.Deterministic('Sigma', L.dot(L.T))
    #    mu = pm.Normal('mu', 0., 10., shape=3, testval = dati.mean(axis=1))
    logits = pm.MvNormal("Logits",
                         mu=0.5 * np.ones(len(dati)),
                         cov=sigma,
                         shape=(1, len(dati)))
    p = pm.Deterministic('p', tt.exp(logits) / (1 + tt.exp(logits)))
    observed = pm.Binomial("Observed", p=p, n=1, observed=dati.values.T)
    trace = pm.sample(random_seed=SEED, cores=1)
Example 25
## BET: FORMULATE THE MODEL
mvg_model = pm.Model()
with mvg_model:
    # bias parametrization
    x = pm.Uniform('x', lower=0., upper=55., shape=(N, 1))
    b0 = pm.Normal('b0', mu=0., sd=20., shape=M)
    b1 = pm.Normal('b1', mu=1., sd=0.5, shape=M)
    b2 = pm.Normal('b2', mu=0., sd=1./55., shape=M)
    xxx = pm.math.concatenate([x for _ in range(M)], axis=1)  # that list comprehension is just a "repmat"
    mu = xxx * xxx * b2 + xxx * b1 + b0

    # covariance parametrization
    #sd_dist = TruncatedJeff.dist(shape=M)
    sd_template = pm.Bound(Jeff,lower = 0.001, upper = 55.)
    sd_dist = sd_template.dist(shape=M,testval=1.)
    chol_packed = pm.LKJCholeskyCov('chol_packed', n=M, eta=100000., sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(M,chol_packed)
    # data connection
    y = pm.MvNormal('y',mu = mu, chol= chol, shape = (N,M),observed = data)

## GIMEL: SAMPLE
with mvg_model:
    # step = pm.Slice()
    trace = pm.sample(nsamples_aug, njobs=njobs, tune=2000)

## DALET: SAVE
# with open('trace','wb') as f:
#     pickle.dump(trace,f)


## WAW: EXPORT IN A DIGESTIBLE FORMAT
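The Jeff distribution wrapped by pm.Bound above is not defined in the snippet. One plausible definition is a custom PyMC3 continuous distribution with a Jeffreys-style log-density p(sigma) proportional to 1/sigma (an assumption, not the original class):

import pymc3 as pm
import theano.tensor as tt

class Jeff(pm.Continuous):
    # improper Jeffreys-style prior: logp(value) = -log(value)
    def logp(self, value):
        return -tt.log(value)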
Example 26
def bayesian_linear_model(data,
                          N_steps=20000,
                          step="Metropolis",
                          burnin=None,
                          njobs=1,
                          progressbar=True,
                          chain_start=0,
                          output_format="rho",
                          sample_params={}):
    """Docstring for bayesian linear model.
    :data: The data used, expects a 2-d array with one dimension having 2 columns/rows
    :N_steps: The number of steps in each chain. If using NUTS sampling, this can be smaller.
    :step: The sampling method. Either "Metropolis" (faster sampling, but needs more steps) or NUTS (slower, but fewer steps)
    :burnin: number of steps to discard at the beginning of each chain. If None, half of N_steps is discarded.
    :njobs: The number of parallel jobs.
    :chain_start: The number assigned to the chain. Can be useful when aiming to combine different chains
    :progressbar: Should a progress bar be shown during sampling?
    :output_format: What should be returned from the sampling? If "rho", only a numpy array with the correlation
    values is returned. If "full", the whole multitrace is returned, which is useful for convergence analysis.
    :sample_params: Additional parameters for pymc3's sample function.
    :returns: Either a multitrace or a numpy array, depending on output_format
    """

    # test the data for the right format and transform it if necessary/possible
    if isinstance(data, list):
        try:
            data = np.vstack(data)
        except ValueError:
            print("Error: Data dimensions do not match!")
            return None

    if isinstance(data, np.ndarray):
        if len(data.shape) != 2:
            if len(data) == 2 and len(data[0]) != len(data[1]):
                print("Error: Data dimensions do not match!")
                return None
            else:
                print(
                    "Error: Data not a two-dimensional array, don't know what to do!"
                )
                return None
        else:
            if data.shape[1] != 2:
                if data.shape[0] != 2:
                    print(
                        "Error: No dimension with 2 variables present, don't know what to do!"
                    )
                else:
                    data = data.T

    # if no burnin is specified, use half of the step number
    if burnin is None:
        burnin = N_steps // 2

    sample_params.update({
        "draws": N_steps,
        "njobs": njobs,
        "tune": burnin,
        "progressbar": progressbar
    })

    # initialize model
    basic_model = pm.Model()

    with basic_model:

        # define model priors
        m_mu = pm.Normal('mu', mu=0., sd=10, shape=2)
        nu = pm.Uniform('nu', 0, 5)
        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=2,
                                     eta=nu,
                                     sd_dist=pm.HalfCauchy.dist(1.))
        chol = pm.expand_packed_triangular(2, packed_L)
        sigma = pm.Deterministic('sigma', _Cov2Cor(chol.dot(chol.T)))

        # model likelihood
        mult_n = pm.MvNormal('mult_n', mu=m_mu, chol=chol, observed=data)

        # which sampler to use
        if step == "Metropolis":
            step = pm.Metropolis()
        elif step == "NUTS":
            step = pm.NUTS()
        sample_params.update({"step": step})

        # MCMC sample
        trace = pm.sample(**sample_params)

    # Return the full pymc3 trace or only an array of the correlation?
    if output_format == "full":
        output = trace
    else:
        output = trace["sigma"][:, 0, 1]
    return output
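_Cov2Cor is not shown in this snippet; a minimal sketch of the covariance-to-correlation conversion it presumably performs (an assumption, not the original helper):

import theano.tensor as tt

def _Cov2Cor(cov):
    # divide each entry by the product of the corresponding standard deviations
    sd = tt.sqrt(tt.diag(cov))
    return cov / sd[:, None] / sd[None, :]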
Example 27
        #      xxx*xxx*b2
        mu = bn[0]
        for k in range(1, K):
            mu = mu + bn[k] * xxx**k

        # covariance parametrization
        sd_template = pm.Bound(
            Jeff,
            lower=0.01,
            upper=np.max(data.max(axis=0) - data.min(axis=0)))
        sd_dist = sd_template.dist(shape=M, testval=testval['S'])
        # sd_dist = pm.HalfCauchy.dist(1.,shape = M,testval = 1.,)
        # sd_dist = pm.Uniform.dist(lower = 0., upper = data.max(axis=0)-data.min(axis=0), shape = M)
        chol_packed = pm.LKJCholeskyCov('chol_packed',
                                        n=M,
                                        eta=1.,
                                        sd_dist=sd_dist,
                                        testval=testval['chol_packed'])
        chol = pm.expand_packed_triangular(M, chol_packed)
        # data connection
        y = pm.MvNormal('y', mu=mu, chol=chol, shape=(N, M), observed=data)
        # mvg_model.logp()

    with mvg_model:
        if conf.approx:
            approx = pm.fit(method=conf.approx)
            trace = approx.sample(conf.nsamples_per_chain)
            save_state(trace, tracedir, mvg_model, approx, sca)
            exit(0)

        step = pm.NUTS()
Example 28
# Multi-Level Model
# A frequentist HLM indicated that condition does not have an effect, so we don't implement that as a level here.
# But honestly, because I don't know how.
with pm.Model(
        coords=coords
) as model:  # block, user, direction. Based on m14_3 Statistical Rethinking 2.
    user_idx = pm.Data("user_idx", user_indices, dims="obs_id")
    block_idx = pm.Data("block_idx", block_indices, dims="obs_id")
    direction_idx = pm.Data("direction_idx", direction_indices, dims="obs_id")

    # Fixed priors.
    g = pm.Normal("g", mu=0.0, sd=1.0, dims='Direction')
    sd_dist = pm.Exponential.dist(1.0)
    chol_user, _, _ = pm.LKJCholeskyCov("chol_user",
                                        n=n_directions,
                                        eta=4,
                                        sd_dist=sd_dist,
                                        compute_corr=True)
    chol_block, _, _ = pm.LKJCholeskyCov("chol_block",
                                         n=n_directions,
                                         eta=4,
                                         sd_dist=sd_dist,
                                         compute_corr=True)

    # Adaptive priors, non-centered.
    z_user = pm.Normal("z_user", 0.0, 1.0, dims=('Direction', 'User'))
    alpha = pm.Deterministic("alpha", pm.math.dot(chol_user, z_user))
    z_block = pm.Normal("z_block", 0.0, 1.0, dims=('Direction', 'Block'))
    beta = pm.Deterministic("beta", pm.math.dot(chol_block, z_block))

    theta = pm.Deterministic(
Example 29
    def create_model(self):
        """
        Creates and returns the PyMC3 model.

        Note: The size of the shared variables must match the size of the
        training data. Otherwise, setting the shared variables later will raise
        an error. See http://docs.pymc.io/advanced_theano.html

        The DensityDist class is used as the likelihood term. The second
        argument, logp_gmix(mus, pi, np.eye(D)), is a python function which
        receives observations (denoted by 'value') and returns the tensor
        representation of the log-likelihood.

        Returns
        ----------
        the PyMC3 model
        """
        model_input = theano.shared(
            np.zeros([self.num_training_samples, self.num_pred]))

        # model_output = theano.shared(np.zeros(self.num_training_samples))

        # model_truncate = theano.shared(np.zeros(self.num_training_samples,
        #                                     dtype='int'))

        self.shared_vars = {
            'model_input': model_input
            # ,
            # 'model_output': model_output,
            # 'model_truncate': model_truncate
        }

        # Log likelihood of normal distribution
        # def logp_normal(mu, tau, value):
        #     # log probability of individual samples
        #     k = tau.shape[0]
        #
        #     def delta(mu):
        #         return value - mu
        #     # delta = lambda mu: value - mu
        #     return (-1 / 2.) * (k * T.log(2 * np.pi) + T.log(1./det(tau)) +
        #                          (delta(mu).dot(tau) * delta(
        # mu)).sum(axis=1))

        # Log likelihood of Gaussian mixture distribution
        # def logp_gmix(mus, pi, tau):
        #     def logp_(value):
        #         logps = [T.log(pi[i]) + logp_normal(mu, tau, value)
        #                  for i, mu in enumerate(mus)]
        #
        #         return T.sum(
        # logsumexp(T.stacklists(logps)[:, :self.num_training_samples],
        # axis=0))
        #
        #     return logp_

        def stick_breaking(v):
            portion_remaining = tt.concatenate([[1],
                                                tt.extra_ops.cumprod(1 -
                                                                     v)[:-1]])
            return v * portion_remaining

        model = pm.Model()

        with model:

            K = self.num_truncate
            D = self.num_pred

            alpha = pm.Gamma('alpha', 1.0, 1.0)
            v = pm.Beta('v', 1, alpha, shape=K)
            pi_ = stick_breaking(v)
            pi = pm.Deterministic('pi', pi_ / pi_.sum())

            means = tt.stack([
                pm.Uniform('cluster_center_{}'.format(k),
                           lower=0.,
                           upper=10.,
                           shape=D) for k in range(K)
            ])

            lower = tt.stack([
                pm.LKJCholeskyCov('cluster_variance_{}'.format(k),
                                  n=D,
                                  eta=2.,
                                  sd_dist=pm.HalfNormal.dist(sd=1.))
                for k in range(K)
            ])

            chol = tt.stack(
                [pm.expand_packed_triangular(D, lower[k]) for k in range(K)])

            component_dists = [
                pm.MvNormal('component_dist_%d' % k,
                            mu=means[k],
                            chol=chol[k],
                            shape=D) for k in range(K)
            ]

            # rand = [pm.MvNormal(
            # 'rand_{}'.format(k),
            # mu=means[k], chol=Chol[k], shape=D) for k in range(K)]
            rand = pm.Normal.dist(0, 1).random

            X = pm.DensityDist(
                'X',
                logp_gmix(
                    mus=component_dists,
                    pi=pi,
                    tau=np.eye(D),
                    num_training_samples=model_input.get_value().shape[0]),
                observed=model_input,
                random=rand)

        return model
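logp_gmix is not defined in this snippet; the commented-out version above suggests the following reconstruction, extended with the num_training_samples argument used at the call site (a sketch, not the original code):

import numpy as np
import theano.tensor as tt
from theano.tensor.nlinalg import det
from pymc3.math import logsumexp

def logp_normal(mu, tau, value):
    # per-sample log density of a multivariate normal with precision matrix tau
    k = tau.shape[0]
    delta = value - mu
    return (-1 / 2.) * (k * tt.log(2 * np.pi) + tt.log(1. / det(tau)) +
                        (delta.dot(tau) * delta).sum(axis=1))

def logp_gmix(mus, pi, tau, num_training_samples):
    # mixture log-likelihood: logsumexp over components of log(pi_k) + MvNormal(value | mu_k, tau)
    def logp_(value):
        logps = [tt.log(pi[i]) + logp_normal(mu, tau, value)
                 for i, mu in enumerate(mus)]
        return tt.sum(logsumexp(tt.stacklists(logps)[:, :num_training_samples], axis=0))
    return logp_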
Example 30
minibatch_size = 500
X_minibatch = pm.Minibatch(X, minibatch_size)

# set up model
with pm.Model() as model:
    pi = pm.Dirichlet('pi', np.ones(K))
    comp_dist = []
    mu = []
    packed_chol = []
    chol = []
    for i in range(K):
        temp_mean = np.random.randint(low=50, high=200, size=D)
        mu.append(pm.Normal('mu%i' % i, temp_mean, 20, shape=D))
        packed_chol.append(
            pm.LKJCholeskyCov('chol_cov_%i' % i,
                              eta=2,
                              n=D,
                              sd_dist=pm.HalfNormal.dist(10)))
        chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True))
        comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))

    xobs = pm.Mixture('x_obs', pi, comp_dist, observed=X_shared)

print("making inference...")
# Inference
with model:
    advi_mf = pm.ADVI()
    advi_mf.fit(10000,
                more_replacements={X_shared: X_minibatch},
                obj_optimizer=pm.adagrad(learning_rate=1e-2))

fig = plt.figure()