def exc(self):
    """
    :return: A named tuple of model characteristics
    """
    # pylint: disable=E1136

    # self.share(tensor=self.data.independent, repeat=True, repeats=self.parameters.P)
    independent = self.data.independent

    # self.share(tensor=self.data.dependent, repeat=False)
    dependent = self.data.dependent

    with pm.Model() as model:

        # Intercepts
        packed_l_c = pm.LKJCholeskyCov(name='packed_l_c', eta=5.0, n=self.parameters.P,
                                       sd_dist=pm.HalfStudentT.dist(nu=2.0, sigma=3.0))
        l_c = pm.expand_packed_triangular(n=self.parameters.P, packed=packed_l_c)
        ec_: pm.model.FreeRV = pm.MvGaussianRandomWalk(
            'intercept', shape=(self.elements.sections_, self.parameters.P), chol=l_c)
        ecr = ec_[self.elements.indices]

        # Gradients
        packed_l_m = pm.LKJCholeskyCov(name='packed_l_m', eta=5.0, n=self.parameters.P,
                                       sd_dist=pm.HalfStudentT.dist(nu=2.0, sigma=3.0))
        l_m = pm.expand_packed_triangular(n=self.parameters.P, packed=packed_l_m)
        em_: pm.model.FreeRV = pm.MvGaussianRandomWalk(
            'gradient', shape=(self.elements.sections_, self.parameters.P), chol=l_m)
        emr = em_[self.elements.indices]

        # Regression
        regression = pm.Deterministic('regression', ecr + emr * independent)

        # Hyper-parameters
        sigma: pm.model.TransformedRV = pm.Uniform(
            name='sigma', lower=0, upper=18.5,
            shape=(self.parameters.N, self.parameters.P))

        # Hence, likelihood ...
        # noinspection PyTypeChecker
        likelihood = pm.Normal(name='y', mu=regression, sigma=sigma, observed=dependent)

        # Inference: drawing posterior samples using NUTS sampling.
        # Beware: if the number of cores isn't set, the function will use min(machine cores, 4).
        trace = pm.sample(draws=500, cores=2, target_accept=0.9, tune=1000)

        # maximal = pm.find_MAP()
        maximal = None

        # The trace generated from Markov Chain Monte Carlo sampling
        arviztrace = az.from_pymc3(trace=trace)

    # noinspection PyUnresolvedReferences,PyProtectedMember
    return self.ModelFeatures._make([model, trace, maximal, arviztrace, likelihood])
def log_important_ratio(approx, nsample):
    logp_func = approx.model.logp
    # in ADVI there is only one group approximation
    approx_group = approx.groups[0]
    if approx.short_name == "mean_field":
        mu_q = approx_group.params[0].eval()
        std_q = rho2sd(approx_group.params[1]).eval()
        logq_func = st.norm(mu_q, std_q)
    elif approx.short_name == "full_rank":
        packed_chol_q = approx_group.params[0]
        mu_q = approx_group.params[1].eval()
        dim = mu_q.shape[0]
        chol_q = pm.expand_packed_triangular(dim, packed_chol_q, lower=True).eval()
        cov_q = np.dot(chol_q, chol_q.T)
        logq_func = st.multivariate_normal(mu_q, cov_q)

    dict_to_array = approx_group.bij.map

    p_theta_y = []
    q_theta = []
    samples = approx.sample_dict_fn(nsample)  # type: dict
    points = ({name: records[i] for name, records in samples.items()}
              for i in range(nsample))

    for point in points:
        p_theta_y.append(logp_func(point))
        q_theta.append(np.sum(logq_func.logpdf(dict_to_array(point))))

    p_theta_y = np.asarray(p_theta_y)
    q_theta = np.asarray(q_theta)
    return p_theta_y, q_theta, p_theta_y - q_theta
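# ---------------------------------------------------------------------------
# Usage sketch for log_important_ratio (added here; not part of the original
# snippet). It assumes the snippet's own imports are in scope: pymc3 as pm,
# numpy as np, scipy.stats as st, and the rho-to-sd helper named rho2sd.
# The returned log ratios log p(theta, y) - log q(theta) can feed an
# importance-sampling diagnostic (e.g. Pareto-k) of the variational fit.
with pm.Model():
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("y", mu=mu, sigma=1.0, observed=np.random.randn(50))
    approx = pm.fit(n=10000, method="advi")  # mean-field approximation

logp_joint, logq, log_ratio = log_important_ratio(approx, nsample=1000)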
def _build_log_volatility_mean(self):
    self.corr_type = corr_type = self.params.pop('corr_type')
    k = self.n_algos
    if corr_type == 'diag':
        log_vlt_mu = pm.Normal('log_vlt_mu', mu=-6, sd=0.5, shape=k)
    elif corr_type == 'dense':
        vlt_mu_dist = pm.Lognormal.dist(mu=-2, sd=0.5, shape=k)
        chol_cov_packed = pm.LKJCholeskyCov('chol_cov_packed_mu', n=k,
                                            eta=2, sd_dist=vlt_mu_dist)
        chol_cov = pm.expand_packed_triangular(k, chol_cov_packed) / np.exp(4)
        cov = tt.dot(chol_cov, chol_cov.T)
        variance_mu = tt.diag(cov)
        corr = cov / tt.sqrt(variance_mu[:, None] * variance_mu[None, :])

        pm.Deterministic('chol_cov_mu', chol_cov)
        pm.Deterministic('cov_mu', cov)
        pm.Deterministic('corr_mu', corr)

        # important: add the new coordinate
        self.coords['algo_chol'] = pd.RangeIndex(k * (k + 1) // 2)
        self.coords['algo_'] = self.coords['algo']
        self.dims['chol_cov_packed_mu'] = ('algo_chol',)
        self.dims['cov_mu'] = ('algo', 'algo_')
        self.dims['corr_mu'] = ('algo', 'algo_')
        self.dims['chol_cov_mu'] = ('algo', 'algo_')

        log_vlt_mu = pm.Deterministic('log_vlt_mu', tt.log(variance_mu) / 2.)
    else:
        raise NotImplementedError
    self.dims['log_vlt_mu'] = ('algo',)
    return log_vlt_mu
def fit(self, sample_size, traceplot_name=None, fast_sampling=False):
    '''
    sample_size (int): The size of the sample
    traceplot_name (str): The name of the traceplot file
    fast_sampling (bool): whether or not variational approximation should be used

    Note: to evaluate the kernel function, pymc3 only accepts theano tensor types.
    '''
    self.model = pm.Model()
    # self.X_train = tt.constant(self.X_train)  # need tensor type
    self.X_train = shared(self.X_train)
    with self.model:
        evaluated_kernels = []
        packed_L = pm.LKJCholeskyCov('packed_L', n=3, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(3, packed_L)
        for center in self.centers.values:
            evaluated_kernels.append(
                pm.MvNormal.dist(mu=center, chol=L).logp(self.X_train))
        beta = pm.Normal('beta', mu=0, sd=3, shape=self.number_of_centers)
        latentProcess = pm.Deterministic('mu', tt.dot(beta, evaluated_kernels))

        error = pm.HalfCauchy('error', 12)
        y_ = pm.Normal("y", mu=latentProcess, sd=error,
                       observed=np.log(self.y_train))

    if fast_sampling:
        with self.model:
            inference = pm.ADVI()
            approx = pm.fit(n=sample_size, method=inference)  # run until convergence
            self.trace = approx.sample(draws=sample_size)
    else:
        with self.model:
            start = pm.find_MAP()
            self.trace = pm.sample(sample_size, start=start)

    if traceplot_name:
        fig, axs = plt.subplots(3, 2)  # 3 rows (one per plotted RV), 2 columns
        pm.traceplot(self.trace, varnames=['packed_L', 'beta', 'error'], ax=axs)
        fig.savefig(traceplot_name)

        fig_path = os.path.join(os.getcwd(), traceplot_name)
        print(f'the traceplot has been saved to {fig_path}')
def build_model(data, K):
    n_ppt = len(data)
    print('Building model with n=%d, K=%d' % (n_ppt, K))
    with pm.Model() as gmm:
        # Prior
        if K > 1:
            p = pm.Dirichlet('p', a=pm.floatX(np.array([1.] * K)),
                             testval=pm.floatX(np.ones(K) / K))
        mus_p = [
            pm.MvNormal('mu_%s' % pid,
                        mu=pm.floatX(np.zeros(2)),
                        tau=pm.floatX(0.1 * np.eye(2)),
                        shape=(K, 2))
            for pi, pid in enumerate(data.keys())
        ]
        packed_L = [[
            pm.LKJCholeskyCov('packed_L_%s_%d' % (pid, i), n=2,
                              eta=pm.floatX(2.),
                              sd_dist=pm.HalfCauchy.dist(.01))
            for i in range(K)
        ] for pi, pid in enumerate(data.keys())]
        L = [[
            pm.expand_packed_triangular(2, packed_L[pi][i]) for i in range(K)
        ] for pi, pid in enumerate(data.keys())]
        sigma = [[
            pm.Deterministic('sigma_%s_%d' % (pid, i), L[pi][i].dot(L[pi][i].T))
            for i in range(K)
        ] for pi, pid in enumerate(data.keys())]

        # Likelihood
        if K > 1:
            mvnl = [[
                pm.MvNormal.dist(mu=mus_p[pi][i], chol=L[pi][i])
                for i in range(K)
            ] for pi in range(n_ppt)]
            Y_obs = [
                pm.Mixture('Y_obs_%s' % pid, w=p, comp_dists=mvnl[pi],
                           observed=data[pid])
                for pi, pid in enumerate(data.keys())
            ]
        else:
            Y_obs = [
                pm.MvNormal('Y_obs_%s' % pid, mu=mus_p[pi][0], chol=L[pi][0],
                            observed=data[pid])
                for pi, pid in enumerate(data.keys())
            ]
    return gmm
def get_model_GP2(t, K, nsamples, cov_fcn, eta):
    tau = pmc.Gamma('tau', 1.0, 1.0, testval=1.0)
    S = cov_fcn(t, t[:, None], tau) + tns.eye(nsamples) * 1e-6
    Lc = tns.slinalg.cholesky(S)
    Lr_ = pmc.LKJCholeskyCov('Lr_', eta=eta, n=K,
                             sd_dist=pmc.Gamma.dist(1.0, 1.0, shape=K))
    Lr = pmc.expand_packed_triangular(K, Lr_, lower=True)
    psi = pmc.Normal('psi', mu=0.0, sd=1.0, shape=(K, nsamples), testval=0.0)
    phi = pmc.Deterministic('phi', pmc.invlogit(Lr.dot(psi).dot(Lc.T)))
    pmc.Deterministic('h2', tns.diag(Lr.dot(Lr.T)))
    return phi
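# ---------------------------------------------------------------------------
# Usage sketch for get_model_GP2 (added; not part of the original snippet).
# The function registers random variables, so it must run inside a model
# context; `sqexp` below is an assumed squared-exponential kernel written
# with the snippet's aliases (pmc = pymc3, tns = theano.tensor).
def sqexp(t1, t2, tau):
    # squared-exponential covariance, broadcasting t1 against t2
    return tns.exp(-0.5 * (t1 - t2) ** 2 / tau ** 2)

t = np.linspace(0.0, 1.0, 30)
with pmc.Model():
    phi = get_model_GP2(t, K=3, nsamples=len(t), cov_fcn=sqexp, eta=2.0)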
def fit_t(self, data, nu=5):
    with pm.Model() as model:
        packed_L = pm.LKJCholeskyCov('packed_L', n=data.shape[1], eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(data.shape[1], packed_L)
        cov = pm.Deterministic('cov', L.dot(L.T))
        mean = pm.Normal('mean', mu=0, sigma=10, shape=data.shape[1])
        obs = pm.MvStudentT('obs', nu=nu, mu=mean, chol=L, observed=data)
    params = pm.find_MAP(model=model, progressbar=False)
    return params['mean'], params['cov']
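# ---------------------------------------------------------------------------
# Usage sketch for fit_t (added; not part of the original snippet). The
# method only reads `data`, so a smoke test with synthetic heavy-tailed data
# might look like this; `solver` is a hypothetical instance of the class
# that defines fit_t.
rng = np.random.default_rng(0)
data = rng.standard_t(df=5, size=(200, 3)) + np.array([1.0, -1.0, 0.5])
mean_hat, cov_hat = solver.fit_t(data, nu=5)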
def make_model(cls):
    with pm.Model() as model:
        sd_mu = np.array([1, 2, 3, 4, 5])
        sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10., shape=5)
        chol_packed = pm.LKJCholeskyCov('chol_packed', eta=3, n=5,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
        cov = tt.dot(chol, chol.T)
        stds = tt.sqrt(tt.diag(cov))
        pm.Deterministic('log_stds', tt.log(stds))
        corr = cov / stds[None, :] / stds[:, None]
        corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
        pm.Deterministic('corr_entries_unit', corr_entries_unit)
    return model
def test_sample_prior_and_posterior(self):
    def build_toy_dataset(N, K):
        pi = np.array([0.2, 0.5, 0.3])
        mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
        stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
        x = np.zeros((N, 3), dtype=np.float32)
        y = np.zeros((N,), dtype=int)
        for n in range(N):
            k = np.argmax(np.random.multinomial(1, pi))
            x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
            y[n] = k
        return x, y

    N = 100  # number of data points
    K = 3    # number of mixture components
    D = 3    # dimensionality of the data

    X, y = build_toy_dataset(N, K)

    with pm.Model() as model:
        pi = pm.Dirichlet("pi", np.ones(K), shape=(K,))
        comp_dist = []
        mu = []
        packed_chol = []
        chol = []
        for i in range(K):
            mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
            packed_chol.append(
                pm.LKJCholeskyCov("chol_cov_%i" % i, eta=2, n=D,
                                  sd_dist=pm.HalfNormal.dist(2.5)))
            chol.append(pm.expand_packed_triangular(D, packed_chol[i],
                                                    lower=True))
            comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i], shape=D))

        pm.Mixture("x_obs", pi, comp_dist, observed=X)

    with model:
        trace = pm.sample(30, tune=10, chains=1)

    n_samples = 20
    with model:
        ppc = pm.sample_posterior_predictive(trace, n_samples)
        prior = pm.sample_prior_predictive(samples=n_samples)

    assert ppc["x_obs"].shape == (n_samples,) + X.shape
    assert prior["x_obs"].shape == (n_samples,) + X.shape
    assert prior["mu0"].shape == (n_samples, D)
    assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
def Arodz(x0, x1):
    """Takes in two sample sets, one from each class, and returns
    the MAP estimates of the means and the shared covariance.
    """
    numberOfFeatures = len(x0[0])

    # instantiate an empty PyMC3 model
    basic_model = pm.Model()

    # fill the model with details:
    with basic_model:
        # parameters of the priors on the Gaussian means
        mu_prior_cov = 100 * np.eye(numberOfFeatures)
        mu_prior_mu = np.zeros((numberOfFeatures,))

        # Priors on the Gaussian means:
        # mu1 ~ N(mu_prior_mu, mu_prior_cov), mu0 ~ N(mu_prior_mu, mu_prior_cov)
        mu1 = pm.MvNormal('estimated_mu1', mu=mu_prior_mu,
                          cov=mu_prior_cov, shape=numberOfFeatures)
        mu0 = pm.MvNormal('estimated_mu0', mu=mu_prior_mu,
                          cov=mu_prior_cov, shape=numberOfFeatures)

        # Prior on the Gaussian covariance matrix (LKJ prior):
        # see http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
        # and http://docs.pymc.io/notebooks/LKJ.html for details
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=numberOfFeatures)
        chol_packed = pm.LKJCholeskyCov('chol_packed', n=numberOfFeatures,
                                        eta=2, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed)
        cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T))

        # The observations x1, x0 are modelled as P(x|y=class1) = N(mu1, cov_both)
        # and P(x|y=class0) = N(mu0, cov_both). This is where the dataset (x1, x0)
        # influences the choice of the parameters (mu1, mu0, cov_both), via the
        # "observed = ..." argument; note that the priors above didn't have that.
        x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1)
        x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0)

    # done with setting up the model

    # Now perform maximum a posteriori (MAP) estimation (maximum likelihood
    # plus the priors). map_estimate1 is a dictionary:
    # "parameter name" -> "its estimated value"
    map_estimate1 = pm.find_MAP(model=basic_model)
    # print(map_estimate1)

    return (map_estimate1['estimated_mu0'],
            map_estimate1['estimated_mu1'],
            map_estimate1['estimated_cov'])
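# ---------------------------------------------------------------------------
# Usage sketch for Arodz (added; not part of the original snippet): two
# synthetic classes sharing one covariance, whose means and common
# covariance the MAP estimate should roughly recover.
rng = np.random.default_rng(1)
cov_true = np.array([[1.0, 0.4], [0.4, 1.0]])
x0 = rng.multivariate_normal([0.0, 0.0], cov_true, size=150)
x1 = rng.multivariate_normal([2.0, 1.0], cov_true, size=150)
mu0_hat, mu1_hat, cov_hat = Arodz(x0, x1)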
def construct_likelihood(self):
    lower_idxs = np.tril_indices(self.data.shape[-1], k=-1)
    L = pm.expand_packed_triangular(self.ndim, self.model['packed_L'])
    Sigma = pm.Deterministic('Sigma', L.dot(L.T))
    std = tt.sqrt(tt.diag(Sigma))
    corr = Sigma / tt.outer(std, std)
    pm.Deterministic('corr_coeffs', corr[lower_idxs])
    if self.data_covariances is None:
        pm.MvNormal('like', mu=self.model['mu'], chol=L,
                    observed=self.residual_data)
    else:
        like = _multivariate_normal_convolution_likelihood(
            Sigma, self.model['mu'], self.residual_data,
            self.data_covariances)
        pm.Potential('like', like)
def build_model(data, K):
    N = data.shape[0]
    d = data.shape[1]
    print('Building model with n=%d, d=%d, k=%d' % (N, d, K))
    with pm.Model() as gmm:
        # Prior over component weights
        if K > 1:
            p = pm.Dirichlet('p', a=np.array([1.] * K))

        # Prior over component means
        mus = [
            pm.MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(d)),
                        tau=pm.floatX(0.1 * np.eye(d)),
                        shape=(d,))
            # testval=pm.floatX(np.ones(d)))
            for i in range(K)
        ]

        # Cholesky-decomposed LKJ prior over component covariance matrices
        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i, n=d, eta=2.,
                              sd_dist=pm.HalfCauchy.dist(1))
            # testval=pm.floatX(np.ones(int(d*(d-1)/2+d))))
            for i in range(K)
        ]

        # Unpack packed_L into a full array
        L = [pm.expand_packed_triangular(d, packed_L[i]) for i in range(K)]

        # Convert L to sigma and tau for convenience
        sigma = [
            pm.Deterministic('sigma_%d' % i, L[i].dot(L[i].T))
            for i in range(K)
        ]
        tau = [
            pm.Deterministic('tau_%d' % i, matrix_inverse(sigma[i]))
            for i in range(K)
        ]

        # Specify the likelihood
        if K > 1:
            mvnl = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]
            Y_obs = pm.Mixture('Y_obs', w=p, comp_dists=mvnl, observed=data)
        else:
            Y_obs = pm.MvNormal('Y_obs', mu=mus[0], chol=L[0], observed=data)
    return gmm
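# ---------------------------------------------------------------------------
# Usage sketch for build_model (added; not part of the original snippet):
# build and sample a small two-component mixture on synthetic 2-D data.
# Assumes the snippet's imports (pm, np, matrix_inverse) are in scope.
data = np.random.randn(300, 2)
data[:150] += 4.0  # shift half the points to create a second cluster
gmm = build_model(data, K=2)
with gmm:
    trace = pm.sample(500, tune=500, cores=1)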
def _multivariate_normal_dist(self, init_mu, suffix=""):
    if not isinstance(suffix, str):
        suffix = str(suffix)
    data_dim = len(init_mu)
    # prior on the covariance
    sd_dist = pm.HalfCauchy.dist(beta=2.5)
    packed_chol = pm.LKJCholeskyCov('cov' + suffix, eta=2, n=data_dim,
                                    sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(data_dim, packed_chol, lower=True)
    # prior on the mean
    mu = pm.MvNormal('mu' + suffix, mu=0, cov=np.eye(data_dim),
                     shape=data_dim)
    return pm.MvNormal.dist(mu, chol=chol)
def kernel_stats(inFile, log_scale=True):
    par = get_params(inFile)
    n_kernel = 0
    for var in sorted(par["means"]):
        n_kernel += "mus_f" in var
    tf = pm.distributions.transforms.StickBreaking()
    dfs = list()
    for tissue_type in ["t", "f"]:
        weights = tf.backward(
            par["means"][f"w_{tissue_type}_stickbreaking__"]).eval()
        n_dim = par["means"][f"x_{tissue_type}"].shape[1]
        volumes = list()
        for kernel in range(n_kernel):
            # get covariance ellipse parameters
            packed_cov = par["means"][
                f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"]
            lower = pm.expand_packed_triangular(n_dim, packed_cov,
                                                lower=True).eval()
            cov = np.dot(lower, lower.T)
            volume = np.linalg.det(cov)
            volumes.append(volume)
        type_df = pd.DataFrame(
            {
                "tissue": "tumor" if tissue_type == "t" else "non-tumor",
                "weight": weights,
                "volume": volumes,
            },
            index=[f"kernel {i}" for i in range(n_kernel)],
        )
        dfs.append(type_df)
    df = pd.concat(dfs)
    pl = (pn.ggplot(pn.aes("volume", "weight", color="tissue"), df) +
          pn.geom_point())
    if log_scale:
        pl += pn.scale_y_log10()
        pl += pn.scale_x_log10()
    pl += pn.theme_minimal()
    return pl, df
def __init__(
    self,
    n_endog,
    n_lag_endog,
    t_const,
    t_init,
    t_ar,
    packed_chol,
    dist_init=pm.Flat.dist(),
    *args,
    **kwargs
):
    # Calculate shape from n_endog
    # TODO: Rename params?...
    n_endog = int(n_endog)
    n_lag_endog = int(n_lag_endog)
    shape = (n_endog,)
    super().__init__(shape=shape, *args, **kwargs)

    self.n_endog = n_endog
    self.n_lag_endog = n_lag_endog
    self.t_const = tt.as_tensor_variable(t_const)
    self.t_init = tt.as_tensor_variable(t_init)
    self.t_ar = tt.as_tensor_variable(t_ar)

    # Covariance distribution args for the innovation process
    # TODO: Maybe allow a non-MvNormal innovation dist?
    self.packed_chol = packed_chol
    self.chol = chol = pm.expand_packed_triangular(
        n_endog, packed_chol, lower=True)
    self.cov = tt.dot(chol, chol.T)

    # Distribution for initial values - default is Flat, a.k.a. "no idea"
    self.dist_init = dist_init

    # Test value
    self.mean = tt.as_tensor_variable(np.zeros(shape=(n_endog,)))
def sample_LKJ_prior(nu=2, shape=2, n_samples=200000):
    r"""
    Sample from the LKJ prior.

    Parameters
    ----------
    nu : float
        LKJ prior \nu parameter.
    shape : int
        Dimensionality of the covariance matrix.
    n_samples : int
        Number of samples drawn from the prior.

    Returns
    -------
    r : numpy array, shape (n_samples,)
        MCMC samples of the correlation coefficient.
    """
    with pm.Model() as model_correlation:
        # prior on the standard deviations
        sd_dist = pm.Gamma.dist(alpha=2, beta=1, shape=2)
        chol_packed = pm.LKJCholeskyCov('chol_packed', n=shape, eta=nu,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(shape, chol_packed)
        vals = pm.MvNormal('true_quantities', mu=0.0, chol=chol,
                           shape=(1, shape))

    with model_correlation:
        # no observed data, so this draws samples from the prior
        trace = pm.sample(n_samples, chains=2)

    r = []
    for chol_p in trace['chol_packed'][:]:
        cov = make_cov_mtx_from_chol_vec(chol_p, ndim=shape)
        r += [cov[1, 0] / np.sqrt(cov[0, 0] * cov[1, 1])]
    return r
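# ---------------------------------------------------------------------------
# Usage sketch for sample_LKJ_prior (added; not part of the original
# snippet): draw a modest number of prior samples and inspect the implied
# correlation density. Assumes matplotlib.pyplot is available as plt, as in
# other snippets in this collection.
r = sample_LKJ_prior(nu=2, shape=2, n_samples=2000)
plt.hist(np.asarray(r), bins=50, density=True)
plt.xlabel('correlation')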
def run_normal_mv_model(data, K=3, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples, n_feats = data.shape
        # print(n_samples, n_feats)
        packed_L = pm.LKJCholeskyCov('packed_L', n=n_feats, eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus
        # mus = pm.Normal('mus', mu=[[10, 10], [55, 55], [105, 105], [155, 155], [205, 205]], sd=10, shape=(K, n_feats))
        mus = pm.Normal('mus', mu=mus, sd=10., shape=(K, n_feats),
                        testval=data.mean(axis=0))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        # TODO: one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x', mu=mus[category], chol=L, observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    # if chains > 1:
    #     print(max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
def __init__(self, n_to_sample=2000, *args, **kwargs):
    super(MvStudentTBayesianSolver, self).__init__(*args, **kwargs)
    self.n_to_sample = n_to_sample
    self.model = pm.Model()
    self.shared_data = theano.shared(np.zeros((5, 5)) * 0.5, borrow=True)

    with self.model:
        sd_dist = pm.Gamma.dist(alpha=3.0, beta=1.0)
        # sd_dist = pm.HalfCauchy.dist(beta=2.5)
        packed_chol = pm.LKJCholeskyCov('chol_cov', eta=2, n=5,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(5, packed_chol, lower=True)
        cov = pm.Deterministic('cov', theano.dot(chol, chol.T))
        self.mu_dist = pm.MvNormal("mu", mu=np.zeros(5), chol=chol, shape=5)
        observed = pm.MvStudentT('obs', nu=3.5, mu=self.mu_dist, chol=chol,
                                 observed=self.shared_data)
        self.step = pm.Metropolis()
        mu = mu + bn[k] * xxx**k

    # covariance parametrization
    sd_template = pm.Bound(
        Jeff, lower=0.01,
        upper=np.max(data.max(axis=0) - data.min(axis=0)))
    sd_dist = sd_template.dist(shape=M, testval=testval['S'])
    # sd_dist = pm.HalfCauchy.dist(1., shape=M, testval=1.)
    # sd_dist = pm.Uniform.dist(lower=0., upper=data.max(axis=0) - data.min(axis=0), shape=M)
    chol_packed = pm.LKJCholeskyCov('chol_packed', n=M, eta=1.,
                                    sd_dist=sd_dist,
                                    testval=testval['chol_packed'])
    chol = pm.expand_packed_triangular(M, chol_packed)

    # data connection
    y = pm.MvNormal('y', mu=mu, chol=chol, shape=(N, M), observed=data)
    # mvg_model.logp()

with mvg_model:
    if conf.approx:
        approx = pm.fit(method=conf.approx)
        trace = approx.sample(conf.nsamples_per_chain)
        save_state(trace, tracedir, mvg_model, approx, sca)
        exit(0)
    step = pm.NUTS()
    if util.conf_set(conf, 'from_truth'):
        start_ = None
    else:
for eta, loc in zip([1, 2, 4], textloc):
    R = pm.LKJCorr.dist(n=2, eta=eta).random(size=10000)
    az.plot_kde(R)
    ax.text(loc[0], loc[1], "eta = %s" % (eta),
            horizontalalignment="center")

ax.set_ylim(0, 1.1)
ax.set_xlabel("correlation")
ax.set_ylabel("Density")

# %%
cafe_idx = d["cafe"].values
with pm.Model() as m_13_1:
    sd_dist = pm.HalfCauchy.dist(beta=2)
    packed_chol = pm.LKJCholeskyCov("chol_cov", eta=2, n=2, sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(2, packed_chol, lower=True)
    cov = pm.math.dot(chol, chol.T)
    sigma_ab = pm.Deterministic("sigma_cafe", tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sigma_ab**-1).dot(cov.dot(tt.diag(sigma_ab**-1)))
    r = pm.Deterministic("Rho", corr[np.triu_indices(2, k=1)])

    ab = pm.Normal("ab", mu=0, sd=10, shape=2)
    ab_cafe = pm.MvNormal("ab_cafe", mu=ab, chol=chol, shape=(N_cafes, 2))
    mu = (ab_cafe[:, 0][cafe_idx] +
          ab_cafe[:, 1][cafe_idx] * d["afternoon"].values)
    sd = pm.HalfCauchy("sigma", beta=2)
    wait = pm.Normal("wait", mu=mu, sd=sd, observed=d["wait"])
    trace_13_1 = pm.sample(5000, tune=2000)
    [0., -0.06, 1., -0.04],
    [0.15, 0.19, -0.04, 1.]])
cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)

    # Note that we access the distribution for the standard
    # deviations, and do not create a new random variable.
    sd_dist = pm.HalfCauchy.dist(beta=2.5)
    packed_chol = pm.LKJCholeskyCov('chol_cov', n=n_var, eta=1,
                                    sd_dist=sd_dist)
    # compute the covariance matrix
    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
    cov = tt.dot(chol, chol.T)

    # Extract the standard deviations etc
    sd = pm.Deterministic('sd', tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sd**-1).dot(cov.dot(tt.diag(sd**-1)))
    r = pm.Deterministic('r', corr[np.triu_indices(n_var, k=1)])

    like = pm.MvNormal('likelihood', mu=mu, chol=chol, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n)
                    tau=pm.floatX(0.1 * np.eye(2)),
                    shape=(k, 2))
        for pi, pid in enumerate(data.keys())
    ]

    # Cholesky decomposed LKJ prior over component covariance matrices
    packed_L = [[
        pm.LKJCholeskyCov('packed_L_%d_%d' % (pid, i), n=2, eta=2.,
                          sd_dist=pm.HalfCauchy.dist(.01))
        for i in range(k)
    ] for pi, pid in enumerate(data.keys())]

    # Unpack packed_L into full array
    L = [[
        pm.expand_packed_triangular(2, packed_L[pi][i]) for i in range(k)
    ] for pi, pid in enumerate(data.keys())]

    # Convert L to sigma for convenience
    sigma = [[
        pm.Deterministic('sigma_%d_%d' % (pid, i), L[pi][i].dot(L[pi][i].T))
        for i in range(k)
    ] for pi, pid in enumerate(data.keys())]

    # Specify the likelihood
    if k > 1:
        mvnl = [[
            pm.MvNormal.dist(mu=mus_p[pi][i], chol=L[pi][i])
            for i in range(k)
        ] for pi in range(n_ppt)]
        Y_obs = [
def MultiOutput_Bayesian_Calibration(n_y, DataComp, DataField, DataPred, output_folder):
    # This is the data preprocessing part
    n = np.shape(DataField)[0]  # number of measured data
    m = np.shape(DataComp)[0]   # number of simulation data

    p = np.shape(DataField)[1] - n_y      # number of input x
    q = np.shape(DataComp)[1] - p - n_y   # number of calibration parameters t

    xc = DataComp[:, n_y:]   # simulation input x + calibration parameters t
    xf = DataField[:, n_y:]  # observed input

    yc = DataComp[:, :n_y]   # simulation output
    yf = DataField[:, :n_y]  # observed output

    x_pred = DataPred[:, n_y:]    # design points for predictions
    y_true = DataPred[:, :n_y]    # true measured values at the design points
    n_pred = np.shape(x_pred)[0]  # number of predictions
    N = n + m + n_pred

    # Put the points xc, xf, and x_pred on [0, 1]
    for i in range(p):
        x_min = min(min(xc[:, i]), min(xf[:, i]))
        x_max = max(max(xc[:, i]), max(xf[:, i]))
        xc[:, i] = (xc[:, i] - x_min) / (x_max - x_min)
        xf[:, i] = (xf[:, i] - x_min) / (x_max - x_min)
        x_pred[:, i] = (x_pred[:, i] - x_min) / (x_max - x_min)

    # Put the calibration parameters t on the domain [0, 1]
    for i in range(p, (p + q)):
        t_min = min(xc[:, i])
        t_max = max(xc[:, i])
        xc[:, i] = (xc[:, i] - t_min) / (t_max - t_min)

    # store the mean and std of yc to scale back later
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of the outputs yf and yc
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:, i])
        yc_sd[i] = np.std(yc[:, i])
        yc[:, i] = (yc[:, i] - yc_mean[i]) / yc_sd[i]
        yf[:, i] = (yf[:, i] - yc_mean[i]) / yc_sd[i]

    # This is the modeling part
    with pm.Model() as model:
        # Priors
        eta1 = pm.HalfCauchy("eta1", beta=5)  # for eta of the gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p + q))  # for the lengthscale of the gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q)  # for the calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5)  # for the noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred, n_y))  # for the y predictions

        # Prior on the right Cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2,
                                           sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate the data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n, q]), tf)], axis=1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred, q]), tf)], axis=1)
        X = tt.concatenate([xf1, xc, x_pred1], axis=0)
        # Concatenate the outputs into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis=0)

        # Covariance function of the gaussian process
        cov_z = eta1**2 * pm.gp.cov.ExpQuad((p + q), ls=lengthscale)

        # Gaussian process with covariance function cov_z
        gp = MultiMarginal(cov_func=cov_z)

        # Bayesian inference
        matrix_shape = [n + m + n_pred, n_y]
        outcome = gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol,
                                         noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250, cores=1)

    # This part is for data collection and visualization
    pm.summary(trace).to_csv(output_folder + '/trace_summary.csv')
    print(pm.summary(trace))

    name_columns = []
    n_columns = n_pred
    for i in range(n_columns):
        for j in range(n_y):
            name_columns.append('y' + str(j + 1) + '_pred' + str(i + 1))
    # 500 rows: 250 draws times the 2 default chains
    y_prediction = pd.DataFrame(
        np.array(trace['y_pred']).reshape(500, n_pred * n_y),
        columns=name_columns)

    # Draw the picture of the cvrmse distribution and calculate the indices
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:, index])
        y_prediction1 = y_prediction1 * yc_sd[i] + yc_mean[i]  # scale y_prediction back
        y_prediction1.to_csv(output_folder + '/y_pred' + str(i + 1) + '.csv')  # store y_prediction

        # Calculate the distribution of the cvrmse
        cvrmse = 100 * np.sqrt(
            np.sum(np.square(y_prediction1 - y_true[:, i]), axis=1) / n_pred
        ) / np.mean(y_true[:, i])
        # Calculate the index and store it as csv
        index_cal(y_prediction1, y_true[:, i]).to_csv(
            output_folder + '/index' + str(i + 1) + '.csv')

        # Draw the picture of the cvrmse distribution of each y
        plt.subplot(n_y, 1, i + 1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    # Draw the prediction plot
    for i in range(n_y):
        index = list(range(0 + i, n_pred * n_y + i, n_y))
        y_prediction_mean = np.array(pm.summary(trace)['mean'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_975 = np.array(pm.summary(trace)['hpd_97.5'][index]) * yc_sd[i] + yc_mean[i]
        y_prediction_025 = np.array(pm.summary(trace)['hpd_2.5'][index]) * yc_sd[i] + yc_mean[i]

        plt.subplot(n_y, 1, i + 1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred), y=y_true[:, i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # Print the original cvrmse to check whether the outcome is good
        if i == 0:
            cvrmse = 100 * np.sqrt(
                np.sum(np.square(y_prediction_mean - y_true[:, 0])) /
                len(y_prediction_mean - y_true[:, 0])
            ) / np.mean(y_true[:, 0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
    for i in range(3):
        betas = pm.Normal(name=f"betas_{i}", sd=2.5, shape=1, testval=0)
        pi = pm.math.sigmoid(pm.math.matrix_dot(X, betas))
        pm.Bernoulli(name=f"Y_{i}", p=pi, observed=Y[:, i])
    trace = pm.sample(12000, tune=2000)
    print(pm.summary(trace))

# Bayesian multivariate LVM
with pm.Model():
    B = pm.Normal(name="B", sd=2.5, shape=B.shape, testval=0)
    Mu = pm.math.matrix_dot(X, B)

    # Prior on the correlation matrix ---------------------------------------
    f = pm.Lognormal.dist(sd=1)
    L = pm.LKJCholeskyCov(name="L", eta=1, n=3, sd_dist=f)
    ch = pm.expand_packed_triangular(3, L, lower=True)
    cov = pm.math.matrix_dot(ch, ch.T)
    sd = tt.sqrt(tt.diag(cov))
    Theta = pm.Deterministic("Theta", cov / sd[:, None] / sd[None, :])
    # ------------------------------------------------------------------------

    Psi = pm.MvNormal(name="Psi", mu=Mu, cov=Theta, shape=Y.shape)
    Pi = pm.math.sigmoid(Psi)
    pm.Bernoulli(name="Y", p=Pi, observed=Y)
    trace = pm.sample(15000, tune=5000)
    print(pm.summary(trace, var_names=["B", "Theta"]))
    ]
)
cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=1, shape=n_var)

    # Note that we access the distribution for the standard
    # deviations, and do not create a new random variable.
    sd_dist = pm.HalfCauchy.dist(beta=2.5)
    packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1,
                                    sd_dist=sd_dist)
    # compute the covariance matrix
    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
    cov = tt.dot(chol, chol.T)

    # Extract the standard deviations etc
    sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov)))
    corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1)))
    r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)])

    like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n)
                  cov=mu_prior_cov,
                  shape=numberOfFeatures)
mu0 = pm.MvNormal('estimated_mu0',
                  mu=mu_prior_mu,
                  cov=mu_prior_cov,
                  shape=numberOfFeatures)

# Prior on the Gaussian covariance matrix (LKJ prior):
# see http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
# and http://docs.pymc.io/notebooks/LKJ.html for details
sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=numberOfFeatures)
chol_packed = pm.LKJCholeskyCov('chol_packed', n=numberOfFeatures,
                                eta=2, sd_dist=sd_dist)
chol = pm.expand_packed_triangular(numberOfFeatures, chol_packed)
cov_mx = pm.Deterministic('estimated_cov', chol.dot(chol.T))

# The observations x1, x0 are modelled as P(x|y=class1) = N(mu1, cov_both)
# and P(x|y=class0) = N(mu0, cov_both). This is where the dataset (x1, x0)
# influences the choice of the parameters (mu1, mu0, cov_both), via the
# "observed = ..." argument; note that the priors above didn't have that.
x1_obs = pm.MvNormal('x1', mu=mu1, chol=chol, observed=x1)
x0_obs = pm.MvNormal('x0', mu=mu0, chol=chol, observed=x0)

# done with setting up the model

# Now perform maximum a posteriori (MAP) estimation (maximum likelihood
# plus the priors). map_estimate1 is a dictionary:
# "parameter name" -> "its estimated value"
map_estimate1 = pm.find_MAP(model=basic_model)

cov_est = map_estimate1['estimated_cov']
mu0_est = map_estimate1['estimated_mu0']
def find_optimal_projection(inFile):
    """
    Calculates a Wasserstein-type distance (see
    https://arxiv.org/pdf/1907.05254.pdf) between the Gaussian mixture
    distributions characterizing tumor and non-tumor tissue for each
    selection of 2 components. It returns the two components with the
    maximal statistical distance between the two distributions for
    visualization purposes, e.g., with expression_plot.
    """
    par = get_params(inFile)
    n_components = par["means"]["mus_f_0"].shape[0]
    n_kernel = 0
    for var in sorted(par["means"]):
        n_kernel += "mus_f" in var
    if "altStick" in par["note"] and not par["note"]["altStick"]:
        tf = StickBreaking_legacy()
    else:
        tf = StickBreaking2()
    weights = dict()
    means = dict()
    covs = dict()
    for tissue_type in ["t", "f"]:
        weights[tissue_type] = tf.backward(
            par["means"][f"w_{tissue_type}_stickbreaking__"]).eval()
        means[tissue_type] = list()
        covs[tissue_type] = list()
        for kernel in range(n_kernel):
            means[tissue_type].append(
                par["means"][f"mus_{tissue_type}_{kernel}"])
            # get covariance ellipse parameters
            packed_cov = par["means"][
                f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"]
            lower = pm.expand_packed_triangular(n_components, packed_cov,
                                                lower=True).eval()
            cov = np.dot(lower, lower.T)
            covs[tissue_type].append(cov)
        means[tissue_type] = np.stack(means[tissue_type])
        covs[tissue_type] = np.stack(covs[tissue_type])

    def get_distance(pair):
        cp1, cp2 = pair
        mean_t = means["t"][:, [cp1, cp2]]
        mean_f = means["f"][:, [cp1, cp2]]
        cov_t = covs["t"][:, [cp1, cp2], :][:, :, [cp1, cp2]]
        cov_f = covs["f"][:, [cp1, cp2], :][:, :, [cp1, cp2]]
        _, distance = GW2(weights["t"], weights["f"], mean_t, mean_f,
                          cov_t, cov_f)
        return cp1, cp2, distance

    pairs = itertools.combinations(range(n_components), 2)
    total_pairs = int((n_components**2 - n_components) / 2)
    results = list()
    with mp.Pool() as pool:
        for cp1, cp2, distance in tqdm(
                pool.imap(get_distance, pairs),
                total=total_pairs,
                desc="projections",
        ):
            results.append({"cp1": cp1 + 1, "cp2": cp2 + 1, "GW2": distance})
    result_df = pd.DataFrame(results, index=range(total_pairs))
    max_dist = result_df["GW2"].argmax()
    max_cp1 = result_df["cp1"][max_dist]
    max_cp2 = result_df["cp2"][max_dist]
    return max_cp1, max_cp2, result_df
def expression_plot(
    inFile,
    cp1=1,
    cp2=2,
    model=None,
    draw_distribution=True,
    draw_points=True,
    max_kernel_alpha=0.5,
    color="expression",
):
    par = get_params(inFile)
    pl = (pn.ggplot(pn.aes(f"CP {cp1}", f"CP {cp2}", color=color)) +
          pn.theme_minimal())
    df = None
    kdf = None
    if draw_points:
        if model is None:
            index = [
                f"sample {i+1}" for i in range(par["means"]["x_t"].shape[0])
            ]
            if color != "expression":
                raise Exception(
                    "A model must be passed to color other than by expression.")
        else:
            index = model.counts.columns
        columns = [f"CP {i+1}" for i in range(par["means"]["x_t"].shape[1])]
        df_t = pd.DataFrame(par["means"]["x_t"], index=index, columns=columns)
        df_t["expression"] = "tumor"
        df_tf = pd.DataFrame(par["means"]["x_f"], index=index, columns=columns)
        df_tf["expression"] = "non-tumor"
        df = pd.concat([df_t, df_tf])
        if model is not None:
            df = df.merge(model.pheno, "left", left_index=True,
                          right_index=True)
        pl += pn.geom_point(data=df, alpha=0.3)
    if draw_distribution:
        n_kernel = 0
        for var in sorted(par["means"]):
            n_kernel += "mus_f" in var
        if "altStick" in par["note"] and not par["note"]["altStick"]:
            tf = StickBreaking_legacy()
        else:
            tf = StickBreaking2()
        ellipses = list()
        ellipse_t = np.linspace(0, 2 * np.pi, 100)
        for tissue_type in ["t", "f"]:
            weights = tf.backward(
                par["means"][f"w_{tissue_type}_stickbreaking__"]).eval()
            n_dim = par["means"][f"x_{tissue_type}"].shape[1]
            for kernel in range(n_kernel):
                # get covariance ellipse parameters
                packed_cov = par["means"][
                    f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"]
                lower = pm.expand_packed_triangular(n_dim, packed_cov,
                                                    lower=True).eval()
                cov = np.dot(lower, lower.T)[[cp1 - 1, cp2 - 1], :][:, [cp1 - 1, cp2 - 1]]
                var, U = np.linalg.eig(cov)
                theta = np.arccos(np.abs(U[0, 0]))
                # parametrize the ellipse
                width = 2 * np.sqrt(5.991 * var[0])
                height = 2 * np.sqrt(5.991 * var[1])
                density = weights[kernel] / width * height
                x = width * np.cos(ellipse_t)
                y = height * np.sin(ellipse_t)
                # rotation
                c, s = np.cos(theta), np.sin(theta)
                R = np.array(((c, -s), (s, c)))
                path = np.dot(R, np.array([x, y]))
                # position
                pos = par["means"][f"mus_{tissue_type}_{kernel}"]
                path += pos[[cp1 - 1, cp2 - 1]][:, None]
                # make the data frame
                path_df = pd.DataFrame({
                    f"CP {cp1}": path[0, :],
                    f"CP {cp2}": path[1, :],
                })
                path_df["kernel"] = kernel
                path_df["density"] = density
                path_df["expression"] = ("tumor" if tissue_type == "t"
                                         else "non-tumor")
                path_df["expression-kernel"] = (f"tumor {kernel}"
                                                if tissue_type == "t"
                                                else f"non-tumor {kernel}")
                ellipses.append(path_df)
        kdf = pd.concat(ellipses)
        density_scale = max_kernel_alpha / kdf["density"].max()
        kdf["density"] *= density_scale
        pl += pn.geom_polygon(
            pn.aes(fill="expression", group="expression-kernel",
                   alpha="density"),
            data=kdf,
        )
        pl += pn.scale_alpha_continuous(range=(0, max_kernel_alpha))
    return pl, df, kdf
def build(self):
    with pm.Model() as env_model:
        # Generate region weights
        w_r = pm.MvNormal('w_r', mu=self.prior.loc_w_r,
                          tau=self.prior.scale_w_r, shape=self.n_regions)

        # Generate product weights
        # packed_L_p = pm.LKJCholeskyCov('packed_L_p', n=self.n_products,
        #                                eta=2., sd_dist=pm.HalfCauchy.dist(2.5))
        # L_p = pm.expand_packed_triangular(self.n_products, packed_L_p)
        mu_p = pm.MvNormal("mu_p", mu=self.prior.loc_w_p,
                           cov=np.eye(self.n_products),
                           shape=self.n_products)
        # w_p = pm.MvNormal('w_p', mu=mu_p, chol=L_p, shape=self.n_products)
        w_p = pm.MvNormal('w_p', mu=mu_p, cov=self.prior.scale_w_p,
                          shape=self.n_products)

        # Generate previous-sales weight
        loc_w_s = pm.HalfCauchy('loc_w_s', 1.0)
        scale_w_s = pm.HalfCauchy('scale_w_s', 2.5)
        w_s = pm.TruncatedNormal('w_s', mu=loc_w_s, sigma=scale_w_s,
                                 lower=0.0)

        # Generate temporal weights
        packed_L_t = pm.LKJCholeskyCov('packed_L_t',
                                       n=self.n_temporal_features,
                                       eta=2., sd_dist=pm.HalfCauchy.dist(2.5))
        L_t = pm.expand_packed_triangular(self.n_temporal_features, packed_L_t)
        mu_t = pm.MvNormal("mu_t", mu=self.prior.loc_w_t,
                           cov=self.prior.scale_w_t,
                           shape=self.n_temporal_features)
        w_t = pm.MvNormal('w_t', mu=mu_t, chol=L_t,
                          shape=self.n_temporal_features)
        lambda_c_t = pm.math.dot(self.X_temporal, w_t.T)

        bias_q_loc = pm.Normal('bias_q_loc', mu=0.0, sigma=1.0)
        bias_q_scale = pm.HalfCauchy('bias_q_scale', 5.0)
        bias_q = pm.Normal("bias_q", mu=bias_q_loc, sigma=bias_q_scale)

        if self.log_linear:
            lambda_q = pm.math.exp(bias_q + lambda_c_t[self.time_stamps] +
                                   pm.math.dot(self.X_region, w_r.T) +
                                   pm.math.dot(self.X_product, w_p.T) +
                                   w_s * self.X_lagged)
        else:
            lambda_q = (bias_q + lambda_c_t[self.time_stamps] +
                        pm.math.dot(self.X_region, w_r.T) +
                        pm.math.dot(self.X_product, w_p.T) +
                        w_s * self.X_lagged)

        sigma_q_ij = pm.InverseGamma("sigma_q_ij",
                                     alpha=self.prior.loc_sigma_q_ij,
                                     beta=self.prior.scale_sigma_q_ij)
        q_ij = pm.TruncatedNormal('quantity_ij', mu=lambda_q,
                                  sigma=sigma_q_ij, lower=0.0,
                                  observed=self.y)

    return env_model
def create_model(self):
    """
    Creates and returns the PyMC3 model.

    Note: The size of the shared variables must match the size of the
    training data. Otherwise, setting the shared variables later will
    raise an error. See http://docs.pymc.io/advanced_theano.html

    The DensityDist class is used as the likelihood term. The second
    argument, logp_gmix(mus, pi, np.eye(D)), is a python function which
    receives observations (denoted by 'value') and returns the tensor
    representation of the log-likelihood.

    Returns
    ----------
    the PyMC3 model
    """
    model_input = theano.shared(
        np.zeros([self.num_training_samples, self.num_pred]))
    # model_output = theano.shared(np.zeros(self.num_training_samples))
    # model_truncate = theano.shared(np.zeros(self.num_training_samples,
    #                                         dtype='int'))

    self.shared_vars = {
        'model_input': model_input
        # ,
        # 'model_output': model_output,
        # 'model_truncate': model_truncate
    }

    # Log likelihood of the normal distribution
    # def logp_normal(mu, tau, value):
    #     # log probability of individual samples
    #     k = tau.shape[0]
    #     delta = lambda mu: value - mu
    #     return (-1 / 2.) * (k * T.log(2 * np.pi) + T.log(1. / det(tau)) +
    #                         (delta(mu).dot(tau) * delta(mu)).sum(axis=1))

    # Log likelihood of the Gaussian mixture distribution
    # def logp_gmix(mus, pi, tau):
    #     def logp_(value):
    #         logps = [T.log(pi[i]) + logp_normal(mu, tau, value)
    #                  for i, mu in enumerate(mus)]
    #         return T.sum(
    #             logsumexp(T.stacklists(logps)[:, :self.num_training_samples],
    #                       axis=0))
    #     return logp_

    def stick_breaking(v):
        portion_remaining = tt.concatenate(
            [[1], tt.extra_ops.cumprod(1 - v)[:-1]])
        return v * portion_remaining

    model = pm.Model()

    with model:
        K = self.num_truncate
        D = self.num_pred

        alpha = pm.Gamma('alpha', 1.0, 1.0)
        v = pm.Beta('v', 1, alpha, shape=K)
        pi_ = stick_breaking(v)
        pi = pm.Deterministic('pi', pi_ / pi_.sum())

        means = tt.stack([
            pm.Uniform('cluster_center_{}'.format(k),
                       lower=0., upper=10., shape=D)
            for k in range(K)
        ])
        lower = tt.stack([
            pm.LKJCholeskyCov('cluster_variance_{}'.format(k), n=D, eta=2.,
                              sd_dist=pm.HalfNormal.dist(sd=1.))
            for k in range(K)
        ])
        chol = tt.stack(
            [pm.expand_packed_triangular(D, lower[k]) for k in range(K)])

        component_dists = [
            pm.MvNormal('component_dist_%d' % k, mu=means[k], chol=chol[k],
                        shape=D)
            for k in range(K)
        ]
        # rand = [pm.MvNormal('rand_{}'.format(k),
        #                     mu=means[k], chol=Chol[k], shape=D)
        #         for k in range(K)]
        rand = pm.Normal.dist(0, 1).random

        X = pm.DensityDist(
            'X',
            logp_gmix(
                mus=component_dists,
                pi=pi,
                tau=np.eye(D),
                num_training_samples=model_input.get_value().shape[0]),
            observed=model_input,
            random=rand)

    return model
def bsem(
    items,
    factors,
    paths,
    beta=0,
    nu_sd=2.5,
    alpha_sd=2.5,
    d_beta=2.5,
    corr_items=False,
    corr_factors=False,
    g_eta=100,
    l_eta=1,
    beta_beta=1,
):
    r"""Constructs a Bayesian SEM.

    Args:
        items (np.array): Array of item data.
        factors (np.array): Factor design.
        paths (np.array): Array of directed factor paths.
        beta (:obj:`float` or `'estimate'`, optional): Standard deviation of
            the normal prior on cross loadings. If `'estimate'`, beta is
            estimated from the data.
        nu_sd (:obj:`float`, optional): Standard deviation of the normal prior
            on item intercepts.
        alpha_sd (:obj:`float`, optional): Standard deviation of the normal
            prior on factor intercepts.
        d_beta (:obj:`float`, optional): Scale parameter of the half-Cauchy
            prior on the factor standard deviations.
        corr_items (:obj:`bool`, optional): Allow correlated items.
        corr_factors (:obj:`bool`, optional): Allow correlated factors.
        g_eta (:obj:`float`, optional): Shape parameter of the LKJ prior on
            the residual item correlation matrix.
        l_eta (:obj:`float`, optional): Shape parameter of the LKJ prior on
            the factor correlation matrix.
        beta_beta (:obj:`float`, optional): Beta parameter of the beta prior
            on beta.

    Returns:
        None: Places the model in the enclosing context.
    """
    # get the numbers of cases, items, and factors
    n, p = items.shape
    p_, m = factors.shape
    assert p == p_, "Mismatch between data and factor-loading matrices"
    assert paths.shape == (m, m), "Paths matrix has wrong shape"
    I = tt.eye(m, m)

    # place priors on item and factor intercepts
    nu = pm.Normal(name=r"$\nu$", mu=0, sd=nu_sd, shape=p,
                   testval=items.mean(axis=0))
    alpha = pm.Normal(name=r"$\alpha$", mu=0, sd=alpha_sd, shape=m,
                      testval=np.zeros(m))

    # place priors on unscaled factor loadings
    Phi = pm.Normal(name=r"$\Phi$", mu=0, sd=1, shape=factors.shape,
                    testval=factors)

    # place priors on paths
    B = tt.zeros(paths.shape)
    npths = np.sum(paths, axis=None)
    print(npths)
    if npths > 0:
        b = pm.Normal(name=r"$b$", mu=0, sd=1, shape=npths,
                      testval=np.ones(npths))
        # create the paths matrix
        k = 0
        for i in range(m):
            for j in range(m):
                if paths[i, j] == 1:
                    B = tt.set_subtensor(B[i, j], b[k])
                    k += 1
    Gamma = pm.Deterministic(r"$\Gamma$", B)

    # create the masking matrix for factor loadings
    if isinstance(beta, str):
        assert beta == "estimate", f"Don't know what to do with '{beta}'"
        beta = pm.Beta(name=r"$\beta$", alpha=1, beta=beta_beta, testval=0.1)
    M = (1 - np.asarray(factors)) * beta + np.asarray(factors)

    # create the scaled factor loadings
    Lambda = pm.Deterministic(r"$\Lambda$", Phi * M)

    # determine the item means
    mu = nu + matrix_dot(Lambda, alpha)

    # place priors on item standard deviations
    D = pm.HalfCauchy(name=r"$D$", beta=d_beta, shape=p,
                      testval=items.std(axis=0))

    # place priors on item correlations
    f = pm.Lognormal.dist(sd=0.25)
    if not corr_items:
        Omega = np.eye(p)
    else:
        G = pm.LKJCholeskyCov(name=r"$G$", eta=g_eta, n=p, sd_dist=f)
        ch1 = pm.expand_packed_triangular(p, G, lower=True)
        K = tt.dot(ch1, ch1.T)
        sd1 = tt.sqrt(tt.diag(K))
        Omega = pm.Deterministic(r"$\Omega$", K / sd1[:, None] / sd1[None, :])

    # determine the residual item variances and covariances
    Theta = pm.Deterministic(r"$\Theta$", D[None, :] * Omega * D[:, None])

    # place priors on factor correlations
    if not corr_factors:
        Psi = np.eye(m)
    else:
        L = pm.LKJCholeskyCov(name=r"$L$", eta=l_eta, n=m, sd_dist=f)
        ch = pm.expand_packed_triangular(m, L, lower=True)
        # use a local name here so the paths matrix Gamma is not clobbered
        S = tt.dot(ch, ch.T)
        sd = tt.sqrt(tt.diag(S))
        Psi = pm.Deterministic(r"$\Psi$", S / sd[:, None] / sd[None, :])

    # determine the variances and covariances of items
    A = matrix_inverse(I - Gamma)
    C = matrix_inverse(I - Gamma.T)
    Sigma = matrix_dot(Lambda, A, Psi, C, Lambda.T) + Theta

    # place priors on observations
    pm.MvNormal(name="$Y$", mu=mu, cov=Sigma, observed=items,
                shape=items.shape)
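# ---------------------------------------------------------------------------
# Usage sketch for bsem (added; not part of the original snippet). The
# function places its variables in the enclosing model context, so it is
# called inside `with pm.Model():`; items/factors/paths below are toy
# shapes (6 items loading on 2 uncorrelated factors, no directed paths).
items = np.random.randn(100, 6)
factors = np.kron(np.eye(2), np.ones((3, 1)))  # 6 x 2 factor design
paths = np.zeros((2, 2), dtype=int)
with pm.Model():
    bsem(items, factors, paths)
    trace = pm.sample(1000, tune=1000)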
with pm.Model() as model:
    # Hyperpriors for the mixture components' means / covariance matrices
    mus = [pm.MvNormal('mu_' + str(k),
                       mu=np.zeros(D, dtype=np.float32),
                       cov=10000 * np.eye(D),
                       shape=(D,))
           for k in range(K)]
    taus = []
    sd_dist = pm.HalfCauchy.dist(beta=10000)
    for k in range(K):
        packed_chol = pm.LKJCholeskyCov('packed_chol' + str(k), n=D, eta=1,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
        invchol = solve_lower_triangular(chol, np.eye(D))
        taus.append(tt.dot(invchol.T, invchol))

    # Mixture density
    pi = pm.Dirichlet('pi', a=np.ones(K), shape=(K,))
    B = pm.DensityDist('B', logp_gmix(mus, pi, taus), shape=(n_samples, D))

    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)),
                   axis=1)

    # Model error
    err = pm.HalfCauchy('err', beta=10)

    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2), observed=Y)