def test_model_not_drawable_prior(self):
    """Check prior vs. posterior predictive sampling with a ``HalfFlat`` prior.

    Prior predictive sampling is expected to raise a ``ValueError`` whose
    message contains "Cannot sample", while posterior predictive sampling
    from an existing trace must still work.
    """
    counts = np.random.poisson(lam=10, size=200)
    model = pm.Model()

    # Fit the model first so a trace is available for the posterior check.
    with model:
        rate = pm.HalfFlat("sigma")
        pm.Poisson("foo", mu=rate, observed=counts)
        trace = pm.sample(tune=1000)

    with model:
        with pytest.raises(ValueError) as excinfo:
            pm.sample_prior_predictive(50)
        assert "Cannot sample" in str(excinfo.value)

        ppc = pm.sample_posterior_predictive(trace, 50)
        assert ppc["foo"].shape == (50, 200)
def test_shape_edgecase(self):
    """Prior draws of a vector parameter keep the shape ``(draws, 5)``.

    The vector ``mu`` feeds a second vector variable whose scale is a
    scalar ``Uniform``.
    """
    n_draws = 10
    with pm.Model():
        loc = pm.Normal("mu", shape=5)
        scale = pm.Uniform("sd", lower=2, upper=3)
        pm.Normal("x", mu=loc, sigma=scale, shape=5)
        prior_draws = pm.sample_prior_predictive(n_draws)

    assert prior_draws["mu"].shape == (10, 5)
def simulate_poiszero_hmm(N, mu=10.0, pi_0_a=np.r_[1, 1], p_0_a=np.r_[5, 1], p_1_a=np.r_[1, 1]):
    """Build a two-state Poisson-zero HMM and draw one prior predictive sample.

    Parameters
    ----------
    N
        Length of the state sequence and of the (all-zero) placeholder
        observations.
    mu
        Value passed to ``PoissonZeroProcess`` as its second argument.
    pi_0_a
        Concentration parameters of the ``"pi_0"`` Dirichlet.
    p_0_a, p_1_a
        Concentration parameters of the ``"p_0"`` and ``"p_1"`` Dirichlets,
        which are stacked into the ``"P_tt"`` deterministic.

    Returns
    -------
    tuple
        ``(sample_point, test_model)``: the dictionary returned by
        ``pm.sample_prior_predictive(samples=1)`` (with the ``"Y_t"`` entry
        squeezed) and the model it was drawn from.
    """
    with pm.Model() as test_model:
        row_0 = pm.Dirichlet("p_0", p_0_a)
        row_1 = pm.Dirichlet("p_1", p_1_a)

        stacked_rows = tt.stack([row_0, row_1])
        trans_mat = pm.Deterministic("P_tt", tt.shape_padleft(stacked_rows))

        init_probs = pm.Dirichlet("pi_0", pi_0_a)

        states = HMMStateSeq("S_t", trans_mat, init_probs, shape=N)
        emissions = PoissonZeroProcess("Y_t", mu, states, observed=np.zeros(N))

        sample_point = pm.sample_prior_predictive(samples=1)

        # TODO FIXME: Why is `pm.sample_prior_predictive` adding an extra
        # dimension to the emission result?  Until that is understood, all
        # length-one axes of that entry are dropped.
        obs_key = emissions.name
        sample_point[obs_key] = sample_point[obs_key].squeeze()

    return sample_point, test_model
def test_shape_edgecase(self):
    """Prior samples of the vector-valued ``mu`` must have shape ``(10, 5)``."""
    with pm.Model():
        center = pm.Normal('mu', shape=5)
        spread = pm.Uniform('sd', lower=2, upper=3)
        # A dependent vector variable: scalar scale, vector location.
        pm.Normal('x', mu=center, sd=spread, shape=5)
        samples = pm.sample_prior_predictive(10)

    mu_shape = samples['mu'].shape
    assert mu_shape == (10, 5)
def _initial_population(draws, model, variables, start):
    """Create an initial population of points, by default from the prior.

    Parameters
    ----------
    draws
        Number of population members to build.
    model
        Model providing ``unobserved_RVs``, ``test_point`` and
        ``dict_to_array``.
    variables
        Variables whose values make up each population member.
    start
        Mapping from variable name to an indexable collection of at least
        ``draws`` values, or ``None`` to sample those values from the prior.

    Returns
    -------
    tuple
        ``(population, var_info)`` where ``population`` is the ``floatX``
        array of flattened points and ``var_info`` maps each variable name to
        ``(shape, size)`` taken from the model's test point.
    """
    if start is not None:
        source = start
    else:
        free_names = [rv.name for rv in model.unobserved_RVs]
        source = pm.sample_prior_predictive(draws, var_names=free_names, model=model)

    test_point = model.test_point
    var_info = OrderedDict(
        (var.name, (test_point[var.name].shape, test_point[var.name].size))
        for var in variables
    )

    population = [
        model.dict_to_array(
            pm.Point({var.name: source[var.name][idx] for var in variables}, model=model)
        )
        for idx in range(draws)
    ]

    return np.array(floatX(population)), var_info
def test_multivariate(self):
    """A ``Multinomial`` of shape 4 yields ``(10, 4)`` draws both ways.

    Checks the variable's own ``random`` method and prior predictive
    sampling.
    """
    probs = np.array([0.25, 0.25, 0.25, 0.25])
    with pm.Model():
        multinomial = pm.Multinomial("m", n=5, p=probs, shape=4)
        prior_draws = pm.sample_prior_predictive(10)

    expected = (10, 4)
    assert multinomial.random(size=10).shape == expected
    assert prior_draws["m"].shape == expected
def test_shared_data_as_index(self):
    """
    ``pm.Data`` containers must be usable as index variables (integers) as
    well as ordinary float data.
    See https://github.com/pymc-devs/pymc3/issues/3813
    """
    n_draws = 1000
    predicted_vars = ["alpha", "obs"]

    with pm.Model() as model:
        idx = pm.Data("index", [2, 0, 1, 0, 2])
        target = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
        alpha = pm.Normal("alpha", 0, 1.5, shape=3)
        pm.Normal("obs", alpha[idx], np.sqrt(1e-2), observed=target)

        prior_trace = pm.sample_prior_predictive(1000, var_names=["alpha"])
        trace = pm.sample(1000, init=None, tune=1000, chains=1)

    # Swap in new data of a different length before predicting.
    replacement = {"index": np.array([0, 1, 2]), "y": [5.0, 6.0, 9.0]}
    with model:
        pm.set_data(new_data=replacement)
        pp_trace = pm.sample_posterior_predictive(trace, 1000, var_names=predicted_vars)
        pp_tracef = pm.fast_sample_posterior_predictive(trace, 1000, var_names=predicted_vars)

    assert prior_trace["alpha"].shape == (n_draws, 3)
    assert trace["alpha"].shape == (n_draws, 3)
    assert pp_trace["alpha"].shape == (n_draws, 3)
    assert pp_trace["obs"].shape == (n_draws, 3)
    assert pp_tracef["alpha"].shape == (n_draws, 3)
    assert pp_tracef["obs"].shape == (n_draws, 3)
def sample_all(self, *, model: pm.Model = None, var_names: List[str], **sampler_kwargs) -> arviz.InferenceData:
    """
    Draw prior predictive, posterior and posterior predictive samples.

    Parameters
    ----------
    model : optional
        A model previously created using `self.build_model()`.
        A new model is built if None (default).
    var_names : List[str]
        Variable names passed to `pm.fast_sample_posterior_predictive`.
    **sampler_kwargs : dict
        Additional arguments forwarded to `pm.sample`.

    Returns
    -------
    arviz.InferenceData
        The result of `arviz.from_pymc3` given the trace, the prior
        predictive samples and the posterior predictive samples.
    """
    active_model = self.build_model() if model is None else model

    with active_model:
        prior_samples = pm.sample_prior_predictive()
        # A raw trace (not InferenceData) is requested here; it is what gets
        # passed to the posterior predictive sampler and to arviz below.
        trace = pm.sample(return_inferencedata=False, **sampler_kwargs)
        posterior_samples = pm.fast_sample_posterior_predictive(trace, var_names=var_names)

    return arviz.from_pymc3(
        trace=trace,
        prior=prior_samples,
        posterior_predictive=posterior_samples,
        model=active_model,
    )
def get_bayesian_model(cat_cols, num_cols):
    """Assemble a preprocessing pipeline followed by a PyMC3-derived step.

    Parameters
    ----------
    cat_cols
        Columns routed through the categorical transformer.
    num_cols
        Columns routed through the numeric transformer.

    Returns
    -------
    Pipeline
        A two-step pipeline: ``'preprocessor'`` (the column transformer) and
        ``'classifier'`` (the posterior predictive samples).

    Notes
    -----
    NOTE(review): ``y_test`` is not defined in this function; presumably it
    is a module-level name -- confirm.
    NOTE(review): the ``'classifier'`` step is the return value of
    ``pm.sample_posterior_predictive`` and not an estimator object -- verify
    that the resulting pipeline is usable as intended.
    """
    # Numeric columns: median imputation, then standardisation.
    numeric_steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
    numeric_transformer = Pipeline(steps=numeric_steps)

    # Categorical columns: most-frequent imputation, then one-hot encoding.
    categorical_steps = [
        ('imputer', SimpleImputer(strategy='most_frequent', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse=False)),
    ]
    categorical_transformer = Pipeline(steps=categorical_steps)

    # Route each group of columns to its transformer.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, num_cols),
            ('cat', categorical_transformer, cat_cols),
        ]
    )

    with pm.Model() as linear_model:
        # NOTE(review): 'weights' and 'noise' are registered in the model but
        # do not enter the likelihood below -- confirm this is intended.
        pm.Normal('weights', mu=0, sigma=1)
        pm.Gamma('noise', alpha=2, beta=1)
        pm.Normal('y_observed', mu=0, sigma=10, observed=y_test)

        pm.sample_prior_predictive()
        trace = pm.sample()
        posterior_pred_clf = pm.sample_posterior_predictive(trace)

    # Chain preprocessing with the modelling result.
    return Pipeline(
        steps=[('preprocessor', preprocessor), ('classifier', posterior_pred_clf)]
    )
def test_sample_generate_values(fixture_model, fixture_sizes):
    """Prior draws have shape ``sample size + distribution shape``.

    This is checked for every random variable supplied by the fixture.
    """
    model, RVs = fixture_model
    leading_dims = to_tuple(fixture_sizes)

    with model:
        prior = pm.sample_prior_predictive(samples=fixture_sizes)
        for var in RVs:
            expected_shape = leading_dims + tuple(var.distribution.shape)
            assert prior[var.name].shape == expected_shape
def simulate_poiszero_hmm(N, mu=10.0, pi_0_a=np.r_[1, 1], p_0_a=np.r_[5, 1], p_1_a=np.r_[1, 1]):
    """Simulate one draw from a two-state Poisson-zero hidden Markov model.

    Parameters
    ----------
    N
        Length of the state sequence and of the (all-zero) placeholder
        observations.
    mu
        Value passed to ``PoissonZeroProcess`` as its second argument.
    pi_0_a
        Concentration parameters of the ``"pi_0"`` Dirichlet.
    p_0_a, p_1_a
        Concentration parameters of the ``"p_0"`` and ``"p_1"`` Dirichlets,
        which are stacked into the ``"P_tt"`` deterministic.

    Returns
    -------
    tuple
        ``(sample_point, test_model)``: a plain dict of the single prior
        predictive draw with the sampling axis removed, and the model.
    """
    with pm.Model() as test_model:
        row_0 = pm.Dirichlet("p_0", p_0_a)
        row_1 = pm.Dirichlet("p_1", p_1_a)

        stacked_rows = tt.stack([row_0, row_1])
        trans_mat = pm.Deterministic("P_tt", tt.shape_padleft(stacked_rows))

        init_probs = pm.Dirichlet("pi_0", pi_0_a)

        states = DiscreteMarkovChain("S_t", trans_mat, init_probs, shape=N)
        PoissonZeroProcess("Y_t", mu, states, observed=np.zeros(N))

        raw_draw = pm.sample_prior_predictive(samples=1)

        # Remove the extra "sampling" dimension from the sample results
        sample_point = {}
        for name, value in raw_draw.items():
            sample_point[name] = value.squeeze(0)

        # Remove the extra dimension added due to `pm.sample_prior_predictive`
        # forcing `size=1` in its call to `test_model.Y_t.random`.
        sample_point["Y_t"] = sample_point["Y_t"].squeeze(0)

    return sample_point, test_model
def test_no_trace(self):
    """``from_pymc3`` must work from prior / posterior predictive samples
    alone, without being handed a trace."""
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x * beta, 1, observed=y)
        trace = pm.sample(100, tune=100)
        prior = pm.sample_prior_predictive()
        posterior_predictive = pm.sample_posterior_predictive(trace)

    # Only prior
    idata = from_pymc3(prior=prior, model=model)
    expected = {"prior": ["beta"], "prior_predictive": ["obs"]}
    assert not check_multiple_attrs(expected, idata)

    # Only posterior_predictive
    idata = from_pymc3(posterior_predictive=posterior_predictive, model=model)
    expected = {"posterior_predictive": ["obs"]}
    assert not check_multiple_attrs(expected, idata)

    # Prior and posterior_predictive but no trace
    idata = from_pymc3(prior=prior, posterior_predictive=posterior_predictive, model=model)
    expected = {
        "prior": ["beta"],
        "prior_predictive": ["obs"],
        "posterior_predictive": ["obs"],
    }
    assert not check_multiple_attrs(expected, idata)
def test_bounded_dist(self):
    """Prior draws from a lower-bounded Normal of shape ``(3, 1)`` come back
    with shape ``(5, 3, 1)``."""
    var_shape = (3, 1)

    with pm.Model() as model:
        BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
        BoundedNormal("x", mu=aet.zeros((3, 1)), sd=1 * aet.ones((3, 1)), shape=var_shape)

    # Sampling happens in a second, separate model context.
    with model:
        prior_trace = pm.sample_prior_predictive(5)

    assert prior_trace["x"].shape == (5,) + var_shape
def test_model_not_drawable_prior(self):
    """Check prior vs. posterior predictive sampling with a ``HalfFlat`` prior.

    Prior predictive sampling is expected to raise a ``ValueError`` whose
    message contains "Cannot sample".  Both posterior predictive samplers
    (regular and fast) must still work from the trace.
    """
    n_obs = 200
    n_ppc = 40
    counts = np.random.poisson(lam=10, size=n_obs)
    model = pm.Model()

    with model:
        rate = pm.HalfFlat("sigma")
        pm.Poisson("foo", mu=rate, observed=counts)
        trace = pm.sample(tune=1000)

    with model:
        with pytest.raises(ValueError) as excinfo:
            pm.sample_prior_predictive(50)
        assert "Cannot sample" in str(excinfo.value)

        ppc = pm.sample_posterior_predictive(trace, n_ppc)
        assert ppc["foo"].shape == (n_ppc, n_obs)

        ppc = pm.fast_sample_posterior_predictive(trace, n_ppc)
        assert ppc["foo"].shape == (n_ppc, n_obs)
def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture):
    """Posterior predictive sampling must accept the ``prior`` group of an
    ArviZ ``InferenceData`` object in place of a trace."""
    pmodel, trace = point_list_arg_bug_fixture

    with pmodel:
        prior_draws = pm.sample_prior_predictive(samples=20)
        idat = az.from_pymc3(trace, prior=prior_draws)

    # The call only needs to complete without raising; its result is unused.
    with pmodel:
        pm.sample_posterior_predictive(idat.prior, var_names=["d"])
def sample_prior(self, samples=10):
    r"""
    Draw samples from the prior and store them on the instance; see
    `pymc3.sample_prior_predictive` for details.

    :param samples: number of prior samples to draw (default 10).
    :return: None. The result is stored in ``self.prior_trace``, a dictionary
        with an element for each parameter of the model.
    """
    # NOTE(review): an earlier comment here said only a single sample could be
    # drawn because of a tensor-dimension problem; the default is 10, so
    # confirm whether that limitation still applies.
    with self.model:
        prior_draws = pm.sample_prior_predictive(samples=samples)
        self.prior_trace = prior_draws
def test_zeroinflatedpoisson(self):
    """Prior predictive shapes for a zero-inflated Poisson model: scalar
    parents give ``(5000,)``, the shape-20 child gives ``(5000, 20)``."""
    n_draws = 5000
    with pm.Model():
        rate = pm.Beta('theta', alpha=1, beta=1)
        mix_weight = pm.HalfNormal('psi', sd=1)
        pm.ZeroInflatedPoisson('suppliers', psi=mix_weight, theta=rate, shape=20)
        gen_data = pm.sample_prior_predictive(samples=n_draws)

    for scalar_name in ('theta', 'psi'):
        assert gen_data[scalar_name].shape == (5000,)
    assert gen_data['suppliers'].shape == (5000, 20)
def test_zeroinflatedpoisson(self):
    """Check the shapes of prior predictive draws from a model containing a
    ``ZeroInflatedPoisson`` variable of shape 20."""
    with pm.Model():
        theta_rv = pm.Beta("theta", alpha=1, beta=1)
        psi_rv = pm.HalfNormal("psi", sd=1)
        pm.ZeroInflatedPoisson("suppliers", psi=psi_rv, theta=theta_rv, shape=20)
        draws = pm.sample_prior_predictive(samples=5000)

    expected_shapes = {
        "theta": (5000,),
        "psi": (5000,),
        "suppliers": (5000, 20),
    }
    for var_name, var_shape in expected_shapes.items():
        assert draws[var_name].shape == var_shape
def test_zeroinflatedpoisson(self):
    """Scalar priors produce one value per draw; the shape-20
    ``ZeroInflatedPoisson`` produces twenty values per draw."""
    n_samples = 5000
    n_suppliers = 20

    with pm.Model():
        theta = pm.Beta('theta', alpha=1, beta=1)
        psi = pm.HalfNormal('psi', sd=1)
        pm.ZeroInflatedPoisson('suppliers', psi=psi, theta=theta, shape=n_suppliers)
        generated = pm.sample_prior_predictive(samples=n_samples)

    assert generated['theta'].shape == (n_samples,)
    assert generated['psi'].shape == (n_samples,)
    assert generated['suppliers'].shape == (n_samples, n_suppliers)
def test_respects_shape(self):
    """Prior draws of ``goals`` must have shape ``(10,) + shape`` for integer
    and tuple ``shape`` arguments alike."""
    candidate_shapes = (2, (2,), (10, 2), (10, 10))

    for shape in candidate_shapes:
        with pm.Model():
            rate = pm.Gamma("mu", 3, 1, shape=1)
            pm.Poisson("goals", rate, shape=shape)
            trace = pm.sample_prior_predictive(10)

        # The bare integer case is deliberate: want to test shape as an int.
        trailing = (2,) if shape == 2 else shape
        assert trace["goals"].shape == (10,) + trailing
def test_respects_shape(self):
    """The ``shape`` given to a ``Poisson`` variable, as an int or a tuple,
    must be reflected in the prior predictive draws."""
    n_draws = 10

    for requested in (2, (2,), (10, 2), (10, 10)):
        with pm.Model():
            mu = pm.Gamma('mu', 3, 1, shape=1)
            pm.Poisson('goals', mu, shape=requested)
            prior = pm.sample_prior_predictive(n_draws)

        # want to test shape as an int: normalise it only for the comparison
        if requested == 2:
            expected_tail = (2,)
        else:
            expected_tail = requested
        assert prior['goals'].shape == (10,) + expected_tail
def test_shared(self):
    """Prior predictive draws of an observed variable must follow the
    current length of the shared observation array, also after it has been
    replaced with ``set_value``."""
    draws = 50
    first_len = 10
    second_len = 20

    obs = shared(np.random.rand(first_len) < 0.5)

    with pm.Model() as m:
        prob = pm.Beta("p", 1.0, 1.0)
        pm.Bernoulli("y", prob, observed=obs)
        gen1 = pm.sample_prior_predictive(draws)

    assert gen1["y"].shape == (draws, first_len)

    # Replace the observations with a longer array and sample again.
    obs.set_value(np.random.rand(second_len) < 0.5)
    with m:
        gen2 = pm.sample_prior_predictive(draws)

    assert gen2["y"].shape == (draws, second_len)
def sample_prior(self, distribution, shape, nested_rvs_info, prior_samples):
    """Build a model via ``self.build_model`` and return prior predictive
    samples drawn from it.

    ``distribution``, ``shape`` and ``nested_rvs_info`` are forwarded
    unchanged to ``self.build_model``; ``prior_samples`` is passed to
    ``pm.sample_prior_predictive``.
    """
    built = self.build_model(distribution, shape, nested_rvs_info)
    # build_model returns a 3-tuple; only the model itself is needed here.
    model, _rv, _nested_rvs = built

    with model:
        prior = pm.sample_prior_predictive(prior_samples)
    return prior
def sample_all(var_names: List[str], **sampler_kwargs):
    """Run prior predictive, posterior and posterior predictive sampling and
    bundle the results with ``arviz.from_pymc3``.

    Parameters
    ----------
    var_names : List[str]
        Variable names passed to both predictive samplers.
    **sampler_kwargs
        Additional keyword arguments forwarded to ``pm.sample``.

    Notes
    -----
    No model is passed explicitly to any of the PyMC3 calls; presumably this
    function is meant to be called inside a ``with model:`` block -- confirm
    against callers.
    """
    prior_samples = pm.sample_prior_predictive(var_names=var_names)
    trace = pm.sample(return_inferencedata=False, **sampler_kwargs)
    predictive_samples = pm.sample_posterior_predictive(trace, var_names=var_names)

    return arviz.from_pymc3(
        trace,
        prior=prior_samples,
        posterior_predictive=predictive_samples,
    )
def test_density_dist(self):
    """An observed ``DensityDist`` with a ``random`` callable can be sampled
    from the prior; the mean of the draws is compared with 0 to one decimal."""
    observations = np.random.normal(-1, 0.1, size=10)

    with pm.Model():
        loc = pm.Normal('mu', 0, 1)
        scale = pm.Gamma('sd', 1, 2)
        # Two separate distribution objects are built, one supplying the
        # log-probability and one supplying the random generator.
        logp_source = pm.Normal.dist(loc, scale)
        random_source = pm.Normal.dist(loc, scale)
        pm.DensityDist(
            'a',
            logp_source.logp,
            random=random_source.random,
            observed=observations,
        )
        prior = pm.sample_prior_predictive()

    npt.assert_almost_equal(prior['a'].mean(), 0, decimal=1)
def test_sample_generate_values(fixture_model, fixture_sizes):
    """Prior draws have shape ``sample size + distribution shape``.

    A sample size of exactly one contributes no leading dimension.
    """
    model, RVs = fixture_model

    sample_dims = to_tuple(fixture_sizes)
    # Single draws are interpreted as scalars for backwards compatibility
    leading = tuple() if sample_dims == (1,) else sample_dims

    with model:
        prior = pm.sample_prior_predictive(samples=fixture_sizes)
        for var in RVs:
            assert prior[var.name].shape == leading + tuple(var.distribution.shape)
def test_sample(self):
    """Prior and posterior predictive samplers follow the current contents
    of a ``pm.Data`` container.

    Shapes are checked before and after the predictor is replaced.  The
    posterior predictive means are compared with the predictor in use
    (tolerance ``1e-1``).
    """
    n_draws = 1000
    x = np.random.normal(size=100)
    y = x + np.random.normal(scale=1e-2, size=100)
    x_pred = np.linspace(-3, 3, 200, dtype="float32")

    with pm.Model():
        x_shared = pm.Data("x_shared", x)
        slope = pm.Normal("b", 0.0, 10.0)
        pm.Normal("obs", slope * x_shared, np.sqrt(1e-2), observed=y)

        # Sampling with the original (length-100) predictor.
        prior_before = pm.sample_prior_predictive(1000)
        trace = pm.sample(1000, init=None, tune=1000, chains=1)
        ppc_before = pm.sample_posterior_predictive(trace, 1000)
        fast_ppc_before = pm.fast_sample_posterior_predictive(trace, 1000)

        # Sampling again after swapping in the length-200 predictor.
        x_shared.set_value(x_pred)
        prior_after = pm.sample_prior_predictive(1000)
        ppc_after = pm.sample_posterior_predictive(trace, samples=1000)
        fast_ppc_after = pm.fast_sample_posterior_predictive(trace, samples=1000)

    assert prior_before["b"].shape == (n_draws,)
    assert prior_before["obs"].shape == (n_draws, 100)
    assert prior_after["obs"].shape == (n_draws, 200)

    assert ppc_before["obs"].shape == (n_draws, 100)
    assert fast_ppc_before["obs"].shape == (n_draws, 100)
    np.testing.assert_allclose(x, ppc_before["obs"].mean(axis=0), atol=1e-1)
    np.testing.assert_allclose(x, fast_ppc_before["obs"].mean(axis=0), atol=1e-1)

    assert ppc_after["obs"].shape == (n_draws, 200)
    assert fast_ppc_after["obs"].shape == (n_draws, 200)
    np.testing.assert_allclose(x_pred, ppc_after["obs"].mean(axis=0), atol=1e-1)
    np.testing.assert_allclose(x_pred, fast_ppc_after["obs"].mean(axis=0), atol=1e-1)
def test_broadcasting_in_shape(self):
    """A ``MixtureSameFamily`` declared with ``shape=(1000,)`` over two
    Poisson components yields prior draws of shape ``(n_samples, 1000)``."""
    weights = np.ones(2) / 2

    with pm.Model():
        rates = pm.Gamma("mu", 1.0, 1.0, shape=2)
        components = pm.Poisson.dist(rates, shape=2)
        pm.MixtureSameFamily("mix", w=weights, comp_dists=components, shape=(1000,))
        prior = pm.sample_prior_predictive(samples=self.n_samples)

    assert prior["mix"].shape == (self.n_samples, 1000)
def test_bounded_dist(self):
    """Five prior draws from a bounded Normal of shape ``(3, 1)`` must have
    shape ``(5, 3, 1)``."""
    with pm.Model() as model:
        BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
        loc = tt.zeros((3, 1))
        scale = 1 * tt.ones((3, 1))
        BoundedNormal("x", mu=loc, sd=scale, shape=(3, 1))

    # The model context is re-entered for sampling.
    with model:
        draws = pm.sample_prior_predictive(5)

    assert draws["x"].shape == (5, 3, 1)
def sample_prior_causal_model(self, g, df, data_types, initialization_trace):
    """Build a Bayesian network from graph ``g`` and draw one prior sample.

    The graph is passed in turn through ``apply_data_types``,
    ``apply_parents``, ``apply_parameters`` and ``build_bayesian_network``
    inside a fresh PyMC3 model context.

    Returns
    -------
    tuple
        ``(g, trace)``: the processed graph and the result of
        ``pm.sample_prior_predictive(1)``.

    Raises
    ------
    Exception
        If ``g`` is not a directed acyclic graph.
    """
    # Guard clause: nothing below makes sense for a cyclic graph.
    if not nx.is_directed_acyclic_graph(g):
        raise Exception("Graph is not a DAG!")

    with pm.Model():
        g = self.apply_data_types(g, data_types)
        g = self.apply_parents(g)
        g = self.apply_parameters(g, df, initialization_trace=initialization_trace)
        g = self.build_bayesian_network(g, df)
        trace = pm.sample_prior_predictive(1)

    return g, trace
def generate_prior_predictive(model_name, data, get_model_path):
    """Draw prior predictive samples for a named model (unfinished).

    The model is obtained from ``get_model`` and sampled.  The function then
    always raises, so nothing is ever returned.

    Raises
    ------
    Exception
        Always, with the message "Not finished yet".
    """
    # This shouldn't really depend on data but not sure if it is possible to
    # do without; it might depend on part of the data but not the full data.
    model = get_model(
        model_name=model_name,
        data=data,
        get_model_path=get_model_path,
    )

    with model:
        # The samples are drawn but not yet used for anything.
        _prior = pm.sample_prior_predictive()

    raise Exception("Not finished yet")
def test_internal_missing_observations():
    """Observed variables with masked (missing) entries must emit an
    ``ImputationWarning`` on creation and appear in the prior predictive
    samples; regular sampling must also run."""
    with Model():
        masked_first = ma.masked_values([1, 2, -1, 4, -1], value=-1)
        masked_second = ma.masked_values([-1, -1, 6, -1, 8], value=-1)

        with pytest.warns(ImputationWarning):
            theta1 = Normal('theta1', mu=2, observed=masked_first)
        with pytest.warns(ImputationWarning):
            Normal('theta2', mu=theta1, observed=masked_second)

        prior_trace = sample_prior_predictive()

        # Both partially observed variables must be present in the result.
        assert {'theta1', 'theta2'} <= set(prior_trace.keys())

        sample()
def get_inference_data(self):
    """Convert the stored trace, together with fresh prior and posterior
    predictive samples, into the object returned by ``from_pymc3``.

    ``self.obj`` is used as the trace and ``self.data["J"]`` as the length
    of the ``"school"`` coordinate.
    """
    with self.model:
        prior = pm.sample_prior_predictive()
        posterior_predictive = pm.sample_posterior_predictive(self.obj)

    school_coords = {"school": np.arange(self.data["J"])}
    school_dims = {"theta": ["school"], "theta_tilde": ["school"]}

    return from_pymc3(
        trace=self.obj,
        prior=prior,
        posterior_predictive=posterior_predictive,
        coords=school_coords,
        dims=school_dims,
    )
def get_inference_data(self, data, eight_schools_params):
    """Build the ``from_pymc3`` result for the fitted model held in ``data``.

    ``data.model`` provides the sampling context and ``data.obj`` the trace;
    ``eight_schools_params["J"]`` gives the length of the ``"school"``
    coordinate.
    """
    trace = data.obj

    with data.model:
        prior = pm.sample_prior_predictive()
        posterior_predictive = pm.sample_posterior_predictive(trace)

    n_schools = eight_schools_params["J"]
    return from_pymc3(
        trace=trace,
        prior=prior,
        posterior_predictive=posterior_predictive,
        coords={"school": np.arange(n_schools)},
        dims={"theta": ["school"], "eta": ["school"]},
    )
def test_sample_prior_and_posterior(self):
    """Prior and posterior predictive sampling must work for a mixture of
    multivariate normals with LKJ-Cholesky covariance priors, and return
    arrays of the expected shapes."""

    def build_toy_dataset(N, K):
        """Draw ``N`` three-dimensional points from a fixed three-component
        Gaussian mixture; return ``(points, component_labels)``."""
        pi = np.array([0.2, 0.5, 0.3])
        mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
        stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
        x = np.zeros((N, 3), dtype=np.float32)
        # Builtin ``int`` instead of the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        y = np.zeros((N,), dtype=int)
        for n in range(N):
            k = np.argmax(np.random.multinomial(1, pi))
            x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
            y[n] = k
        return x, y

    N = 100  # number of data points
    K = 3  # number of mixture components
    D = 3  # dimensionality of the data

    X, y = build_toy_dataset(N, K)

    with pm.Model() as model:
        pi = pm.Dirichlet('pi', np.ones(K))

        comp_dist = []
        mu = []
        packed_chol = []
        chol = []
        for i in range(K):
            mu.append(pm.Normal('mu%i' % i, 0, 10, shape=D))
            packed_chol.append(
                pm.LKJCholeskyCov('chol_cov_%i' % i, eta=2, n=D, sd_dist=pm.HalfNormal.dist(2.5))
            )
            chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True))
            comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))

        pm.Mixture('x_obs', pi, comp_dist, observed=X)

    with model:
        # Deliberately short run: only the shapes are checked below.
        trace = pm.sample(30, tune=10, chains=1)

    n_samples = 20
    with model:
        ppc = pm.sample_posterior_predictive(trace, n_samples)
        prior = pm.sample_prior_predictive(samples=n_samples)

    assert ppc['x_obs'].shape == (n_samples,) + X.shape
    assert prior['x_obs'].shape == (n_samples,) + X.shape
    assert prior['mu0'].shape == (n_samples, D)
    # A packed lower-triangular D x D matrix has D * (D + 1) / 2 entries.
    assert prior['chol_cov_0'].shape == (n_samples, D * (D + 1) // 2)
def test_multivariate2(self):
    """Regression test added for issue #3271: prior and posterior predictive
    shapes of a Dirichlet-Multinomial model with observed data."""
    n_draws = 20
    mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)

    with pm.Model() as dm_model:
        probs = pm.Dirichlet("probs", a=np.ones(6), shape=6)
        obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
        burned_trace = pm.sample(20, tune=10, cores=1)

    # The predictive samplers receive the model explicitly via ``model=``.
    sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
    sim_ppc = pm.sample_posterior_predictive(burned_trace, samples=20, model=dm_model)

    assert sim_priors['probs'].shape == (n_draws, 6)
    assert sim_priors['obs'].shape == (n_draws,) + obs.distribution.shape
    assert sim_ppc['obs'].shape == (n_draws,) + obs.distribution.shape
def test_ignores_observed(self):
    """Prior predictive sampling must ignore the observed data.

    The observations are centred near +10 while the prior places ``mu`` near
    -100, so prior predictive draws of ``x_obs`` should lie near -101.  The
    bounds are therefore on the negative side (``< -90``).  An upper bound
    of ``+90`` would also be met by the observed values themselves and so
    could not detect observations leaking into the draws.
    """
    observed = np.random.normal(10, 1, size=200)
    with pm.Model():
        # Use a prior that's way off to show we're ignoring the observed variables
        mu = pm.Normal('mu', mu=-100, sd=1)
        positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
        z = -1 - positive_mu
        pm.Normal('x_obs', mu=z, sd=1, observed=observed)
        prior = pm.sample_prior_predictive()

    # mu ~ N(-100, 1): -90 is ten standard deviations above the mean.
    assert (prior['mu'] < -90).all()
    assert (prior['positive_mu'] > 90).all()
    # x_obs is centred on -1 - |mu| (about -101); the observed data sit near +10.
    assert (prior['x_obs'] < -90).all()
    assert prior['x_obs'].shape == (500, 200)
    npt.assert_array_almost_equal(prior['positive_mu'], np.abs(prior['mu']), decimal=4)
def _initial_population(draws, model, variables):
    """Create an initial population of points drawn from the prior.

    Parameters
    ----------
    draws
        Number of population members to build.
    model
        Model providing ``test_point`` and ``dict_to_array``.
    variables
        Variables whose values make up each population member.

    Returns
    -------
    tuple
        ``(population, var_info)`` where ``population`` is the ``floatX``
        array of flattened points and ``var_info`` maps each variable name to
        ``(shape, size)`` taken from the model's test point.
    """
    test_point = model.test_point
    prior_draws = pm.sample_prior_predictive(draws, model=model)

    var_info = {
        var.name: (test_point[var.name].shape, test_point[var.name].size)
        for var in variables
    }

    population = []
    for idx in range(draws):
        member = {var.name: prior_draws[var.name][idx] for var in variables}
        population.append(model.dict_to_array(pm.Point(member, model=model)))

    return np.array(floatX(population)), var_info
def test_transformed(self):
    """Prior predictive output of a hierarchical Beta-Binomial model has the
    expected shapes and also contains the transformed variable
    ``'thetas_logodds__'``."""
    n = 18
    draws = 50
    at_bats = 45 * np.ones(n, dtype=int)
    hits = np.random.randint(1, 40, size=n, dtype=int)

    with pm.Model():
        phi = pm.Beta('phi', alpha=1., beta=1.)
        log_kappa = pm.Exponential('logkappa', lam=5.)
        kappa = pm.Deterministic('kappa', tt.exp(log_kappa))

        alpha_param = phi*kappa
        beta_param = (1.0-phi)*kappa
        thetas = pm.Beta('thetas', alpha=alpha_param, beta=beta_param, shape=n)

        pm.Binomial('y', n=at_bats, p=thetas, observed=hits)
        gen = pm.sample_prior_predictive(draws)

    assert gen['phi'].shape == (draws,)
    assert gen['y'].shape == (draws, n)
    assert 'thetas_logodds__' in gen
def test_transformed(self):
    """Sampling the prior of a model with automatically transformed
    variables returns correctly shaped draws and includes the transformed
    ``"thetas_logodds__"`` entry."""
    n_players = 18
    n_draws = 50
    at_bats = 45 * np.ones(n_players, dtype=int)
    hits = np.random.randint(1, 40, size=n_players, dtype=int)

    with pm.Model():
        mean_rate = pm.Beta("phi", alpha=1.0, beta=1.0)
        kappa_log = pm.Exponential("logkappa", lam=5.0)
        concentration = pm.Deterministic("kappa", tt.exp(kappa_log))
        player_rates = pm.Beta(
            "thetas",
            alpha=mean_rate * concentration,
            beta=(1.0 - mean_rate) * concentration,
            shape=n_players,
        )
        pm.Binomial("y", n=at_bats, p=player_rates, observed=hits)
        prior_draws = pm.sample_prior_predictive(n_draws)

    assert prior_draws["phi"].shape == (n_draws,)
    assert prior_draws["y"].shape == (n_draws, n_players)
    assert "thetas_logodds__" in prior_draws