def test_stanc_no_warning() -> None:
    """Building a clean program must not write the word "warning" to stderr."""
    program_code = "parameters {real y;} model {y ~ normal(0,1);}"
    captured = io.StringIO()
    # Anything the build prints to stderr ends up in `captured`.
    with contextlib.redirect_stderr(captured):
        stan.build(program_code=program_code)
    stderr_text = captured.getvalue()
    assert "warning" not in stderr_text.lower()
def test_generated_quantities_seed():
    """Equal seeds must reproduce ``mean_y``; a different seed must not."""

    def sample_with_seed(seed):
        # Build from scratch each time so that only the seed varies.
        posterior = stan.build(program_code, data=data, random_seed=seed)
        return posterior.sample(num_samples=10)

    fit_a = sample_with_seed(123)
    fit_b = sample_with_seed(123)
    fit_other = sample_with_seed(456)

    assert np.allclose(fit_a["mean_y"], fit_b["mean_y"])
    assert not np.allclose(fit_a["mean_y"], fit_other["mean_y"])
def test_data_unmodified(posterior):
    """``stan.build`` must leave the caller's data dictionary untouched."""
    # The `posterior` fixture is requested only to cache compilation.
    payload = copy.deepcopy(data)
    # `build` will convert data into a list; the original must not change.
    payload["y"] = np.array(payload["y"], dtype=int)
    assert payload["y"].dtype == int

    stan.build(program_code, data=payload)

    # `payload["y"]` must still be an integer array, not a list.
    y_after = payload["y"]
    assert not isinstance(y_after, list)
    assert y_after.dtype == int
def test_stanc_exception_semicolon():
    """A program with missing semicolons must fail to build with ``RuntimeError``."""
    # Both parameter declarations lack their terminating semicolon.
    program_code = """ parameters { real z real y } model { z ~ normal(0, 1); y ~ normal(0, 1);} """
    expected_message = r'PARSER EXPECTED: ";"'
    with pytest.raises(RuntimeError, match=expected_message):
        stan.build(program_code=program_code)
def test_stanc_exception_semicolon():
    """A program with missing semicolons must be reported as a syntax error."""
    # Both parameter declarations lack their terminating semicolon.
    program_code = """ parameters { real z real y } model { z ~ normal(0, 1); y ~ normal(0, 1);} """
    expected_message = r"Syntax error in"
    with pytest.raises(RuntimeError, match=expected_message):
        stan.build(program_code=program_code)
def test_empty_parameter(self):
    """Convert a fit containing a zero-length parameter and check its groups.

    The program declares ``vector[0] a``; the expected-attribute list names it
    as ``"~a"`` alongside the regular parameters ``y``, ``x`` and ``z``.
    """
    model_code = """ parameters { real y; vector[3] x; vector[0] a; vector[2] z; } model { y ~ normal(0,1); } """
    # Pick the sampling API that matches the installed PyStan major version.
    if pystan_version() == 2:
        from pystan import StanModel  # pylint: disable=import-error

        compiled = StanModel(model_code=model_code)
        fit = compiled.sampling(iter=500, chains=2, check_hmc_diagnostics=False)
    else:
        import stan  # pylint: disable=import-error

        compiled = stan.build(model_code)
        fit = compiled.sample(num_samples=500, num_chains=2)

    converted = from_pystan(posterior=fit)
    expected_attrs = {
        "posterior": ["y", "x", "z", "~a"],
        "sample_stats": ["diverging"],
    }
    failures = check_multiple_attrs(expected_attrs, converted)
    assert not failures
def run_stan_model(model_name, year, m=1, n=7, **kwargs):
    """Compile the Stan program stored in a file and sample from it.

    Args:
        model_name: Path of the file holding the Stan program text.
        year: Accepted but not used by this function.
            NOTE(review): confirm whether it should reach ``get_stan_data``.
        m: Forwarded to ``get_stan_data``.
        n: Forwarded to ``get_stan_data``.
        **kwargs: Forwarded unchanged to ``posterior.sample``.

    Returns:
        The fit returned by ``posterior.sample``.
    """
    stan_data = get_stan_data(m=m, n=n)
    with open(model_name, 'r') as source_file:
        program_code = source_file.read()
    posterior = stan.build(program_code, stan_data)
    return posterior.sample(**kwargs)
def __init__(self, code, data):
    """Build the Stan model and expose its log-probability functions.

    Args:
        code: Stan program text; also passed to
            ``StanLogPDF._count_parameters``.
        data: Mapping of data for the model. A shallow ``dict`` copy is kept
            on the instance.
    """
    # Number of parameters, where a vector parameter counts as one.
    n_params = StanLogPDF._count_parameters(code)

    # Store data.
    # TODO: Deep clone data so that it can't be changed anymore?
    self._data = dict(data)

    # Build the model, then use httpstan to reach the compiled module.
    posterior = stan.build(code, data=data)
    module = httpstan.models.import_services_extension_module(
        posterior.model_name)

    # Flatten parameter names: a parameter with a dimension becomes
    # name_0, name_1, ...; a parameter without one keeps its name.
    self._names = []
    leading_names = posterior.param_names[:n_params]
    leading_dims = posterior.dims[:n_params]
    for name, shape in zip(leading_names, leading_dims):
        if not shape:
            self._names.append(name)
            continue
        # Only one-dimensional parameters are handled here.
        assert len(shape) == 1
        self._names.extend(
            [name + '_' + str(i) for i in range(shape[0])])

    # Flattened parameter count.
    self._n_parameters = len(self._names)

    # Log-probability and gradient functions of the compiled module.
    self._log_prob = module.log_prob
    self._log_prob_grad = module.log_prob_grad
def test_nan_inf():
    """First draw of ``alpha`` must be NaN; of ``beta`` and ``gamma`` infinite."""
    fit = stan.build(program_code).sample()
    assert fit is not None
    assert math.isnan(fit["alpha"].ravel()[0])
    for infinite_name in ("beta", "gamma"):
        assert math.isinf(fit[infinite_name].ravel()[0])
def test_bernoulli_random_seed_different(posterior):
    """Builds with different random seeds must give different ``theta`` draws."""
    fits = []
    for seed in (1, 2):
        seeded_posterior = stan.build(program_code, data=data, random_seed=seed)
        fits.append(seeded_posterior.sample())

    # Print the leading draws of each fit to make a failure easier to read.
    for index, fit in enumerate(fits):
        print(index, fit["theta"].ravel()[:10])

    theta_first, theta_second = (fit["theta"] for fit in fits)
    assert not np.allclose(theta_first, theta_second)
def test_fit_cache_uncacheable():
    """Test that a fit built without a random seed is not written to the cache."""
    size_before = file_usage(cache_path())
    # No random seed is given: this fit is NOT cacheable and must not be saved.
    posterior = stan.build(program_code)
    posterior.sample()
    size_after = file_usage(cache_path())
    assert size_before == size_after
def test_initialization_failed():
    """Sampling must raise ``RuntimeError`` when initialization fails."""
    posterior = stan.build(program_code, random_seed=1)
    # Two attempts: the second one guards against interactions with caching.
    for _attempt in range(2):
        with pytest.raises(RuntimeError, match=r"Initialization failed."):
            posterior.sample(num_chains=1)
def pystan_noncentered_schools(data, draws, chains):
    """Fit the non-centered eight schools model with the installed PyStan.

    Args:
        data: Data passed to the model.
        draws: Number of post-warmup draws per chain.
        chains: Number of chains.

    Returns:
        Tuple ``(stan_model, fit)``.
    """
    schools_code = """ data { int<lower=0> J; real y[J]; real<lower=0> sigma[J]; } parameters { real mu; real<lower=0> tau; real eta[J]; } transformed parameters { real theta[J]; for (j in 1:J) theta[j] = mu + tau * eta[j]; } model { mu ~ normal(0, 5); tau ~ cauchy(0, 5); eta ~ normal(0, 1); y ~ normal(theta, sigma); } generated quantities { vector[J] log_lik; vector[J] y_hat; for (j in 1:J) { log_lik[j] = normal_lpdf(y[j] | theta[j], sigma[j]); y_hat[j] = normal_rng(theta[j], sigma[j]); } } """
    num_warmup = 500

    if pystan_version() == 2:
        import pystan  # pylint: disable=import-error

        stan_model = pystan.StanModel(model_code=schools_code)
        # PyStan 2 counts warmup inside `iter`, so it is added to the draws.
        fit = stan_model.sampling(
            data=data,
            iter=draws + num_warmup,
            warmup=num_warmup,
            chains=chains,
            check_hmc_diagnostics=False,
            control={"adapt_engaged": False},
        )
        return stan_model, fit

    import stan  # pylint: disable=import-error

    stan_model = stan.build(schools_code, data=data)
    fit = stan_model.sample(
        num_chains=chains,
        num_samples=draws,
        num_warmup=num_warmup,
        save_warmup=False,
    )
    return stan_model, fit
def test_eight_schools_build_numpy(posterior):
    """Build must accept a NumPy array and a pandas Series as data values."""
    y_values = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    sigma_values = pd.Series([15, 10, 16, 11, 9, 11, 10, 18], name="sigma")
    schools_data_alt = {"J": 8, "y": y_values, "sigma": sigma_values}

    posterior_alt = stan.build(program_code, data=schools_data_alt)
    assert posterior_alt is not None
def test_normal_sample_args():
    """Sample a standard normal with a seed, custom sample count and thinning."""
    program_code = "parameters {real y;} model {y ~ normal(0,1);}"
    posterior = stan.build(program_code, random_seed=1)
    assert posterior is not None

    fit = posterior.sample(num_samples=350, num_thin=2)
    frame = fit.to_frame()
    # 350 samples thinned by 2; the factor 4 is presumably the default chain count.
    assert len(frame["y"]) == 350 * 4 // 2
    assert -5 < frame["y"].mean() < 5
def test_stanc_warning() -> None:
    """A stanc deprecation warning must reach the user through stderr."""
    # The deprecated `<-` assignment makes stanc print:
    # assignment operator <- is deprecated in the Stan language; use = instead.
    program_code = """ parameters { real y; } model { real x; x <- 5; } """
    captured = io.StringIO()
    with contextlib.redirect_stderr(captured):
        stan.build(program_code=program_code)
    stderr_text = captured.getvalue()
    assert "assignment operator <- is deprecated in the Stan language" in stderr_text
def test_user_init_different_initial_values():
    """Different user-supplied inits must give different first ``mu`` draws."""
    posterior = stan.build(program_code, data=data, random_seed=2)

    def draw_mu(start):
        # One short chain started from the requested value of `mu`.
        fit = posterior.sample(num_chains=1, num_samples=10, init=[{"mu": start}])
        return fit["mu"].ravel()

    mu_from_3 = draw_mu(3)
    mu_from_4 = draw_mu(4)
    assert mu_from_3[0] != mu_from_4[0]
def fit(y, size_factors, verbose=0):
    """Sample the ``POISS_LOGNORMAL`` model and return its ``mu`` and ``sigma`` draws.

    Args:
        y: Observations; must support ``len`` and ``.astype(int)``.
        size_factors: Size factors; their natural log is passed as ``log_s``.
        verbose: When greater than 0, the draws are printed.

    Returns:
        Tuple ``(mu, sigma)`` taken from the fit.
    """
    stan_data = {
        "N": len(y),
        "y": y.astype(int),
        "log_s": np.log(size_factors),
    }
    posterior = stan.build(POISS_LOGNORMAL, data=stan_data)
    draws = posterior.sample(num_chains=4, num_samples=1000)
    sigma = draws["sigma"]
    mu = draws["mu"]

    if verbose > 0:
        print(f"Mu: {mu}")
        print(f"Sigma: {sigma}")
    return mu, sigma
def test_normal_sample_chains():
    """Sample a standard normal with three chains and check the draw layout."""
    program_code = "parameters {real y;} model {y ~ normal(0,1);}"
    posterior = stan.build(program_code)
    assert posterior is not None

    num_chains = 3
    fit = posterior.sample(num_chains=num_chains)
    # Sample and sampler parameters come first in the draws array.
    offset = len(fit.sample_and_sampler_param_names)
    # Axes: (sampler params + 1 model param, n samples, 3 chains).
    assert fit._draws.shape == (offset + 1, 1000, 3)

    frame = fit.to_frame()
    assert len(frame["y"]) == 3000
    assert -5 < frame["y"].mean() < 5
def sample(self, modified_observed_data):
    """Rebuild the PyStan model on ``modified_observed_data`` and resample it.

    ``self.model`` may hold either the program text or an already built
    model; after the call it holds the newly built model. Sampling uses
    ``self.sample_kwargs``.
    """
    import stan  # pylint: disable=import-error,import-outside-toplevel

    self.model: Union[str, stan.Model]
    # A string is the program text itself; otherwise read it from the model.
    program_code = (
        self.model if isinstance(self.model, str) else self.model.program_code
    )
    self.model = stan.build(program_code, data=modified_observed_data)
    return self.model.sample(**self.sample_kwargs)
def test_normal_sample():
    """Sample a very narrow normal and check draw layout and summary values."""
    program_code = "parameters {real y;} model {y ~ normal(0, 0.0001);}"
    posterior = stan.build(program_code)
    assert posterior is not None

    fit = posterior.sample()
    # Sample and sampler parameters come first in the draws array.
    offset = len(fit.sample_and_sampler_param_names)
    # Axes: (sampler params + 1 model param, n samples, 4 chains).
    assert fit._draws.shape == (offset + 1, 1000, 4)

    frame = fit.to_frame()
    y_draws = fit._draws[offset, :, :].ravel()
    assert (frame["y"] == y_draws).all()
    assert len(frame["y"]) == 4000
    assert -0.01 < frame["y"].mean() < 0.01
    assert -0.01 < frame["y"].std() < 0.01
def test_vector_params():
    """A vector parameter must appear as one frame column per element."""
    program_code = """ parameters { vector[3] beta; } model { beta ~ normal(0, 1); } """
    fit = stan.build(program_code).sample()
    frame = fit.to_frame()
    # The three vector elements are the last columns of the frame.
    trailing_columns = frame.columns[-3:]
    assert all(trailing_columns == ["beta.1", "beta.2", "beta.3"])
    assert len(frame["beta.1"]) > 100
def run(self, iterations, warmup_iterations, chains):
    """Build the base model, sample it and record error, draws and runtime.

    Args:
        iterations: Number of samples per chain (``num_samples``).
        warmup_iterations: Number of warmup iterations (``num_warmup``).
        chains: Number of chains (``num_chains``).

    Side effects:
        Sets ``self.model`` (built model), ``self.posterior`` (draws as a
        DataFrame), ``self.mse`` (mean squared error of the posterior means
        against ``self.ground_truth['true_theta']``) and ``self.runtime``
        (seconds spent building and sampling).
    """
    print('Running model...')
    # NOTE(review): relative path, so it resolves against the current
    # working directory -- confirm this is intended.
    model_path = 'Desktop/GitHub/reinventing-test-and-trace-r/python/model-diagnostics/model-codes/base_model.stan'
    start = time.time()
    # `stan.build` expects the Stan program text, not a file name, so the
    # file is read here instead of handing the path to the compiler.
    with open(model_path, encoding='utf-8') as model_file:
        program_code = model_file.read()
    self.model = stan.build(program_code, data=self.data, random_seed=1)
    fit = self.model.sample(
        num_samples=iterations,
        num_warmup=warmup_iterations,
        num_chains=chains,
    )
    end = time.time()
    print('Finished running')

    draws = fit.to_frame()
    posterior_means = draws.describe().T['mean']
    # Skip the first 7 columns and take the next P entries.
    # Presumably the 7 are the sampler diagnostics placed before the model
    # parameters, and the P entries are theta -- verify against the model.
    theta_means = posterior_means.iloc[7:(self.data['P'] + 7)]
    error = np.mean((theta_means - self.ground_truth['true_theta']) ** 2)

    self.mse = error
    self.posterior = draws
    self.runtime = end - start
def test_fit_cache():
    """Test that a fit with a random seed set is cached."""

    def cache_size():
        return file_usage(cache_path())

    # A fit with a random seed is cacheable, so the first run grows the cache.
    size_at_start = cache_size()
    seed = random.randrange(1, 2 ** 16)
    normal_posterior = stan.build(program_code, random_seed=seed)
    normal_posterior.sample()
    size_after_first = cache_size()
    assert size_after_first > size_at_start

    # The fit is now in the cache: sampling again must not change its size.
    size_before_second = cache_size()
    normal_posterior.sample()
    size_after_second = cache_size()
    assert size_before_second == size_after_second
def test_bernoulli_different_chains(posterior):
    """Two chains of one fit must differ, with and without a random seed."""

    def theta_per_chain(fit):
        # For a fit with only one scalar parameter, it is the last row.
        return fit._draws[-1, :, 0], fit._draws[-1, :, 1]

    fit = posterior.sample(num_chains=2)
    assert fit.num_chains == 2
    first_chain, second_chain = theta_per_chain(fit)
    assert not np.allclose(first_chain, second_chain)

    # The same must hold when a random seed is used.
    del fit
    seeded_posterior = stan.build(program_code, data=data, random_seed=42)
    fit = seeded_posterior.sample(num_chains=2)
    first_chain, second_chain = theta_per_chain(fit)
    assert not np.allclose(first_chain, second_chain)
def test_user_init_same_initial_values():
    """Repeated sampling with the same seed and inits must give identical draws."""
    posterior = stan.build(program_code, data=data, random_seed=2)

    def draw_mu(**sample_options):
        fit = posterior.sample(num_chains=1, num_samples=10, **sample_options)
        return fit["mu"].ravel()

    # Without user-supplied initial values.
    mu_default_a = draw_mu()
    mu_default_b = draw_mu()
    assert mu_default_a[0] == mu_default_b[0]
    np.testing.assert_array_equal(mu_default_a, mu_default_b)

    # With the same user-supplied initial value for both runs.
    mu_init_a = draw_mu(init=[{"mu": -4}])
    mu_init_b = draw_mu(init=[{"mu": -4}])
    assert mu_init_a[0] == mu_init_b[0]
    np.testing.assert_array_equal(mu_init_a, mu_init_b)

    # A user-supplied init must change the draws relative to the default.
    assert mu_default_a[0] != mu_init_a[0]
def test_matrix_params_build():
    """Build a program with a matrix-valued parameter."""
    program_code = """ data { int<lower=2> K; int<lower=1> D; } parameters { matrix[K,D] beta; } model { for (k in 1:K) for (d in 1:D) beta[k,d] ~ normal(0, 1); } """
    matrix_dims = {"K": 9, "D": 5}
    built = stan.build(program_code, data=matrix_dims)
    assert built is not None
def test_matrix_params_sample():
    """Sample a matrix-valued parameter and check the frame has one column per cell."""
    program_code = """ data { int<lower=2> K; int<lower=1> D; } parameters { matrix[K,D] beta; } model { for (k in 1:K) for (d in 1:D) beta[k,d] ~ normal(0, 1); } """
    data = {"K": 9, "D": 5}
    fit = stan.build(program_code, data=data).sample()
    frame = fit.to_frame()

    # Sampler columns plus one column for each of the K * D matrix cells.
    sampler_column_count = len(fit.sample_and_sampler_param_names)
    matrix_cell_count = data["K"] * data["D"]
    assert len(frame.columns) == sampler_column_count + matrix_cell_count
    assert len(frame["beta.1.1"]) > 100
def normal_posterior():
    """Build and return the posterior for the module-level ``program_code``."""
    built_posterior = stan.build(program_code)
    return built_posterior
def test_bernoulli_random_seed_same():
    """Two builds with the same random seed must give matching ``theta`` draws."""
    fits = []
    for _ in range(2):
        seeded_posterior = stan.build(program_code, data=data, random_seed=42)
        fits.append(seeded_posterior.sample())

    first_fit, second_fit = fits
    assert np.allclose(first_fit["theta"], second_fit["theta"])