Beispiel #1
0
def test_stanc_no_warning() -> None:
    """Building a clean program must not print any warning to stderr."""
    captured_stderr = io.StringIO()
    source = "parameters {real y;} model {y ~ normal(0,1);}"
    with contextlib.redirect_stderr(captured_stderr):
        stan.build(program_code=source)
    # Compare case-insensitively so "Warning" / "WARNING" are caught as well.
    stderr_text = captured_stderr.getvalue().lower()
    assert "warning" not in stderr_text
Beispiel #2
0
def test_generated_quantities_seed():
    """Equal seeds must reproduce ``mean_y``; a different seed must not."""

    def sample_with_seed(seed):
        # Build from scratch each time so the seed is the only thing varying.
        built = stan.build(program_code, data=data, random_seed=seed)
        return built.sample(num_samples=10)

    fits = [sample_with_seed(seed) for seed in (123, 123, 456)]
    first, repeat, other = (fit["mean_y"] for fit in fits)
    assert np.allclose(first, repeat)
    assert not np.allclose(first, other)
Beispiel #3
0
def test_data_unmodified(posterior):
    """Check that ``stan.build`` does not mutate the data dict it is given."""
    # The ``posterior`` argument is requested only so compilation is cached.
    payload = copy.deepcopy(data)
    payload["y"] = np.array(payload["y"], dtype=int)
    assert payload["y"].dtype == int

    # ``build`` is expected to convert the data to lists on its side; the
    # caller's array must come back exactly as it went in.
    stan.build(program_code, data=payload)

    y_after_build = payload["y"]
    assert not isinstance(y_after_build, list)
    assert y_after_build.dtype == int
def test_stanc_exception_semicolon():
    """A program missing semicolons must fail to build with a parser error."""
    # Both declarations in the parameters block lack their trailing ';'.
    broken_program = """
    parameters {
        real z
        real y
    }
    model {
        z ~ normal(0, 1);
        y ~ normal(0, 1);}
    """
    expected_message = r'PARSER EXPECTED: ";"'
    with pytest.raises(RuntimeError, match=expected_message):
        stan.build(program_code=broken_program)
def test_stanc_exception_semicolon():
    """A program missing semicolons must fail to build with a syntax error."""
    # Both declarations in the parameters block lack their trailing ';'.
    broken_program = """
    parameters {
        real z
        real y
    }
    model {
        z ~ normal(0, 1);
        y ~ normal(0, 1);}
    """
    expected_message = r"Syntax error in"
    with pytest.raises(RuntimeError, match=expected_message):
        stan.build(program_code=broken_program)
Beispiel #6
0
    def test_empty_parameter(self):
        """Convert a fit of a model that declares a zero-length vector.

        The model is sampled with whichever PyStan major version is
        installed, converted with ``from_pystan`` and checked with
        ``check_multiple_attrs``.
        """
        model_code = """
            parameters {
                real y;
                vector[3] x;
                vector[0] a;
                vector[2] z;
            }
            model {
                y ~ normal(0,1);
            }
        """
        using_pystan2 = pystan_version() == 2
        if using_pystan2:
            from pystan import StanModel  # pylint: disable=import-error

            compiled = StanModel(model_code=model_code)
            fit = compiled.sampling(
                iter=500, chains=2, check_hmc_diagnostics=False
            )
        else:
            import stan  # pylint: disable=import-error

            compiled = stan.build(model_code)
            fit = compiled.sample(num_samples=500, num_chains=2)

        inference_data = from_pystan(posterior=fit)
        # NOTE(review): "~a" presumably marks ``a`` as expected to be absent;
        # confirm against check_multiple_attrs.
        expected_attrs = {
            "posterior": ["y", "x", "z", "~a"],
            "sample_stats": ["diverging"],
        }
        fails = check_multiple_attrs(expected_attrs, inference_data)
        assert not fails
Beispiel #7
0
def run_stan_model(model_name, year, m=1, n=7, **kwargs):
    """Build the Stan program stored in a file and sample from it.

    Args:
        model_name: Path of the file holding the Stan program text.
        year: Accepted for interface compatibility; not used here.
        m: Forwarded to ``get_stan_data``.
        n: Forwarded to ``get_stan_data``.
        **kwargs: Forwarded unchanged to the model's ``sample`` method.

    Returns:
        Whatever ``sample`` returns for the built model.
    """
    stan_data = get_stan_data(m=m, n=n)
    with open(model_name, 'r') as source_file:
        program_text = source_file.read()
    built_model = stan.build(program_text, stan_data)
    return built_model.sample(**kwargs)
Beispiel #8
0
    def __init__(self, code, data):
        """Build the Stan model and keep handles to its log-probability calls.

        Args:
            code: Stan program text passed to ``stan.build``.
            data: Mapping of data for the model; a shallow copy is stored.
        """
        # Parameter count, where a vector parameter counts as a single entry
        n_params = StanLogPDF._count_parameters(code)

        # Keep a shallow copy of the data
        # TODO: Deep clone data so that it can't be changed anymore?
        self._data = dict(data)

        # Compile the model
        posterior = stan.build(code, data=data)

        # httpstan gives access to the compiled extension module
        module = httpstan.models.import_services_extension_module(
            posterior.model_name)

        # Flatten parameter names: a 1-d parameter of length k becomes
        # name_0 ... name_{k-1}; a scalar keeps its plain name
        leading_names = posterior.param_names[:n_params]
        leading_dims = posterior.dims[:n_params]
        self._names = []
        for base_name, shape in zip(leading_names, leading_dims):
            if not shape:
                self._names.append(base_name)
                continue
            # Only one-dimensional parameters are handled
            assert len(shape) == 1
            self._names.extend(
                base_name + '_' + str(index) for index in range(shape[0]))

        # Number of flattened parameters
        self._n_parameters = len(self._names)

        # Keep the module's log-probability and gradient functions
        self._log_prob = module.log_prob
        self._log_prob_grad = module.log_prob_grad
Beispiel #9
0
def test_nan_inf():
    """Check that NaN and infinite draws come back from the fit as such."""
    fit = stan.build(program_code).sample()
    assert fit is not None

    def first_draw(name):
        # First flattened draw of the named quantity.
        return fit[name].ravel()[0]

    assert math.isnan(first_draw("alpha"))
    assert math.isinf(first_draw("beta"))
    assert math.isinf(first_draw("gamma"))
Beispiel #10
0
def test_bernoulli_random_seed_different(posterior):
    """Different random seeds must give different ``theta`` draws."""
    fits = []
    for seed in (1, 2):
        built = stan.build(program_code, data=data, random_seed=seed)
        fits.append(built.sample())

    # Print the leading draws of each fit to help diagnose a failure.
    for index, fit in enumerate(fits):
        print(index, fit["theta"].ravel()[:10])

    theta_first, theta_second = (fit["theta"] for fit in fits)
    assert not np.allclose(theta_first, theta_second)
Beispiel #11
0
def test_fit_cache_uncacheable():
    """Test that a fit built without a random seed is not cached."""
    cache_size_before = file_usage(cache_path())
    # No random seed is given, so this fit is NOT cacheable and sampling
    # should leave the cache directory's usage unchanged.
    normal_posterior = stan.build(program_code)
    normal_posterior.sample()
    cache_size_after = file_usage(cache_path())
    assert cache_size_before == cache_size_after
Beispiel #12
0
def test_initialization_failed():
    """Sampling must raise ``RuntimeError`` when initialization fails."""
    posterior = stan.build(program_code, random_seed=1)
    # Two attempts: the second one guards against interactions with caching.
    for _attempt in range(2):
        with pytest.raises(RuntimeError, match=r"Initialization failed."):
            posterior.sample(num_chains=1)
Beispiel #13
0
def pystan_noncentered_schools(data, draws, chains):
    """Fit the non-centered eight schools model with the installed PyStan.

    Args:
        data: Data mapping handed to the Stan model.
        draws: Number of post-warmup draws per chain.
        chains: Number of chains to run.

    Returns:
        Tuple ``(stan_model, fit)`` of the compiled model and its fit.
    """
    schools_code = """
        data {
            int<lower=0> J;
            real y[J];
            real<lower=0> sigma[J];
        }

        parameters {
            real mu;
            real<lower=0> tau;
            real eta[J];
        }

        transformed parameters {
            real theta[J];
            for (j in 1:J)
                theta[j] = mu + tau * eta[j];
        }

        model {
            mu ~ normal(0, 5);
            tau ~ cauchy(0, 5);
            eta ~ normal(0, 1);
            y ~ normal(theta, sigma);
        }

        generated quantities {
            vector[J] log_lik;
            vector[J] y_hat;
            for (j in 1:J) {
                log_lik[j] = normal_lpdf(y[j] | theta[j], sigma[j]);
                y_hat[j] = normal_rng(theta[j], sigma[j]);
            }
        }
    """
    if pystan_version() == 2:
        import pystan  # pylint: disable=import-error

        stan_model = pystan.StanModel(model_code=schools_code)
        # ``iter`` counts warmup too, hence the extra 500 on top of ``draws``.
        sampling_options = {
            "data": data,
            "iter": draws + 500,
            "warmup": 500,
            "chains": chains,
            "check_hmc_diagnostics": False,
            "control": {"adapt_engaged": False},
        }
        fit = stan_model.sampling(**sampling_options)
    else:
        import stan  # pylint: disable=import-error

        stan_model = stan.build(schools_code, data=data)
        sample_options = {
            "num_chains": chains,
            "num_samples": draws,
            "num_warmup": 500,
            "save_warmup": False,
        }
        fit = stan_model.sample(**sample_options)
    return stan_model, fit
Beispiel #14
0
def test_eight_schools_build_numpy(posterior):
    """Verify eight schools builds from NumPy array and pandas Series data."""
    y_values = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    sigma_values = pd.Series([15, 10, 16, 11, 9, 11, 10, 18], name="sigma")
    schools_data_alt = {"J": 8, "y": y_values, "sigma": sigma_values}
    posterior_alt = stan.build(program_code, data=schools_data_alt)
    assert posterior_alt is not None
Beispiel #15
0
def test_normal_sample_args():
    """Sample a standard normal with a seeded build and thinned draws."""
    source = "parameters {real y;} model {y ~ normal(0,1);}"
    posterior = stan.build(source, random_seed=1)
    assert posterior is not None
    fit = posterior.sample(num_samples=350, num_thin=2)
    y_draws = fit.to_frame()["y"]
    # 350 draws per chain, thinned by 2; the factor 4 is presumably the
    # default number of chains.
    assert len(y_draws) == 350 * 4 // 2
    assert -5 < y_draws.mean() < 5
Beispiel #16
0
def test_stanc_warning() -> None:
    """Check that a stanc deprecation warning reaches the user via stderr."""
    # The program uses the deprecated `<-` assignment operator, for which
    # stanc prints a warning recommending `=` instead.
    program_code = """
    parameters {
    real y;
    }
    model {
    real x;
    x <- 5;
    }
    """
    expected_fragment = "assignment operator <- is deprecated in the Stan language"
    captured_stderr = io.StringIO()
    with contextlib.redirect_stderr(captured_stderr):
        stan.build(program_code=program_code)
    assert expected_fragment in captured_stderr.getvalue()
Beispiel #17
0
def test_user_init_different_initial_values():
    """Different user-supplied inits must give different first ``mu`` draws."""
    posterior = stan.build(program_code, data=data, random_seed=2)

    def mu_draws(initial_mu):
        # One chain, ten draws, starting from the given value of ``mu``.
        fit = posterior.sample(
            num_chains=1, num_samples=10, init=[{"mu": initial_mu}]
        )
        return fit["mu"].ravel()

    mu1 = mu_draws(3)
    mu2 = mu_draws(4)
    assert mu1[0] != mu2[0]
def fit(y, size_factors, verbose=0):
    """Sample the ``POISS_LOGNORMAL`` model and return its ``mu`` and ``sigma``.

    Args:
        y: Observations; must support ``len`` and ``astype(int)``.
        size_factors: Size factors, passed to the model as their logarithm.
        verbose: When greater than 0, print the sampled ``mu`` and ``sigma``.

    Returns:
        Tuple ``(mu, sigma)`` of the draws taken from the fit.
    """
    stan_inputs = {
        "N": len(y),
        "y": y.astype(int),
        "log_s": np.log(size_factors),
    }
    built_model = stan.build(POISS_LOGNORMAL, data=stan_inputs)
    draws = built_model.sample(num_chains=4, num_samples=1000)
    sigma = draws["sigma"]
    mu = draws["mu"]

    if verbose > 0:
        print(f"Mu: {mu}")
        print(f"Sigma: {sigma}")

    return mu, sigma
Beispiel #19
0
def test_normal_sample_chains():
    """Sample a standard normal with three chains and check the draw layout."""
    source = "parameters {real y;} model {y ~ normal(0,1);}"
    posterior = stan.build(source)
    assert posterior is not None
    fit = posterior.sample(num_chains=3)
    # Rows taken by sample and sampler parameters come before the model's
    # single parameter.
    leading_rows = len(fit.sample_and_sampler_param_names)
    expected_shape = (leading_rows + 1, 1000, 3)  # rows, draws, chains
    assert fit._draws.shape == expected_shape
    y_draws = fit.to_frame()["y"]
    assert len(y_draws) == 3000
    assert -5 < y_draws.mean() < 5
Beispiel #20
0
    def sample(self, modified_observed_data):
        """Rebuild the PyStan model on ``modified_observed_data`` and sample.

        ``self.model`` may hold either the program text or a built model; in
        both cases it is replaced by the model built on the new data.
        """
        import stan  # pylint: disable=import-error,import-outside-toplevel

        self.model: Union[str, stan.Model]
        # Recover the program text from whichever form is currently stored.
        program_code = (
            self.model
            if isinstance(self.model, str)
            else self.model.program_code
        )
        self.model = stan.build(program_code, data=modified_observed_data)
        return self.model.sample(**self.sample_kwargs)
Beispiel #21
0
def test_normal_sample():
    """Sample a very narrow normal and check draw layout and statistics."""
    source = "parameters {real y;} model {y ~ normal(0, 0.0001);}"
    posterior = stan.build(source)
    assert posterior is not None
    fit = posterior.sample()
    # Rows taken by sample and sampler parameters come before the model's
    # single parameter.
    leading_rows = len(fit.sample_and_sampler_param_names)
    expected_shape = (leading_rows + 1, 1000, 4)  # rows, draws, chains
    assert fit._draws.shape == expected_shape
    y_draws = fit.to_frame()["y"]
    # The frame column must match the flattened row of the raw draws.
    assert (y_draws == fit._draws[leading_rows, :, :].ravel()).all()
    assert len(y_draws) == 4000
    assert -0.01 < y_draws.mean() < 0.01
    assert -0.01 < y_draws.std() < 0.01
Beispiel #22
0
def test_vector_params():
    """Sample a vector-valued parameter and check the frame's column names."""
    program_code = """
        parameters {
          vector[3] beta;
        }
        model {
          beta ~ normal(0, 1);
        }
    """
    fit = stan.build(program_code).sample()
    frame = fit.to_frame()
    # The three vector elements are expected as the last three columns.
    trailing_columns = frame.columns[-3:]
    assert all(trailing_columns == ["beta.1", "beta.2", "beta.3"])
    assert len(frame["beta.1"]) > 100
 def run(self, iterations, warmup_iterations, chains):
     """Fit the base model and record error, posterior draws and runtime.

     Args:
         iterations: Number of samples, passed as ``num_samples``.
         warmup_iterations: Number of warmup iterations (``num_warmup``).
         chains: Number of chains (``num_chains``).

     Sets ``self.model``, ``self.mse``, ``self.posterior`` (the draws as a
     frame) and ``self.runtime`` (seconds spent building and sampling).
     """
     print('Running model...')
     # stan.build expects the Stan program text, not a path to it, so the
     # model file is read first.
     model_path = 'Desktop/GitHub/reinventing-test-and-trace-r/python/model-diagnostics/model-codes/base_model.stan'
     with open(model_path, encoding='utf-8') as model_file:
         program_code = model_file.read()

     start = time.time()
     self.model = stan.build(program_code, data=self.data, random_seed=1)
     fit = self.model.sample(num_samples=iterations, num_warmup=warmup_iterations, num_chains=chains)
     end = time.time()
     print('Finished running')

     frame = fit.to_frame()
     posterior_means = frame.describe().T['mean']

     # NOTE(review): the first 7 columns are skipped, presumably because they
     # hold sampler diagnostics; the next P entries are compared against the
     # true theta values. Confirm the offset against the frame's layout.
     theta_means = posterior_means.iloc[7:(self.data['P'] + 7)]
     self.mse = np.mean((theta_means - self.ground_truth['true_theta']) ** 2)
     self.posterior = frame
     self.runtime = end - start
Beispiel #24
0
def test_fit_cache():
    """A fit with a random seed is stored in the cache and then reused."""
    usage_at_start = file_usage(cache_path())
    # A seeded build makes the fit cacheable; the seed itself is arbitrary.
    seed = random.randrange(1, 2 ** 16)
    seeded_posterior = stan.build(program_code, random_seed=seed)
    seeded_posterior.sample()
    usage_after_first_fit = file_usage(cache_path())
    assert usage_after_first_fit > usage_at_start

    # The fit is in the cache now, so sampling again must not add to it.
    usage_before_second_fit = file_usage(cache_path())
    seeded_posterior.sample()
    usage_after_second_fit = file_usage(cache_path())
    assert usage_before_second_fit == usage_after_second_fit
Beispiel #25
0
def test_bernoulli_different_chains(posterior):
    """Two chains of one fit must not produce identical ``theta`` draws."""

    def assert_chains_differ(two_chain_fit):
        # With a single scalar parameter, it occupies the last row of _draws.
        first_chain = two_chain_fit._draws[-1, :, 0]
        second_chain = two_chain_fit._draws[-1, :, 1]
        assert not np.allclose(first_chain, second_chain)

    fit = posterior.sample(num_chains=2)
    assert fit.num_chains == 2
    assert_chains_differ(fit)

    # The same must hold when a random seed is given.
    del fit
    seeded_posterior = stan.build(program_code, data=data, random_seed=42)
    fit = seeded_posterior.sample(num_chains=2)
    assert_chains_differ(fit)
Beispiel #26
0
def test_user_init_same_initial_values():
    """Repeated sampling with the same init must reproduce the same draws."""
    posterior = stan.build(program_code, data=data, random_seed=2)

    def mu_draws(**sample_options):
        # One chain, ten draws; extra options (e.g. ``init``) pass through.
        fit = posterior.sample(num_chains=1, num_samples=10, **sample_options)
        return fit["mu"].ravel()

    # Without user-supplied initial values.
    mu1 = mu_draws()
    mu2 = mu_draws()
    assert mu1[0] == mu2[0]
    np.testing.assert_array_equal(mu1, mu2)

    # With identical user-supplied initial values.
    mu3 = mu_draws(init=[{"mu": -4}])
    mu4 = mu_draws(init=[{"mu": -4}])
    assert mu3[0] == mu4[0]
    np.testing.assert_array_equal(mu3, mu4)

    # Supplying an init must change the draws relative to not supplying one.
    assert mu1[0] != mu3[0]
Beispiel #27
0
def test_matrix_params_build():
    """Build a program whose parameter is a K-by-D matrix."""
    program_code = """
        data {
          int<lower=2> K;
          int<lower=1> D;
        }
        parameters {
          matrix[K,D] beta;
        }
        model {
          for (k in 1:K)
            for (d in 1:D)
                beta[k,d] ~ normal(0, 1);
        }
    """
    matrix_dims = {"K": 9, "D": 5}
    built_model = stan.build(program_code, data=matrix_dims)
    assert built_model is not None
Beispiel #28
0
def test_matrix_params_sample():
    """Sample a K-by-D matrix parameter and check the frame's columns."""
    program_code = """
        data {
          int<lower=2> K;
          int<lower=1> D;
        }
        parameters {
          matrix[K,D] beta;
        }
        model {
          for (k in 1:K)
            for (d in 1:D)
                beta[k,d] ~ normal(0, 1);
        }
    """
    matrix_dims = {"K": 9, "D": 5}
    fit = stan.build(program_code, data=matrix_dims).sample()
    frame = fit.to_frame()
    # One column per sample/sampler parameter plus one per matrix element.
    n_leading_columns = len(fit.sample_and_sampler_param_names)
    n_matrix_elements = matrix_dims["K"] * matrix_dims["D"]
    assert len(frame.columns) == n_leading_columns + n_matrix_elements
    assert len(frame["beta.1.1"]) > 100
Beispiel #29
0
def normal_posterior():
    """Return the model built from the module-level ``program_code``."""
    built_model = stan.build(program_code)
    return built_model
Beispiel #30
0
def test_bernoulli_random_seed_same():
    """Two independent builds with the same seed must give equal ``theta``."""
    fits = []
    for _ in range(2):
        built = stan.build(program_code, data=data, random_seed=42)
        fits.append(built.sample())
    theta_first, theta_second = (fit["theta"] for fit in fits)
    assert np.allclose(theta_first, theta_second)