Example #1
0
 def test_init_zero_exception_inf_grad(self):
     code = """
     parameters {
         real x;
     }
     model {
         target += 1 / log(x);
     }
     """
     sm = StanModel(model_code=code)
     with self.assertRaises(RuntimeError):
         sm.sampling(init='0', iter=1, chains=1)
Example #2
0
 def test_init_zero_exception_inf_grad(self):
     code = """
     parameters {
         real x;
     }
     model {
         target += 1 / log(x);
     }
     """
     sm = StanModel(model_code=code)
     with self.assertRaises(RuntimeError):
         sm.sampling(init='0', iter=1, chains=1)
Example #3
0
 def test_init_zero_exception_inf_grad(self):
     code = """
     parameters {
         real x;
     }
     model {
         lp__ <- 1 / log(x);
     }
     """
     sm = StanModel(model_code=code)
     assertRaisesRegex = self.assertRaisesRegexp if PY2 else self.assertRaisesRegex
     with assertRaisesRegex(RuntimeError, 'divergent gradient'):
         sm.sampling(init='0', iter=1)
Example #4
0
 def test_init_zero_exception_inf_grad(self):
     code = """
     parameters {
         real x;
     }
     model {
         lp__ <- 1 / log(x);
     }
     """
     sm = StanModel(model_code=code)
     assertRaisesRegex = self.assertRaisesRegexp if PY2 else self.assertRaisesRegex
     with assertRaisesRegex(RuntimeError, 'divergent gradient'):
         sm.sampling(init='0', iter=1)
Example #5
0
def run_or_load_model(m_type, m_dict, iters, warmup, c_params):
    if m_type not in ['car', 'tobit']:
        raise Exception('Invalid model type!')
    name = 'crash_{}_{}-{}_delta_{}_max_{}'.format(m_type, iters, warmup,
                                                   c_params['adapt_delta'],
                                                   c_params['max_treedepth'])
    try:
        model = load(Path('cache/' + name + '_model.joblib'))
    except FileNotFoundError:
        model = StanModel(file=Path(
            'models/crash_{}.stan'.format(m_type)).open(),
                          extra_compile_args=["-w"],
                          model_name=name.split('-')[0])
        dump(model, Path('cache/' + name + '_model.joblib'))
    try:
        fit = load(Path('cache/' + name + '_fit.joblib'))
    except FileNotFoundError:
        fit = model.sampling(data=m_dict,
                             iter=iters,
                             warmup=warmup,
                             control=c_params,
                             check_hmc_diagnostics=True)
        info = fit.stansummary()
        with open(Path('logs/' + name + '.log'), 'w') as c_log:
            c_log.write(info)
        dump(fit, Path('cache/' + name + '_fit.joblib'))
    return model, fit
Example #6
0
    def test_empty_parameter(self):
        model_code = """
            parameters {
                real y;
                vector[3] x;
                vector[0] a;
                vector[2] z;
            }
            model {
                y ~ normal(0,1);
            }
        """
        if pystan_version() == 2:
            from pystan import StanModel  # pylint: disable=import-error

            model = StanModel(model_code=model_code)
            fit = model.sampling(iter=500,
                                 chains=2,
                                 check_hmc_diagnostics=False)
        else:
            import stan  # pylint: disable=import-error

            model = stan.build(model_code)
            fit = model.sample(num_samples=500, num_chains=2)

        posterior = from_pystan(posterior=fit)
        test_dict = {
            "posterior": ["y", "x", "z", "~a"],
            "sample_stats": ["diverging"]
        }
        fails = check_multiple_attrs(test_dict, posterior)
        assert not fails
Example #7
0
def simple_car_model(tobit_data: pd.DataFrame, ad_matrix):
    """
    In the researchers' model, phi is distributed around phi_bar.
    Is this handled by multi_normal_prec? Need to understand the docs and adjust
    if not (see the sketch after this function).
      - Seems to be legit; the WinBUGS documentation does it in a similar way.
    https://mc-stan.org/docs/2_19/functions-reference/multivariate-normal-distribution-precision-parameterization.html
     - Find out what the CAR prior in car.normal is. Right now I just have 2/-2 ...
       - Unfortunately, there is no information available; just need to set
         something that works.

    """
    car_model = StanModel(file=Path('models/tobit_car_students.stan').open(),
                          extra_compile_args=["-w"])
    car_dict = get_datadict()
    car_dict['W'] = ad_matrix
    car_dict['U'] = 800

    # this smaller run still took 25 mins to sample...
    # And still getting too low E-BFMI values
    car_fit = car_model.sampling(data=car_dict,
                                 iter=2000,
                                 warmup=500,
                                 chains=4)
    dump(car_fit, Path('data/car_students_2000.joblib'))
    car_res = car_fit.extract()
    print('β_0: {}'.format(car_res['beta_zero'][501:].mean()))
    print('β:   {}'.format(car_res['beta'][501:].mean(axis=0)))

    # getting many rejections - bad? Phi is a bit like a covariance matrix
    # -> only in the beginning, after 200 iterations all fine.
    # result from the run: chains have not mixed, might need to re-parametrize...
    # am I constraining the variables too much? Need to center somehow?
    return car_fit, car_model
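# Sketch (not from the original project): the precision parameterization the
# docstring asks about. multi_normal_prec(mu, Omega) is a multivariate normal
# specified by its precision matrix Omega; the usual CAR prior takes
# Omega = tau * (D - alpha * W). The graph and values below are made up.
import numpy as np

W_demo = np.array([[0., 1., 0.],
                   [1., 0., 1.],
                   [0., 1., 0.]])        # adjacency matrix of a 3-node chain
D_demo = np.diag(W_demo.sum(axis=1))     # diagonal matrix of neighbour counts
tau, alpha = 2.0, 0.9                    # precision scale, spatial dependence
Omega = tau * (D_demo - alpha * W_demo)  # CAR precision matrix
Sigma = np.linalg.inv(Omega)             # the covariance multi_normal_prec implies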
Example #8
0
def scaled_spare_car(tobit_data: pd.DataFrame, ad_matrix):
    """
    Will try with values closer to 0 now.
    sigma was 67.3 with stdev 3.74.
    Even worse: E-BFMI is still small, but now there is also a lot of treedepth
    saturation (OK) and chain divergence (bad!). Would need to check the energy
    plots and see what correlates...
    TODO: if I scale, I risk not hitting the condition for U...
     -> should not be a problem if I have zeros there as the lower bound
    """
    tobit_data['ones'] = np.ones(tobit_data.shape[0])
    trans = MaxAbsScaler().fit_transform(tobit_data[new_preds + ['apt']])
    data_centered = pd.DataFrame(trans, columns=new_preds + ['apt'])
    is_800 = tobit_data['apt'] == 800
    not_800 = tobit_data['apt'] != 800
    ii_obs = tobit_data[not_800]['id']
    ii_cens = tobit_data[is_800]['id']
    # After using vectorisation: Gradient takes 0.0003  seconds.
    c_sparse_dict = {
        'X': data_centered[new_preds],
        'n': tobit_data.shape[0],
        'n_obs': not_800.sum(),
        'n_cens': is_800.sum(),
        'y_obs': data_centered[not_800]['apt'],
        'ii_obs': ii_obs,
        'ii_cens': ii_cens,
        'p': len(new_preds),
        'y_cens': data_centered[is_800]['apt'],
        'W': ad_matrix,
        'U': 1,
        'W_n': ad_matrix.sum() // 2
    }
    # or just 'models/sparse_tcar_students_without_QR.stan'
    c_sp_model = StanModel(file=Path('sparse_tobitcar_students.stan').open(),
                           verbose=False,
                           extra_compile_args=["-w"])
    c_params = {'adapt_delta': 0.95, 'max_treedepth': 12}
    # no more saturation, but still divergence...
    # trying to constrain the model: α <= 0.99 instead of <= 1, σ >= 0.001
    # (a one-call diagnostics check is sketched after this function)
    c_sp_fit = c_sp_model.sampling(c_sparse_dict,
                                   iter=4000,
                                   warmup=500,
                                   control=c_params)
    c_sp_res = c_sp_fit.extract()
    print(c_sp_fit.stansummary())
    dump(c_sp_fit, 'data/c_sp_4000.joblib')
    plt.scatter(c_sp_fit['lp__'], c_sp_fit['sigma'])

    # sigma looks very correlated.
    simpler_csp = c_sp_res.copy()
    del simpler_csp['phi']
    del simpler_csp['y_cens']
    del simpler_csp['beta']
    del simpler_csp['y']
    if 'theta' in simpler_csp:
        del simpler_csp['theta']
    c_sp_df = pd.DataFrame.from_dict(simpler_csp)
    sns.pairplot(c_sp_df)
    return c_sp_fit, c_sp_model
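# Sketch (assumes pystan >= 2.18, not part of the original script): the E-BFMI,
# divergence and treedepth issues noted above can be summarised in one call,
# e.g. check_fit_diagnostics(c_sp_fit) right after sampling.
def check_fit_diagnostics(fit):
    import pystan
    # dict of booleans keyed by check name: n_eff, Rhat, divergence,
    # treedepth and energy (the E-BFMI check)
    diagnostics = pystan.check_hmc_diagnostics(fit)
    print(diagnostics)
    return diagnostics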
Example #9
0
def verify_stan():
    """
    Simplest model to verify the Stan installation.
    """
    model_code = 'parameters {real y;} model {y ~ normal(0,1);}'
    model = StanModel(model_code=model_code)
    y = model.sampling().extract()['y']
    print('If this worked, you will see a value near 0 now:')
    print(y.mean())
Example #10
0
def _bayes_sampling(x, y, distribution='normal'):
    """
    Helper function.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists

    Returns:
        tuple:
            - the posterior samples
            - sample size of x
            - sample size of y
            - absolute mean of x
            - absolute mean of y
    """
    # Checking if data was provided
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')

    # Coercing missing values to right format
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    if distribution == 'normal':
        fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x, 'y': _y}
    elif distribution == 'poisson':
        fit_data = {
            'Nc': n_y,
            'Nt': n_x,
            'x': _x.astype(int),
            'y': _y.astype(int)
        }
    else:
        raise NotImplementedError
    model_file = __location__ + '/../models/' + distribution + '_kpi.stan'
    sm = StanModel(file=model_file)

    fit = sm.sampling(data=fit_data,
                      iter=25000,
                      chains=4,
                      n_jobs=1,
                      seed=1,
                      control={
                          'stepsize': 0.01,
                          'adapt_delta': 0.99
                      })
    traces = fit.extract()

    return traces, n_x, n_y, mu_x, mu_y
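# Usage sketch (synthetic data; assumes the module-level __location__ and the
# normal_kpi.stan model file are in place as above). The parameter names inside
# `traces` depend on what normal_kpi.stan declares.
import numpy as np

x_demo = np.random.normal(0.1, 1.0, size=1000)   # treatment group
y_demo = np.random.normal(0.0, 1.0, size=1000)   # control group
traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x_demo, y_demo,
                                               distribution='normal')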
Example #11
0
    def _fit_stan_model(self, vb: bool, sm: StanModel, data_dict: Dict,
                        pars: List, gen_init: Union[str, Callable],
                        nchain: int, niter: int, nwarmup: int, nthin: int,
                        adapt_delta: float, stepsize: float,
                        max_treedepth: int, ncore: int) -> Any:
        """Fit the stan model.

        Parameters
        ----------
        vb
            Whether to perform variational Bayesian analysis.
        sm
            The StanModel object to use to fit the model.
        data_dict
            Dict holding the data to pass to Stan.
        pars
            List specifying the parameters of interest.
        gen_init
            String or function to specify how to generate the initial values.
        nchain
            Number of chains to run.
        niter
            Number of iterations per chain.
        nwarmup
            Number of warm-up iterations.
        nthin
            Use every `i == nthin` sample to generate posterior distribution.
        adapt_delta
            Advanced control argument for sampler.
        stepsize
            Advanced control argument for sampler.
        max_treedepth
            Advanced control argument for sampler.
        ncore
            Argument for parallel computing while sampling multiple chains.

        Returns
        -------
        fit
            The fitted result returned by `vb` or `sampling` function.
        """
        if vb:
            return sm.vb(data=data_dict, pars=pars, init=gen_init)
        else:
            return sm.sampling(data=data_dict,
                               pars=pars,
                               init=gen_init,
                               chains=nchain,
                               iter=niter,
                               warmup=nwarmup,
                               thin=nthin,
                               control={
                                   'adapt_delta': adapt_delta,
                                   'stepsize': stepsize,
                                   'max_treedepth': max_treedepth
                               },
                               n_jobs=ncore)
Example #12
0
def main():
    schools_dat = {
        'J': 8,
        'y': [28, 8, -3, 7, -1, 1, 18, 12],
        'sigma': [15, 10, 16, 11, 9, 11, 10, 18]
    }
    sm = StanModel(file='model.stan')
    fit = sm.sampling(data=schools_dat, iter=1000, chains=4, seed=555)
    with open(DATA_FILE_NAME, 'wb') as f:
        pickle.dump({'model': sm, 'fit': fit}, f)
Example #13
0
def sparse_car_model(tobit_data: pd.DataFrame, ad_matrix):
    sparse_dict = get_sparse_modeldict(tobit_data, ad_matrix)
    sparse_model = StanModel(
        file=Path('models/sparse_tobitcar_students.stan').open(),
        extra_compile_args=["-w"])
    sparse_fit = sparse_model.sampling(sparse_dict,
                                       iter=4000,
                                       warmup=500,
                                       chains=4)
    print(sparse_fit.stansummary())
    return sparse_fit, sparse_model
Example #14
0
def coin_model():
    """
    Example from „Kruschke: Doing Bayesian Data Analysis”. 
    """
    coin_model = StanModel(file=Path('models/bernoulli_example.stan').open())
    # generate some data
    N = 50
    z = 10
    y = [1] * z + [0] * (N - z)
    coin_data = {'y': y, 'N': N}
    # warmup is the same as burnin in JAGS
    return coin_model.sampling(data=coin_data, chains=3, iter=1000, warmup=200)
Example #15
0
def linear_model():
    """
    1st example from Stan User's Guide
    """
    linear_model = StanModel(file=Path('models/linear_example.stan').open(),
                             extra_compile_args=["-w"])
    x = list(range(10))
    y = [1.1, 2.04, 3.07, 3.88, 4.95, 6.11, 7.03, 7.89, 8.91, 10]
    linear_data = {'x': x, 'y': y, 'N': 10}
    linear_fit = linear_model.sampling(data=linear_data)
    linear_res = linear_fit.extract()
    print('α : {}'.format(np.mean(linear_res['alpha'])))
    print('β : {}'.format(np.mean(linear_res['beta'])))
    return linear_fit
Example #16
0
def bnb_stan(dataset, oos_dataset, warmup=20000, n_iter=25000):
    Y_data, ratings_data, expectations_data, team_dummies_data, pct = (
        extract_data(dataset))
    _, oos_ratings_data, oos_expectations_data, oos_team_dummies_data, oos_pct = \
        extract_data(oos_dataset)
    ratings_data = ratings_data.squeeze()
    oos_ratings_data = oos_ratings_data.squeeze()
    ratings_data, oos_ratings_data = normalize(ratings_data, oos_ratings_data)
    ratings_data = np.stack((ratings_data, np.square(ratings_data)), axis=1)
    oos_ratings_data = np.stack(
        (oos_ratings_data,
         np.sign(oos_ratings_data) * np.square(oos_ratings_data)),
        axis=1)
    pct, oos_pct = normalize(pct, oos_pct)
    expectations_data, oos_expectations_data = normalize(
        expectations_data, oos_expectations_data)
    home_team_dummies = team_dummies_data[::, 0, ::]
    away_team_dummies = team_dummies_data[::, 1, ::]
    stan_data = {
        'n_rows': Y_data.shape[0],
        'n_teams': team_dummies_data.shape[2],
        'm_ratings': ratings_data.shape[1],
        'max_goals': 10,
        'home_team_dummies': home_team_dummies,
        'away_team_dummies': away_team_dummies,
        'expectations': expectations_data,
        'pct': pct,
        'ratings': ratings_data,
        'Y': Y_data.astype(np.int16),
        'oos_n_rows': oos_ratings_data.shape[0],
        'oos_home_team_dummies': oos_team_dummies_data[::, 0, ::],
        'oos_away_team_dummies': oos_team_dummies_data[::, 1, ::],
        'oos_expectations': oos_expectations_data,
        'oos_ratings': oos_ratings_data,
        'oos_pct': oos_pct
    }
    stan_model = StanModel('../stan/games.stan')
    samples = stan_model.sampling(stan_data,
                                  warmup=warmup,
                                  iter=n_iter,
                                  chains=4,
                                  refresh=1,
                                  control={
                                      'adapt_delta': 0.99,
                                      'max_treedepth': 15
                                  })
    preds = samples['predicted_probabilities']
    mean_preds = np.mean(preds, axis=0)
    return samples, mean_preds
Example #17
0
def tobit_vec_QR(tobit_data: pd.DataFrame, scaled: bool = False):
    """
    Vectorised version of the tobit model that combines the parameters for the
    censored values with the uncensored values into a transformed y for more
    efficiency (the merge is sketched after this function).
    """
    vec_model = StanModel(
        file=Path('models/tobit_students_vec_qr.stan').open(),
        extra_compile_args=["-w"])
    not_800 = tobit_data['apt'] != 800
    is_800 = tobit_data['apt'] == 800
    ii_obs = tobit_data[not_800]['id']
    ii_cens = tobit_data[is_800]['id']
    if not scaled:
        vec_dict = {
            'X': tobit_data[new_preds],
            'n_obs': not_800.sum(),
            'n_cens': is_800.sum(),
            'U': 800,
            'y_obs': tobit_data[not_800]['apt'],
            'p': len(new_preds),
            'ii_obs': ii_obs,
            'ii_cens': ii_cens
        }
    else:
        trans = MaxAbsScaler().fit_transform(tobit_data[new_preds + ['apt']])
        data_centered = pd.DataFrame(trans, columns=new_preds + ['apt'])
        vec_dict = {
            'X': data_centered[new_preds],
            'n_obs': not_800.sum(),
            'n_cens': is_800.sum(),
            'U': 800,
            'y_obs': data_centered[not_800]['apt'],
            'p': len(new_preds),
            'ii_obs': ii_obs,
            'ii_cens': ii_cens,
            'X_cens': data_centered[is_800][new_preds]
        }

    vec_fit = vec_model.sampling(data=vec_dict,
                                 iter=10000,
                                 chains=4,
                                 warmup=2000,
                                 control=c_params)
    print('β: {}'.format(vec_fit['beta'][501:].mean(axis=0)))
    print(vec_fit.stansummary())
    return vec_fit, vec_model
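# Sketch (values made up): the merge of censored and uncensored values that the
# docstring describes corresponds to Stan code along the lines of
#   y[ii_obs] = y_obs;  y[ii_cens] = y_cens;
# in a transformed parameters block. The same indexing in NumPy:
import numpy as np

ii_obs_demo = np.array([1, 2, 4])         # 1-based indices, as passed to Stan
ii_cens_demo = np.array([3, 5])
y_obs_demo = np.array([520., 480., 610.])
y_cens_demo = np.array([805., 812.])      # in the model these are parameters > U

y_demo = np.empty(5)
y_demo[ii_obs_demo - 1] = y_obs_demo      # shift to 0-based for NumPy
y_demo[ii_cens_demo - 1] = y_cens_demo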
Example #18
0
def tobit_simple_model(tobit_data: pd.DataFrame, scaled: bool = False):
    """
    2) using a censored model. Has the same sigma - in the paper, the distinction
    between ε_{it} ~ normal(0, σ^2) and θ^m_{it} ~ normal(0, δ^2_m) is clearly made.
    This looks quite close to the values from the tutorial:
    Intercept:  209.5488
    mydata$read: 2.6980, mydata$math: 5.9148  
    """
    censored_model = StanModel(
        file=Path('models/tobit_students_split.stan').open(),
        extra_compile_args=["-w"])
    not_800 = tobit_data['apt'] != 800
    is_800 = tobit_data['apt'] == 800
    if not scaled:
        cens_dict_ex = {
            'X': tobit_data[not_800][new_preds],
            'n': tobit_data.shape[0] - is_800.sum(),
            'y': tobit_data[not_800]['apt'],
            'n_cens': is_800.sum(),
            'p': len(new_preds),
            'X_cens': tobit_data[is_800][new_preds],
            'y_cens': tobit_data[is_800]['apt'],
            'U': 800
        }
    else:
        trans = MaxAbsScaler().fit_transform(tobit_data[new_preds + ['apt']])
        data_centered = pd.DataFrame(trans, columns=new_preds + ['apt'])
        cens_dict_ex = {
            'X': data_centered[not_800][new_preds],
            'n': tobit_data.shape[0] - is_800.sum(),
            'y': data_centered[not_800]['apt'],
            'n_cens': is_800.sum(),
            'p': len(new_preds),
            'y_cens': data_centered[is_800]['apt'],
            'U': 1,
            'X_cens': tobit_data[is_800][new_preds]
        }
    censored_fit = censored_model.sampling(data=cens_dict_ex,
                                           iter=2000,
                                           chains=4,
                                           warmup=500,
                                           control=c_params)
    censored_res = censored_fit.extract()
    print('β: {}'.format(censored_res['beta'][501:].mean(axis=0)))
    return censored_fit, censored_model
Example #19
0
def run_inference():
    df = pd.read_csv('3gaussians-10k.csv')
    X = np.array(df[['XX', 'YY']].values)
    K = 3

    data = {'D': 2,
            'K': 3,
            'N': 10000,
            'Omega0': np.identity(2),
            'alpha': K * [0.1],
            'beta0': 0.1,
            'dof0': 1.1,
            'm0': np.zeros(2),
            'x': X}

    model = StanModel(file='finite_gaussian_mixture.stan')

    return model.sampling(data=data, warmup=200, iter=700)
Example #20
0
    def test_empty_parameter(self):
        if pystan_version() == 2:
            model_code = """
                parameters {
                    real y;
                    vector[0] z;
                }
                model {
                    y ~ normal(0,1);
                }
            """
            from pystan import StanModel

            model = StanModel(model_code=model_code)
            fit = model.sampling(iter=10, chains=2, check_hmc_diagnostics=False)
            posterior = from_pystan(posterior=fit)
            assert hasattr(posterior, "posterior")
            assert hasattr(posterior.posterior, "y")
            assert not hasattr(posterior.posterior, "z")
Example #21
0
def calibrate_noise_model(benchmarks, all_node_results, run_name=None,
                          model_filename="models/noise-with-outliers.stan",
                          iterations=9000, warmup=8000, chains=1):
    """
    Run the given noise model for the benchmark stars.
    """

    if run_name is None:
        run_name = "unnamed"

    else:
        # If a name has been given, use a timestamp too.
        run_name = "-".join([run_name, format(md5(ctime().encode("utf-8")).hexdigest())])

    # Check for a compiled version of this model.
    basename, ext = os.path.splitext(model_filename)
    if os.path.exists(basename + ".pkl"):
        # There's a compiled version. Use that.
        model_filename = basename + ".pkl"
        logging.info("Using pre-compiled model {0}".format(model_filename))
        with open(model_filename, "rb") as fp:
            model = pickle.load(fp)

    else:
        # Compilation required.
        model = StanModel(model_filename)
        pickled_model_filename = basename + ".pkl"
        logging.info("Pickling compiled model to {0}".format(pickled_model_filename))
        with open(pickled_model_filename, "wb") as fp:
            pickle.dump(model, fp)

    data, node_names = build_data_dict(benchmarks, all_node_results)

    logging.info("Optimizing...")
    op = model.optimizing(data=data)
    logging.info("Optimized Values: \n{0}".format(op["par"]))

    logging.info("Fitting...")
    calibrated_model = model.sampling(data=data, pars=op["par"], iter=iterations, warmup=warmup, chains=chains)    
    
    # Add the node names into the data dict.
    calibrated_model.data["node_names"] = node_names

    return calibrated_model
Example #22
0
def tobit_ifelse_model(tobit_data: pd.DataFrame):
    """
    Use a loop instead of two matrices as preparation for using the adjacency matrix:
    """
    censored_dict = get_datadict(tobit_data)
    censored_loop_model = StanModel(
        file=Path('models/tobit_students_ifelse.stan').open(),
        extra_compile_args=["-w"])
    censored_loop_fit = censored_loop_model.sampling(data=censored_dict,
                                                     iter=2000,
                                                     chains=4,
                                                     warmup=500)
    az.plot_trace(censored_loop_fit)
    az.plot_energy(censored_loop_fit)

    cens_loop_res = censored_loop_fit.extract()
    print('α: {}'.format(cens_loop_res['alpha'][501:].mean()))
    print('β: {}'.format(cens_loop_res['beta'][501:].mean(axis=0)))
    # yay works. intercept: 208.6, read: 2.70, math: 5.93, gen: -12.75, voc: -46.6
    return censored_loop_fit, censored_loop_model
Example #23
0
    def test_empty_parameter(self):
        if pystan_version() == 2:
            model_code = """
                parameters {
                    real y;
                    vector[3] x;
                    vector[0] a;
                    vector[2] z;
                }
                model {
                    y ~ normal(0,1);
                }
            """
            from pystan import StanModel

            model = StanModel(model_code=model_code)
            fit = model.sampling(iter=10, chains=2, check_hmc_diagnostics=False)
            posterior = from_pystan(posterior=fit)
            test_dict = {"posterior": ["y", "x", "z"], "sample_stats": ["diverging"]}
            fails = check_multiple_attrs(test_dict, posterior)
            assert not fails
Example #24
0
def tobit_linear_model(tobit_data: pd.DataFrame):
    """
    getting similar values for read and math as in the example.
    intercept: 242.735; mydata$read: 2.553; mydata$math 5.383 
    """
    tobit_datadict = {
        'y': tobit_data['apt'],
        'N': tobit_data.shape[0],
        'K': len(predictors),
        'X': tobit_data[predictors]
    }
    tobit_linear_model = StanModel(
        file=Path('models/linear_students.stan').open(),
        extra_compile_args=["-w"])
    tob_lin_fit = tobit_linear_model.sampling(data=tobit_datadict,
                                              iter=1000,
                                              chains=4)
    tob_lin_res = tob_lin_fit.extract()
    print('α: {}'.format(tob_lin_res['alpha'][501:].mean()))
    print('β: {}'.format(tob_lin_res['beta'][501:].mean(axis=0)))
    return tob_lin_fit, tobit_linear_model
Example #25
0
def compare_runtimes(segmentDF, adjMatrix):
    """
    usage: adjust main, run the script as `python3 import_data.py > log.txt` and then
    call `grep -E "running model|Gradient|(Total)" log.txt` to read the relevant lines.
    Most likely won't work on Windows, as there seems to be less output there.
    """
    iters = 5000
    warmup = 1000
    # 'models/comparison/CAR_simple.stan' -> this model runs forever even with iters = 100
    models = [
        'models/comparison/tobit_for_loop.stan',
        'models/comparison/tobit_vectorised.stan', 'models/crash_tobit.stan',
        'models/crash_car.stan', 'models/comparison/CAR_QR.stan',
        'models/comparison/CAR_simple.stan'
    ]
    data = get_full_dict(
        segmentDF, adjMatrix)  # n_obs, n_cens, p, ii_obs, ii_cens, y_obs, U, X
    model_and_fit = []

    for i, m_file in enumerate(models):
        print(f'running model: {m_file}')
        m_name = m_file.split('/')[-1].split('.')[0]
        try:
            model = load(Path(f'cache/profiling_{m_name}.joblib'))
        except FileNotFoundError:
            model = StanModel(model_name=m_name,
                              file=Path(m_file).open(),
                              extra_compile_args=['-w'])
            dump(model, Path(f'cache/{m_name}.joblib'))
        fit = model.sampling(data=data,
                             iter=iters,
                             warmup=warmup,
                             control={
                                 'adapt_delta': 0.95,
                                 'max_treedepth': 15
                             })
        model_and_fit.append((model, fit))
    return model_and_fit
Example #26
0
def tobit_cum_sum_scaled(tobit_data: pd.DataFrame):
    """
    Let's now try with the cumulative distribution function. This would be more elegant
    and more efficient than looping over the normal distributions.
    Learned the following:
     - if no lower/upper bounds are given for the parameters, Stan will initialise
       them around 0, usually within +/- 2
     - real normal_lccdf(reals y | reals mu, reals sigma)
        - if (y - mu)/sigma < -37.5 or > 8.25, there will be an over-/underflow
          (illustrated after this function)
    """
    trans = MaxAbsScaler().fit_transform(tobit_data[new_preds + ['apt']])
    data_centered = pd.DataFrame(trans, columns=new_preds + ['apt'])
    is_800 = tobit_data['apt'] == 800
    not_800 = tobit_data['apt'] != 800
    cens_cum_model = StanModel(
        file=Path('models/tobit_students_cumulative.stan').open(),
        extra_compile_args=["-w"])
    cens_cum_dict = {
        'X': data_centered[not_800][new_preds],
        'n': tobit_data.shape[0] - is_800.sum(),
        'y': data_centered[not_800]['apt'],
        'n_cens': is_800.sum(),
        'p': len(new_preds),
        'y_cens': data_centered[is_800]['apt'],
        'U': 1,
        'X_cens': tobit_data[is_800][new_preds]
    }
    # init_test = [{'alpha': 240, 'beta': [2.5, 5.4, -13, -48], 'sigma':50}] * 4
    # init=init_test,
    cens_cum_fit = cens_cum_model.sampling(data=cens_cum_dict,
                                           iter=2000,
                                           chains=4,
                                           warmup=500)
    cens_cum_res = cens_cum_fit.extract()
    print('β: {}'.format(cens_cum_res['beta'][500:].mean(axis=0)))
    return cens_cum_fit, cens_cum_model
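# Sketch: the -37.5 / 8.25 bounds quoted in the docstring are about the normal
# CDF saturating in double precision. scipy's logsf stands in for a log-scale
# normal_lccdf here, so this mirrors the effect rather than Stan's exact arithmetic.
import numpy as np
from scipy.stats import norm

z = 8.5
naive = np.log1p(-norm.cdf(z))   # Phi(8.5) rounds to exactly 1.0 -> log(0) = -inf
stable = norm.logsf(z)           # evaluated on the log scale -> about -39.2
print(naive, stable)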
Example #27
0
# x = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
x = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# x = [1, 1, 1]

cf_dat = {
    'N': len(x),
    'x': x,
    'prior_width': 0.2
}

recompile = False

if recompile:
    sm = StanModel(file='coinflip.stan')
    with open('coinflip.pkl', 'wb') as f:
        pickle.dump(sm, f)
else:
    sm = pickle.load(open('coinflip.pkl', 'rb'))

# fit = sm.sampling(data=cf_dat, iter=2000, chains=2)
fit = sm.sampling(data=cf_dat, iter=20, chains=1, seed='random', init=[{'beta': 0.5}], warmup=10)
estimation = fit.extract(permuted=True)

print(estimation['beta'])

pl.hist(estimation['beta'], bins=40)
pl.xlim([0, 1])
pl.show()

cmm = cMM('corr_rate.pkl')
Example #28
0
    'mp_corr_prior_conc': 3,
    'exponent_prior_mean': exponent,
    'base_rate_prior_mean': base_rate,
    'threshold_prior_mean': threshold,
    'n_samples': n_samp_est,
    'stdnorm_samples': stdnorm_samples
}

if recompile:
    sm = StanModel(file='corr_rate.stan', verbose=False)
    with open('corr_rate.pkl', 'wb') as f:
        pickle.dump(sm, f)
else:
    sm = pickle.load(open('corr_rate.pkl', 'rb'))

fit = sm.sampling(data=corr_dat, iter=2000, chains=3)
estimation = fit.extract(permuted=True)
cm = estimation['mp_corr_mat']
pickle.dump(cm, open('corr_mat_samples.pkl', 'wb'))
savemat('corr_mat_samples.mat', {'cm': cm})


mp_col = 'r'
sc_true_col = 'y'
sc_obs_col = 'g'

num_row = 5
num_col = max([n_unit, n_pairs])

act_row = 0
act_col = 1
Example #29
0
# Introduce noise
x_data = np.random.normal(x_data, 7)
y_data = np.random.normal(y_data, 8)

# plot the data
pyplot.plot(x_data, y_data, 'o')

stan_data_mappings = {
  'x': x_data,
  'y': y_data,
  'N': N,
}

model = StanModel(file='models/univariate_regression.stan')

fit = model.sampling(data=stan_data_mappings)

params = fit.extract()
a_pred = params['a']
b_pred = params['b']
sigma_pred = params['sigma']

# Draw 100 points spanning the range of x_data.
xfit = np.linspace(-10 + min(x_data), 10 + max(x_data), 100)

# Number of samples.
M = len(a_pred)

yfit = a_pred.reshape((M, 1)) + b_pred.reshape((M, 1)) * xfit

# Get mean for 100 points and std at those points.
Example #30
0
}

if model_type == 1:
    fname = "csm"
elif model_type == 2:
    fname = "csm2"
elif model_type == 3:
    fname = "msm"
if recompile:
    sm = StanModel(file=fname + '_inference.stan')
    with open(fname + '_inference.pkl', 'wb') as f:
        pickle.dump(sm, f)
else:
    sm = pickle.load(open(fname + '_inference.pkl', 'rb'))

fit = sm.sampling(data=gsm_dat, iter=2000, chains=2)
estimation = fit.extract(permuted=True)

g_est_mean = np.mean(estimation["g"], 0)
print('g est', g_est_mean)
print('g true', g_synth)

z_est_mean = np.mean(estimation["z"], 0).T
print('z est', z_est_mean)
print('z true', z_synth)

pl.subplot(221)
pl.hist(estimation['z'], bins=40)
pl.plot([z_synth[0], z_synth[0]], [0, pl.gca().get_ylim()[1]], color='r', linestyle='-', linewidth=2)

pl.subplot(222)
Example #31
0
      theta[j] <- mu + tau * eta[j];
  }
  model {
    eta ~ normal(0, 1);
    y ~ normal(theta, sigma);
  }
'''
m = StanModel(model_code=schools_code, model_name=model_name, verbose=True)

J = 8
y = (28,  8, -3,  7, -1,  1, 18, 12)
sigma = (15, 10, 16, 11,  9, 11, 10, 18)

iter = 1000
dat = dict(J=J, y=y, sigma=sigma)
ss1 = m.sampling(data=dat, iter=iter, chains=4, refresh=100)

print(ss1)
ss1.traceplot()

ss = stan(model_code=schools_code, data=dat, iter=iter, chains=4,
          sample_file='8schools.csv')
print(ss)
ss.plot()


# using previous fitted objects
ss2 = stan(fit=ss, data=dat, iter=2000)
ss2.summary(probs=[0.38])
ss2.summary(probs=[0.48])
# ss2.summary(probs=[0.48], use_cache=False)
Example #32
0
def calc_mlpd(beta, alpha, X, y):
    p = expit(X @ beta + alpha)
    log_probs = np.log(p) * y + np.log(1. - p) * (1 - y)
    lp = np.mean(log_probs)
    return lp


# Compile model from scratch
sm = StanModel(model_code=model_code)
with open('sm3.pkl', 'wb') as f:
    pickle.dump(sm, f)

# Read model from from file
# with open('sm3.pkl', 'rb') as f:
#     sm = pickle.load(f)

fit = sm.sampling(data=data, seed=seed, chains=6, algorithm='NUTS')
samples = fit.extract(permuted=True)
beta_tilde, lamb, tau_tilde, csquared, alphas = samples['beta_tilde'], samples[
    'lambda'], samples['tau_tilde'], samples['csquared'], samples['alpha']
numer = (csquared * lamb.T * lamb.T)
denom = (csquared + tau_tilde * tau_tilde * lamb.T * lamb.T)
lambda_tilde = np.sqrt((numer / denom).T)
betas = (tau_tilde * (beta_tilde * lambda_tilde).T).T

best_mlpd = -1000000
best_beta = None
best_alpha = None

for beta, alpha in zip(betas, alphas):
    mlpd = calc_mlpd(beta, alpha, Xtrain, ytrain)
    if mlpd > best_mlpd:
Example #33
0
for i in range(N):
    act_u = u_synth[i, :].T
    act_mean = z_synth[i] * A.dot(act_u)
    x_synth[i, :] = multivariate_normal(act_mean, sigma_x)

gsm_dat = {
    "N": N,
    "d_x": d_x,
    "d_u": d_u,
    "sigma_x": sigma_x,
    "x": x_synth,
    "A": A,
    "C": C,
    "z_shape": z_shape,
    "z_scale": z_scale,
}

if recompile:
    sm = StanModel(file="gsm_inference.stan")
    with open("gsm_inference.pkl", "wb") as f:
        pickle.dump(sm, f)
else:
    sm = pickle.load(open("gsm_inference.pkl", "rb"))

fit = sm.sampling(data=gsm_dat, iter=100, chains=8)
estimation = fit.extract(permuted=True)
z_est_mean = mean(estimation["z"], 0).T
print(z_est_mean)

print(z_synth)
Example #34
0
d1 = 2*(np.random.random(N)-0.5)
A = lambda sc,de : lambda d : sc*d*np.exp(-np.abs(d)/de) 
alpha = A(5,1)
sigma = 0.06
y = (np.random.rand(N)<phi(df/sigma-alpha(d1))).astype(int)

plt.plot(d1,alpha(d1),'rx',)
plt.plot(d1,df/sigma,'gx',)


sm = pickle.load(open('model.pkl','rb'))

beta1=2.
beta2=2.
beta3=0.001
cauchy=0.1
model_dat = {'N': N,
               'y': y,
               'df': df,
               'd1':d1,
                'beta1':beta1,'beta2':beta2,'beta3':beta3,
              'cauchy':cauchy}

fit = sm.sampling(data=model_dat)#,algorithm="Fixed_param")

ext = fit.extract()

data = {'df':df,'d1':d1}
d={'fit':fit,'model':sm,'model_dat':model_dat}
pickle.dump(d, open('save_fit.p','wb'))
Example #35
0
}

v_iter = [10000]
v_delta = [0.01]
v_steps = [100]

for n_iter in v_iter:
    for delta in v_delta:
        for n_steps in v_steps:
            print('n_iter: ' + str(n_iter) + ', delta: ' + str(delta) +
                  ', n_steps: ' + str(n_steps))
            m = StanModel(file='iris.stan')
            control = {'stepsize': delta, 'int_time': n_steps * delta}
            fit = m.sampling(data=iris_data,
                             iter=n_iter,
                             chains=1,
                             warmup=20,
                             algorithm='HMC',
                             control=control)
            trace_0 = pd.DataFrame(fit.extract(['alpha', 'beta_0', 'beta_1']))
            varnames = ['alpha', 'beta0', 'beta1']
            trace_0.columns = varnames
            g = sns.pairplot(trace_0[int(n_iter / 10):n_iter])
            #g.savefig("stan_"+str(n_iter)+"_"+str(delta)+"_"+str(n_steps)+".png")
            g.savefig("stan.png")
            plt.show()
            plt.close()
            del (g)
            del (trace_0)
            #sns.plt.show()
'''
trace ~ bernoulli(inv_logit(trace_0[0]+trace_0[1]*y_0))
Example #36
0
      theta[j] <- mu + tau * eta[j];
  }
  model {
    eta ~ normal(0, 1);
    y ~ normal(theta, sigma);
  }
'''
m = StanModel(model_code=schools_code, model_name=model_name, verbose=True)

J = 8
y = (28, 8, -3, 7, -1, 1, 18, 12)
sigma = (15, 10, 16, 11, 9, 11, 10, 18)

iter = 1000
dat = dict(J=J, y=y, sigma=sigma)
ss1 = m.sampling(data=dat, iter=iter, chains=4, refresh=100)

print(ss1)
ss1.traceplot()

ss = stan(model_code=schools_code,
          data=dat,
          iter=iter,
          chains=4,
          sample_file='8schools.csv')
print(ss)
ss.plot()

# using previous fitted objects
ss2 = stan(fit=ss, data=dat, iter=2000)
ss2.summary(probs=[0.38])
Example #37
0
class StanMetaRegression(BaseEstimator):
    """Bayesian meta-regression estimator using Stan.

    Parameters
    ----------
    **sampling_kwargs
        Optional keyword arguments to pass on to the MCMC sampler
        (e.g., `iter` for number of iterations).

    Notes
    -----
    For most uses, this class should be ignored in favor of the functional
    stan() estimator. The object-oriented interface is useful primarily
    when fitting the meta-regression model repeatedly to different data;
    the separation of .compile() and .fit() steps allows one to compile
    the model only once.

    Warning
    -------
    With changes to Stan in version 3, which requires Python 3.7, this class no longer works for
    Python 3.7+. We will try to fix it in the future.
    """

    _result_cls = BayesianMetaRegressionResults

    def __init__(self, **sampling_kwargs):
        self.sampling_kwargs = sampling_kwargs
        self.model = None
        self.result_ = None

    def compile(self):
        """Compile the Stan model."""
        # Note: we deliberately use a centered parameterization for the
        # thetas at the moment. This is sub-optimal in terms of estimation,
        # but allows us to avoid having to add extra logic to detect and
        # handle intercepts in X.
        spec = """
        data {
            int<lower=1> N;
            int<lower=1> K;
            vector[N] y;
            int<lower=1,upper=K> id[N];
            int<lower=1> C;
            matrix[K, C] X;
            vector[N] sigma;
        }
        parameters {
            vector[C] beta;
            vector[K] theta;
            real<lower=0> tau2;
        }
        transformed parameters {
            vector[N] mu;
            mu = theta[id] + X * beta;
        }
        model {
            y ~ normal(mu, sigma);
            theta ~ normal(0, tau2);
        }
        """
        try:
            from pystan import StanModel
        except ImportError:
            raise ImportError(
                "Please install pystan or, if using Python 3.7+, switch to Python 3.6."
            )

        self.model = StanModel(model_code=spec)

    def fit(self, y, v, X, groups=None):
        """Run the Stan sampler and return results.

        Parameters
        ----------
        y : :obj:`numpy.ndarray` of shape (K,)
            1d array of study-level estimates
        v : :obj:`numpy.ndarray` of shape (K,)
            1d array of study-level variances
        X : :obj:`numpy.ndarray` of shape (K[, P])
            1d or 2d array containing study-level predictors
            (including intercept); has dimensions K x P, where K is the
            number of studies and P is the number of predictor variables.
        groups : :obj:`list` of :obj:`int`, optional
            1d array of integers identifying
            groups/clusters of observations in the y/v/X inputs. If
            provided, values must consist of integers in the range of 1..k
            (inclusive), where k is the number of distinct groups. When
            None (default), it is assumed that each observation in the
            inputs is a separate group.

        Returns
        -------
        A StanFit4Model object (see PyStan documentation for details).

        Notes
        -----
        This estimator supports (simple) hierarchical models. When multiple
        estimates are available for at least one of the studies in `y`, the
        `groups` argument can be used to specify the nesting structure
        (i.e., which rows in `y`, `v`, and `X` belong to each study).
        """
        if y.ndim > 1 and y.shape[1] > 1:
            raise ValueError("The StanMetaRegression estimator currently does "
                             "not support 2-dimensional inputs. Passed y has "
                             "shape {}.".format(y.shape))

        if self.model is None:
            self.compile()

        N = y.shape[0]
        groups = groups or np.arange(1, N + 1, dtype=int)
        K = len(np.unique(groups))

        data = {
            "K": K,
            "N": N,
            "id": groups,
            "C": X.shape[1],
            "X": X,
            "y": y.ravel(),
            "sigma": v.ravel(),
        }

        self.result_ = self.model.sampling(data=data, **self.sampling_kwargs)
        return self

    def summary(self, ci=95):
        """Generate a BayesianMetaRegressionResults object from the fitted estimator."""
        if self.result_ is None:
            name = self.__class__.__name__
            raise ValueError("This {} instance hasn't been fitted yet. Please "
                             "call fit() before summary().".format(name))
        return BayesianMetaRegressionResults(self.result_, self.dataset_, ci)
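# Usage sketch (synthetic inputs; `iter` and `chains` are forwarded to
# `sampling` via **sampling_kwargs as the docstring describes):
import numpy as np

y_demo = np.random.normal(0.5, 0.2, size=10)   # study-level estimates
v_demo = np.full(10, 0.04)                     # study-level variances
X_demo = np.ones((10, 1))                      # intercept-only design matrix

est = StanMetaRegression(iter=2000, chains=4)
est.compile()                     # compile once ...
est.fit(y_demo, v_demo, X_demo)   # ... then reuse the compiled model across fits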
Example #38
0
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from pystan import StanModel
%matplotlib inline

filepath = '/Users/takahiro-nakano/R/RStanBook/chap04/input/data-salary.txt'
df = pd.read_csv(filepath)
data = {'N':len(df),'X':df['X'].values,'Y':df['Y'].values}

stanmodel = StanModel(file='/Users/takahiro-nakano/github_personal/pystan/test/normal.stan')
fit_nuts = stanmodel.sampling(data=data, n_jobs=1, iter=100, chains = 4)
print(fit_nuts.summary())
mcmc_sample = fit_nuts.extract()
a = mcmc_sample['a']
b = np.array([i for i in range(len(a))])
fit_nuts.plot()
plt.show()
Example #39
0
        pickle.dump(stanmodel, f)
else:
    stanmodel = pickle.load(open(smfile, 'rb'))

def get_median(sample_array):
    sample_array.sort()
    n = len(sample_array)
    # Even-length arrays average the two middle elements; odd-length arrays
    # take the middle element (integer division keeps the indices valid).
    if n % 2 == 0:
        medianvalue = (sample_array[n // 2 - 1] + sample_array[n // 2]) / 2.0
    else:
        medianvalue = sample_array[n // 2]
    return medianvalue

pdf = bp.PdfPages(betapdffile)
results = {}
for obsid, odata in standata.iter_observations():
    sample_outfile = os.path.join(sampledir, obsid + "_samples.txt") if sampledir != "" else None
    sdata = { "N":N, "K":K, "x":matrix, "y":odata }
    fit = stanmodel.sampling(data=sdata, iter=nsamples, n_jobs=njobs, sample_file=sample_outfile)
    pars = fit.extract(["beta"])
    betasamples = pars["beta"]
    results[obsid] = []
    samples = []
    for i in range(K):
        ith_samples = [ bs[i] for bs in betasamples ]
        medianvalue = get_median(ith_samples)
        results[obsid].append(medianvalue)
        samples.append(ith_samples)
    f = plt.figure()
    plt.boxplot(samples) #, labels=standata.cellids)
    plt.xticks(numpy.arange(1 + len(standata.cellids)), ["0"] + standata.cellids, rotation=90)
    f.suptitle(obsid)
    pdf.savefig(f)
    #fit.plot()
Example #40
0
def get_median(sample_array):
    sample_array.sort()
    n = len(sample_array)
    # Even-length arrays average the two middle elements; odd-length arrays
    # take the middle element (integer division keeps the indices valid).
    if n % 2 == 0:
        medianvalue = (sample_array[n // 2 - 1] + sample_array[n // 2]) / 2.0
    else:
        medianvalue = sample_array[n // 2]
    return medianvalue


pdf = bp.PdfPages(betapdffile)
results = {}
for obsid, odata in standata.iter_observations():
    sample_outfile = os.path.join(sampledir, obsid +
                                  "_samples.txt") if sampledir != "" else None
    sdata = {"N": N, "K": K, "x": matrix, "y": odata}
    fit = stanmodel.sampling(data=sdata,
                             iter=nsamples,
                             n_jobs=njobs,
                             sample_file=sample_outfile)
    pars = fit.extract(["beta"])
    betasamples = pars["beta"]
    results[obsid] = []
    samples = []
    for i in range(K):
        ith_samples = [bs[i] for bs in betasamples]
        medianvalue = get_median(ith_samples)
        results[obsid].append(medianvalue)
        samples.append(ith_samples)
    f = plt.figure()
    plt.boxplot(samples)  #, labels=standata.cellids)
    plt.xticks(numpy.arange(1 + len(standata.cellids)),
               ["0"] + standata.cellids,
               rotation=90)
Example #41
0
import pandas as pd
from pystan import StanModel
import matplotlib.pyplot as plt
import pickle

d = pd.read_csv('input/data-attendance-1.txt')
d.Score /= 200
data = d.to_dict('list')
data.update({'N':len(d)})

stanmodel = StanModel(file='model/model5-3.stan')

# NUTS (No U-Turn Sampler)
fit_nuts = stanmodel.sampling(data=data, n_jobs=1)
mcmc_sample = fit_nuts.extract()
mu_est = mcmc_sample['mu']

# ADVI (Automatic Differentiation Variational Inference)
fit_vb = stanmodel.vb(data=data)
vb_sample = pd.read_csv(fit_vb['args']['sample_file'].decode('utf-8'), comment='#')
vb_sample = vb_sample.drop([0,1])
mu_est = vb_sample.filter(regex=r'mu\.\d+')

with open('output/model_and_result.pkl', 'wb') as f:
    pickle.dump(stanmodel, f)
    pickle.dump(fit_nuts, f)
Example #42
0
			log1m_alpha + normal_log(sp_vector[2], outlier_teff_mu, outlier_teff_sigma)
		));
    }
}"""

# Ok, here is our toy data:
with open("toy.data", "r") as fp:
	data = json.load(fp)

model = StanModel(model_code=model_code)

print("Optimizing...")
op = model.optimizing(data=data)

print("Fitting...")
fit = model.sampling(data=data, pars=op["par"], iter=20000)

subplots_adjust = { "left": 0.10, "bottom": 0.05, "right": 0.95, "top": 0.95,
	"wspace": 0.20, "hspace": 0.45
	}

nodes = range(2)
dimensions = ("teff", "logg")

# Plot the m, b parameters for each node
dimensions_traced = []
for node in nodes:
	node_dimensions = \
		["m_{dim}_node{n}".format(dim=dimension, n=node) for dimension in dimensions] \
	  + ["b_{dim}_node{n}".format(dim=dimension, n=node) for dimension in dimensions]
	dimensions_traced.extend(node_dimensions)
Example #43
0
def test8schools():

    model_name = "_8chools"
    sfile = os.path.join(os.path.dirname(__file__),
                         "../stan/src/models/misc/eight_schools/eight_schools.stan")
    m = StanModel(file=sfile, model_name=model_name, verbose=True)
    m.dso

    yam = StanModel(file=sfile, model_name=model_name, save_dso=False, verbose=True)
    yam.dso

    dat = dict(J=8, y=(28,  8, -3,  7, -1,  1, 18, 12),
               sigma=(15, 10, 16, 11,  9, 11, 10, 18))

    iter = 5020

    # HMC
    ss1 = m.sampling(data=dat, iter=iter, chains=4, algorithm='HMC', refresh=100)
    ss1son = stan(fit=ss1, data=dat, init_r=0.0001)
    ss1son = stan(fit=ss1, data=dat, init_r=0)
    ainfo1 = ss1.get_adaptation_info()
    lp1 = ss1.get_logposterior()
    yalp1 = ss1.get_logposterior(inc_warmup=False)
    sp1 = ss1.get_sampler_params()
    yasp1 = ss1.get_sampler_params(inc_warmup=False)
    gm1 = ss1.get_posterior_mean()
    print(gm1)

    # NUTS 1
    ss2 = m.sampling(data=dat, iter=iter, chains=4, refresh=100,
                     control=dict(metric="unit_e"))
    ainfo2 = ss2.get_adaptation_info()
    lp2 = ss2.get_logposterior()
    yalp2 = ss2.get_logposterior(inc_warmup=False)
    sp2 = ss2.get_sampler_params()
    yasp2 = ss2.get_sampler_params(inc_warmup=False)
    gm2 = ss2.get_posterior_mean()
    print(gm2)

    # NUTS 2
    ss3 = m.sampling(data=dat, iter=iter, chains=4, refresh=100)
    ainfo3 = ss3.get_adaptation_info()
    lp3 = ss3.get_logposterior()
    yalp3 = ss3.get_logposterior(inc_warmup=False)
    sp3 = ss3.get_sampler_params()
    yasp3 = ss3.get_sampler_params(inc_warmup=False)

    gm3 = ss3.get_posterior_mean()
    print(gm3)

    # Non-diag
    ss4 = m.sampling(data=dat, iter=iter, chains=4,
                     control=dict(metric='dense_e'), refresh=100)
    ainfo4 = ss4.get_adaptation_info()
    lp4 = ss4.get_logposterior()
    yalp4 = ss4.get_logposterior(inc_warmup=False)
    sp4 = ss4.get_sampler_params()
    yasp4 = ss4.get_sampler_params(inc_warmup=False)

    gm4 = ss4.get_posterior_mean()
    print(gm4)

    print(ss1)
    print(ss2)
    print(ss3)

    ss1.plot()
    ss1.traceplot()

    ss9 = m.sampling(data=dat, iter=iter, chains=4, refresh=10)

    iter = 52012

    ss = stan(sfile, data=dat, iter=iter, chains=4, sample_file='8schools.csv')

    print(ss)

    ss_inits = ss.inits
    ss_same = stan(sfile, data=dat, iter=iter, chains=4,
                   seed=ss.stan_args[0]['seed'], init=ss_inits,
                   sample_file='ya8schools.csv')

    b = np.allclose(ss.extract(permuted=False), ss_same.extract(permuted=False))
    # b is not true as ss is initialized randomly while ss_same is not.

    s = ss_same.summary(pars="mu", probs=(.3, .8))
    # not in python: print(ss.same, pars='theta', probs=c(.4, .8))
    print(ss_same)