Example #1
0
def stanTopkl():
    """Compile the log-normal and log-t Stan programs and pickle each one.

    For ``log_normal`` and then ``log_t``: an existing ``<name>.pkl`` in the
    working directory is removed, ``<name>.stan`` is compiled with
    ``StanModel`` and the compiled model is pickled to ``<name>.pkl``.
    """
    for stem in ('log_normal', 'log_t'):
        pickle_path = stem + '.pkl'
        # Remove any previous pickle before compiling.
        if os.path.isfile(pickle_path):
            os.remove(pickle_path)
        compiled = StanModel(file=stem + '.stan')
        with open(pickle_path, 'wb') as out:
            pickle.dump(compiled, out)
Example #2
0
def stanTopkl():
    """
    Compile the 'stan' models up front and pickle them for later use.

    Every call deletes any existing 'log_normal.pkl' / 'log_t.pkl', compiles
    the matching '.stan' file and writes a fresh pickle, so the models are
    always rebuilt by this function.
    """
    def _rebuild(stan_path, pkl_path):
        # Delete the old pickle (if any), compile, then write the new pickle.
        if os.path.isfile(pkl_path):
            os.remove(pkl_path)
        compiled = StanModel(file=stan_path)
        with open(pkl_path, 'wb') as handle:
            pickle.dump(compiled, handle)

    _rebuild('log_normal.stan', 'log_normal.pkl')
    _rebuild('log_t.stan', 'log_t.pkl')
Example #3
0
def get_or_compile_stan_model(model_file, distribution):
    """
    Return a compiled Stan model, using a pickle cache in the temp directory.

    The cache file lives in ``tempfile.gettempdir()`` and its name encodes
    ``distribution`` and the running Python major.minor version. If that file
    exists it is unpickled and returned; otherwise ``model_file`` is compiled
    with ``StanModel``, pickled to the cache file, and returned.

    Args:
        model_file: model file location
        distribution: name of the KPI distribution model; only used here to
            build the cache file name

    Returns:
        the compiled (or unpickled) Stan model

    Note: compiled_model_file is a hardcoded file name which may cause some
    issues in future. There are 2 alternative implementations for Stan models
    handling:
        1. Using global variables
        2. Pre-compiling stan models and adding them as a part of expan project
        (3). Using temporary files with tempfile module is not currently
            possible, since it generates a unique file name which is
            difficult to track.
    The temporary directory location varies with the current platform and
    settings.
    """
    python_version = '{0[0]}.{0[1]}'.format(sys.version_info)
    compiled_model_file = os.path.join(
        tempfile.gettempdir(),
        'expan_early_stop_compiled_stan_model_'
        + distribution + '_' + python_version + '.pkl')

    if os.path.isfile(compiled_model_file):
        # NOTE(review): unpickling runs arbitrary code and the temp directory
        # may be writable by other users -- confirm this cache is trusted.
        with open(compiled_model_file, 'rb') as f:
            return pickle.load(f)

    sm = StanModel(file=model_file)
    with open(compiled_model_file, 'wb') as f:
        pickle.dump(sm, f)
    return sm
Example #4
0
def simple_car_model(tobit_data: pd.DataFrame, ad_matrix):
    """
    Compile and sample the tobit CAR Stan model, then dump the fit to disk.

    The data dict comes from ``get_datadict()`` with ``W`` set to
    ``ad_matrix`` and ``U`` set to 800. ``tobit_data`` is not used in this
    function. The fit is dumped to 'data/car_students_2000.joblib' and the
    means of ``beta_zero`` and ``beta`` are printed.

    Returns:
        tuple: (fit, compiled model)

    Open questions from the author:
    In the model of the researchers, phi is distributed around phi_bar
    is this handled by the multi_normal_prec??? Need to understand docs and adjust if not.
      - seems to be legit. Documentation of WinBUGS does it in a similar way.
    https://mc-stan.org/docs/2_19/functions-reference/multivariate-normal-distribution-precision-parameterization.html
     - find out what the CAR prior in car.normal is. Right now I just have 2/-2 ...
       - Unfortunately, there is no information available. Just need to set something that works.

    """
    # Close the Stan source once compilation has read it.
    with Path('models/tobit_car_students.stan').open() as stan_file:
        car_model = StanModel(file=stan_file,
                              extra_compile_args=["-w"])
    car_dict = get_datadict()
    car_dict['W'] = ad_matrix
    car_dict['U'] = 800

    # this smaller run still took 25 mins to sample...
    # And still getting too low E-BFMI values
    car_fit = car_model.sampling(data=car_dict,
                                 iter=2000,
                                 warmup=500,
                                 chains=4)
    dump(car_fit, Path('data/car_students_2000.joblib'))
    car_res = car_fit.extract()
    # NOTE(review): extract() with default arguments presumably already
    # excludes warmup draws -- confirm that skipping the first 501 draws
    # here is intended.
    print('β_0: {}'.format(car_res['beta_zero'][501:].mean()))
    print('β:   {}'.format(car_res['beta'][501:].mean(axis=0)))

    # getting many rejections - bad? Phi is a bit like a covariance matrix
    # -> only in the beginning, after 200 iterations all fine.
    # result from the run: chains have not mixed, might need to re-parametrize...
    # am I constraining the variables too much??? Need to center somehow?
    return car_fit, car_model
Example #5
0
def compile_stan_model(stan_model_name):
    """
    Compile the named Stan model and cache it as a pickle.

    The source is resolved as ``stan/<name>.stan`` and the pickle as
    ``stan_compiled/<name>.pkl`` inside the ``orbit`` package. Compilation is
    skipped when the pickle exists and is at least as new as the source.
    Always returns None.
    """
    stan_path = pkg_resources.resource_filename(
        'orbit', 'stan/{}.stan'.format(stan_model_name))
    pickle_path = pkg_resources.resource_filename(
        'orbit', 'stan_compiled/{}.pkl'.format(stan_model_name))

    # updated for py3
    os.makedirs(os.path.dirname(pickle_path), exist_ok=True)

    # Nothing to do when the pickle is not older than the stan source.
    up_to_date = (
        os.path.isfile(pickle_path)
        and os.path.getmtime(pickle_path) >= os.path.getmtime(stan_path)
    )
    if up_to_date:
        return None

    with open(stan_path, encoding="utf-8") as src:
        program = src.read()

    compiled = StanModel(model_code=program)

    with open(pickle_path, 'wb') as out:
        pickle.dump(compiled, out, protocol=pickle.HIGHEST_PROTOCOL)

    return None
Example #6
0
 def test_stanc_exception(self):
     """``stanc`` and ``StanModel`` must both raise ValueError for an unknown distribution."""
     bad_program = 'parameters {real z;} model {z ~ no_such_distribution();}'
     # The assertion helper has a different name on Python 2.
     if PY2:
         assert_raises_regex = self.assertRaisesRegexp
     else:
         assert_raises_regex = self.assertRaisesRegex
     for build in (stanc, StanModel):
         with assert_raises_regex(ValueError, 'unknown distribution'):
             build(model_code=bad_program)
Example #7
0
def get_stan_model(model_name, recompile=False, **model_params):
    """
    Return a compiled Stan model, loading it from the pickle cache if possible.

    Parameters
    ----------
    model_name : str
        The name of the model used
            "bernoulli", "b": Bernoulli likelihood flat prior on probabilities
            "beta-binomial", "bb": Binomial likelihood and Beta prior
    recompile : boolean
        If set to True, always recompile the model, otherwise try to use
        the cached pickle of the model.
    **model_params
        Accepted but not used by this function.

    Returns
    -------
    The unpickled or freshly compiled model. A freshly compiled model is
    also pickled into STAN_MODEL_CACHE under a name made of the model name
    and the Python major.minor version.
    """

    python_version = 'python{0[0]}.{0[1]}'.format(sys.version_info)

    # Empty components are dropped before joining.
    name_parts = filter(None, [model_name, python_version])
    cache_path = os.path.join(STAN_MODEL_CACHE,
                              "-".join(name_parts) + ".pickle")

    if os.path.isfile(cache_path) and not recompile:
        with open(cache_path, 'rb') as cached:
            return pickle.load(cached)

    compiled = StanModel(model_code=get_stan_model_code(model_name))
    logging.info('Saving model to {}'.format(cache_path))
    with open(cache_path, 'wb') as out:
        pickle.dump(compiled, out)
    return compiled
Example #8
0
def run_or_load_model(m_type, m_dict, iters, warmup, c_params):
    """
    Load a cached crash model and fit, compiling / sampling whichever is missing.

    Cache and log file names are derived from ``m_type``, ``iters``,
    ``warmup`` and the ``adapt_delta`` / ``max_treedepth`` entries of
    ``c_params``.

    Args:
        m_type: 'car' or 'tobit'; selects 'models/crash_<m_type>.stan'
        m_dict: data passed to ``model.sampling``
        iters: value for the ``iter`` argument of ``sampling``
        warmup: value for the ``warmup`` argument of ``sampling``
        c_params: sampler control dict; must contain 'adapt_delta' and
            'max_treedepth'

    Returns:
        tuple: (model, fit)

    Raises:
        ValueError: if ``m_type`` is not 'car' or 'tobit'
    """
    if m_type not in ['car', 'tobit']:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('Invalid model type!')
    name = 'crash_{}_{}-{}_delta_{}_max_{}'.format(m_type, iters, warmup,
                                                   c_params['adapt_delta'],
                                                   c_params['max_treedepth'])
    model_cache = Path('cache/' + name + '_model.joblib')
    fit_cache = Path('cache/' + name + '_fit.joblib')

    try:
        model = load(model_cache)
    except Exception:
        # Any load failure falls back to compiling; unlike a bare except,
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        with Path('models/crash_{}.stan'.format(m_type)).open() as stan_file:
            model = StanModel(file=stan_file,
                              extra_compile_args=["-w"],
                              model_name=name.split('-')[0])
        dump(model, model_cache)

    try:
        fit = load(fit_cache)
    except Exception:
        fit = model.sampling(data=m_dict,
                             iter=iters,
                             warmup=warmup,
                             control=c_params,
                             check_hmc_diagnostics=True)
        info = fit.stansummary()
        with open(Path('logs/' + name + '.log'), 'w') as c_log:
            c_log.write(info)
        dump(fit, fit_cache)
    return model, fit
Example #9
0
 def fit(self, iterations=5000, control=default_control):
     """Compile the player-scoring Stan model and draw samples from it.

     The compiled model is stored on ``self.stan_model`` and the result of
     ``sampling`` (run on ``self.stan_data`` with 4 chains) on
     ``self.samples``.

     Args:
         iterations: value for the ``iter`` argument of ``sampling``
         control: value for the ``control`` argument of ``sampling``
     """
     compiled = StanModel('../stan/player_scoring.stan')
     self.stan_model = compiled
     sampling_options = dict(iter=iterations,
                             chains=4,
                             refresh=1,
                             control=control)
     self.samples = compiled.sampling(self.stan_data, **sampling_options)
Example #10
0
    def test_empty_parameter(self):
        """Fit a model with a zero-length vector ``a`` and check the converted result.

        The expected attributes are "y", "x", "z" and "~a" in the posterior
        (presumably "~" marks a name that must be absent -- see
        ``check_multiple_attrs``) plus "diverging" in the sample stats.
        """
        model_code = """
            parameters {
                real y;
                vector[3] x;
                vector[0] a;
                vector[2] z;
            }
            model {
                y ~ normal(0,1);
            }
        """
        if pystan_version() != 2:
            import stan  # pylint: disable=import-error

            built = stan.build(model_code)
            fit = built.sample(num_samples=500, num_chains=2)
        else:
            from pystan import StanModel  # pylint: disable=import-error

            built = StanModel(model_code=model_code)
            fit = built.sampling(iter=500,
                                 chains=2,
                                 check_hmc_diagnostics=False)

        converted = from_pystan(posterior=fit)
        expected = {
            "posterior": ["y", "x", "z", "~a"],
            "sample_stats": ["diverging"]
        }
        assert not check_multiple_attrs(expected, converted)
Example #11
0
 def compile(self):
     """Build the Stan program text and compile it into ``self.model``."""
     # Note: we deliberately use a centered parameterization for the
     # thetas at the moment. This is sub-optimal in terms of estimation,
     # but allows us to avoid having to add extra logic to detect and
     # handle intercepts in X.
     #
     # The program contains no interpolated values, so it is written as a
     # plain literal with the Stan braces as-is.
     stan_program = """
     data {
         int<lower=1> N;
         int<lower=1> K;
         vector[N] y;
         int<lower=1,upper=K> id[N];
         int<lower=1> C;
         matrix[K, C] X;
         vector[N] sigma;
     }
     parameters {
         vector[C] beta;
         vector[K] theta;
         real<lower=0> tau2;
     }
     transformed parameters {
         vector[N] mu;
         mu = theta[id] + X * beta;
     }
     model {
         y ~ normal(mu, sigma);
         theta ~ normal(0, tau2);
     }
     """
     from pystan import StanModel
     compiled = StanModel(model_code=stan_program)
     self.model = compiled
Example #12
0
 def test_bernoulli_compile_time(self):
     """Compile the Bernoulli model, check it is not None and log the elapsed seconds."""
     program = self.bernoulli_model_code
     started = time.time()
     compiled = StanModel(model_code=program)
     self.assertIsNotNone(compiled)
     elapsed_seconds = int(time.time() - started)
     logging.info(
         "Compile time: {}s (vs. RStan 28s)\n".format(elapsed_seconds))
Example #13
0
    def __init__(self, context, rdd, prepare_data_callback, stan_file):
        """Store the inputs, compile the Stan model and register its pickle.

        Args:
            context: object exposing ``addFile`` (presumably a SparkContext
                -- confirm against the caller)
            rdd: object exposing ``getNumPartitions``; kept on ``self.rdd``
            prepare_data_callback: kept on ``self.prepare_data_callback``
            stan_file: path of the Stan program to compile
        """
        self.rdd = rdd
        self.prepare_data_callback = prepare_data_callback
        self.n_partitions = self.rdd.getNumPartitions()

        sm = StanModel(file=stan_file)
        # Close (and thereby flush) the pickle before handing the file to the
        # context; an unclosed handle could leave it incomplete on disk.
        with open(PICKLE_FILENAME, "wb") as pickle_file:
            pickle.dump(sm, pickle_file)
        context.addFile(PICKLE_FILENAME)
Example #14
0
def scaled_spare_car(tobit_data: pd.DataFrame, ad_matrix):
    """
    Fit the sparse tobit CAR model on max-abs scaled data and plot diagnostics.

    Adds a 'ones' column to ``tobit_data`` in place, scales the ``new_preds``
    and 'apt' columns with ``MaxAbsScaler``, treats rows with ``apt == 800``
    as censored, samples the model, dumps the fit to 'data/c_sp_4000.joblib'
    and draws a scatter plot and a pair plot.

    Returns:
        tuple: (fit, compiled model)

    Author notes:
    will try with values closer to 0 now.
    sigma was  67.3  with stdev 3.74
    even worse - E-BFMI is still small, but now also much treedepth saturation (OK)
    and chain divergence (bad!) would need to check energy-plots and what correlates...
    TODO: if I scale, I have the danger of not hitting the condition for U...
     -> should not be a problem if I have zeros there as lower bound
    """
    tobit_data['ones'] = np.ones(tobit_data.shape[0])
    trans = MaxAbsScaler().fit_transform(tobit_data[new_preds + ['apt']])
    # Share tobit_data's index so the boolean masks below, which are built
    # from tobit_data, select the matching rows even for a non-default index.
    data_centered = pd.DataFrame(trans,
                                 columns=new_preds + ['apt'],
                                 index=tobit_data.index)
    is_800 = tobit_data['apt'] == 800
    not_800 = tobit_data['apt'] != 800
    ii_obs = tobit_data[not_800]['id']
    ii_cens = tobit_data[is_800]['id']
    # After using vectorisation: Gradient takes 0.0003  seconds.
    c_sparse_dict = {
        'X': data_centered[new_preds],
        'n': tobit_data.shape[0],
        'n_obs': not_800.sum(),
        'n_cens': is_800.sum(),
        'y_obs': data_centered[not_800]['apt'],
        'ii_obs': ii_obs,
        'ii_cens': ii_cens,
        'p': len(new_preds),
        'y_cens': data_centered[is_800]['apt'],
        'W': ad_matrix,
        'U': 1,
        'W_n': ad_matrix.sum() // 2
    }
    # or just 'models/sparse_tcar_students_without_QR.stan'
    # Close the Stan source once compilation has read it.
    with Path('sparse_tobitcar_students.stan').open() as stan_file:
        c_sp_model = StanModel(file=stan_file,
                               verbose=False,
                               extra_compile_args=["-w"])
    c_params = {'adapt_delta': 0.95, 'max_treedepth': 12}
    # no more saturation, but still divergence...
    # trying to constrain the model: α <= 0.99 instead <=1, σ >= 0.001
    c_sp_fit = c_sp_model.sampling(c_sparse_dict,
                                   iter=4000,
                                   warmup=500,
                                   control=c_params)
    c_sp_res = c_sp_fit.extract()
    print(c_sp_fit.stansummary())
    dump(c_sp_fit, 'data/c_sp_4000.joblib')
    plt.scatter(c_sp_fit['lp__'], c_sp_fit['sigma'])

    # sigma looks very correlated.
    # Drop these entries before building the pair-plot frame.
    simpler_csp = c_sp_res.copy()
    for dropped in ('phi', 'y_cens', 'beta', 'y'):
        del simpler_csp[dropped]
    # 'theta' is only removed when present.
    simpler_csp.pop('theta', None)
    c_sp_df = pd.DataFrame.from_dict(simpler_csp)
    sns.pairplot(c_sp_df)
    return c_sp_fit, c_sp_model
Example #15
0
def _bayes_sampling(x, y, distribution='normal'):
    """
	Helper function.

	Args:
		x (array_like): sample of a treatment group
		y (array_like): sample of a control group
		distribution: name of the KPI distribution model, which assumes a
			Stan model file with the same name exists

	Returns:
		tuple:
			- the posterior samples
			- sample size of x
			- sample size of y
			- absolute mean of x
			- absolute mean of y
	"""
    # Checking if data was provided
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')

    # Coercing missing values to right format
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    if distribution == 'normal':
        fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x, 'y': _y}
    elif distribution == 'poisson':
        fit_data = {
            'Nc': n_y,
            'Nt': n_x,
            'x': _x.astype(int),
            'y': _y.astype(int)
        }
    else:
        raise NotImplementedError
    model_file = __location__ + '/../models/' + distribution + '_kpi.stan'
    sm = StanModel(file=model_file)

    fit = sm.sampling(data=fit_data,
                      iter=25000,
                      chains=4,
                      n_jobs=1,
                      seed=1,
                      control={
                          'stepsize': 0.01,
                          'adapt_delta': 0.99
                      })
    traces = fit.extract()

    return traces, n_x, n_y, mu_x, mu_y
Example #16
0
def build_stan_model(target_dir, model_dir=MODEL_DIR):
    """Compile 'prophet.stan' from ``model_dir`` and pickle it into ``target_dir``.

    The pickle is written as 'prophet_model.pkl' with the highest pickle
    protocol.
    """
    from pystan import StanModel

    source_path = os.path.join(model_dir, 'prophet.stan')
    with open(source_path) as src:
        program = src.read()
    compiled = StanModel(model_code=program)
    pickle_path = os.path.join(target_dir, 'prophet_model.pkl')
    with open(pickle_path, 'wb') as out:
        pickle.dump(compiled, out, protocol=pickle.HIGHEST_PROTOCOL)
Example #17
0
def verify_stan():
    """
    Smoke-test the stan installation with the simplest possible model.

    Compiles a one-parameter standard-normal model, samples it with default
    settings and prints the mean of the draws of ``y``.
    """
    program = 'parameters {real y;} model {y ~ normal(0,1);}'
    compiled = StanModel(model_code=program)
    fit = compiled.sampling()
    y_draws = fit.extract()['y']
    print('If this worked, you will see a value near 0 now:')
    print(y_draws.mean())
Example #18
0
def compile_stan_models(target_dir, model_dir=MODEL_DIR):
    """Pre-compile the stan models that are used by the module.

    Each Stan file is read from ``model_dir`` and its compiled model is
    pickled into ``target_dir`` under the paired file name.
    """
    from pystan import StanModel

    # (Stan source file, pickle file) pairs.
    jobs = (
        ("simple_model.stan", "simple_model.pkl"),
        ("model_with_prior.stan", "prior_model.pkl"),
    )
    for stan_name, pickle_name in jobs:
        compiled = StanModel(file=os.path.join(model_dir, stan_name))
        pickle_path = os.path.join(target_dir, pickle_name)
        with open(pickle_path, "wb") as out:
            pickle.dump(compiled, out, protocol=pickle.HIGHEST_PROTOCOL)
def main():
    """Compile 'model.stan', sample it on the schools data and pickle the result.

    The compiled model and the fit are pickled together, as a dict with keys
    'model' and 'fit', to DATA_FILE_NAME.
    """
    effects = [28, 8, -3, 7, -1, 1, 18, 12]
    std_errors = [15, 10, 16, 11, 9, 11, 10, 18]
    schools_dat = dict(J=8, y=effects, sigma=std_errors)

    compiled = StanModel(file='model.stan')
    fit = compiled.sampling(data=schools_dat, iter=1000, chains=4, seed=555)

    payload = {'model': compiled, 'fit': fit}
    with open(DATA_FILE_NAME, 'wb') as out:
        pickle.dump(payload, out)
Example #20
0
def build_model(stan_file):
    """Return the compiled model for ``stan_file``, cached as a sibling pickle.

    The pickle path is ``stan_file`` with its extension replaced by '.pkl'.
    If that file exists it is loaded with ``pickle_from_file``; otherwise the
    Stan file is compiled and stored with ``pickle_to_file``.

    Args:
        stan_file: path of the Stan program

    Returns:
        the loaded or freshly compiled model
    """
    import os

    # splitext only touches the final extension, so directory names that
    # contain '.stan' are left alone and the pickle path can never equal the
    # source path.
    pkl_file = os.path.splitext(stan_file)[0] + '.pkl'

    # Test the path itself: listdir() only returns bare names from the
    # current directory, so a path with a directory part never matched.
    if os.path.isfile(pkl_file):
        return pickle_from_file(pkl_file)

    model = StanModel(file=stan_file)
    pickle_to_file(model, pkl_file)
    return model
Example #21
0
def build_stan_models(target_dir, models_dir=MODELS_DIR):
    """Compile the linear and logistic growth models and pickle them.

    For each growth type, 'prophet_<type>_growth.stan' is read from
    ``models_dir`` and the compiled model is pickled to '<type>_growth.pkl'
    in ``target_dir``.
    """
    from pystan import StanModel

    for growth in ('linear', 'logistic'):
        source_path = os.path.join(
            models_dir, 'prophet_' + growth + '_growth.stan')
        with open(source_path) as src:
            program = src.read()
        compiled = StanModel(model_code=program)
        pickle_path = os.path.join(target_dir, growth + '_growth.pkl')
        with open(pickle_path, 'wb') as out:
            pickle.dump(compiled, out, protocol=pickle.HIGHEST_PROTOCOL)
Example #22
0
def compile_stan_models(target_dir, model_dir=MODEL_DIR):
    """Pre-compile the stan models that are used by the module.

    Compiles 'player_forecasts.stan' from ``model_dir`` and pickles the
    result to 'player_forecasts.pkl' in ``target_dir``.
    """
    from pystan import StanModel

    announcement = "Compiling Stan player model, and putting pickle in {}"
    print(announcement.format(target_dir))

    source_path = os.path.join(model_dir, "player_forecasts.stan")
    compiled = StanModel(file=source_path)

    pickle_path = os.path.join(target_dir, "player_forecasts.pkl")
    with open(pickle_path, "wb") as out:
        pickle.dump(compiled, out, protocol=pickle.HIGHEST_PROTOCOL)
Example #23
0
def main():
    """Command-line entry point: compile a Stan file and pickle the model.

    Takes two positional arguments, the Stan model file and the output
    pickle path.
    """
    cli = argparse.ArgumentParser(
        description="This script compiles a stan model and pickles it to disc",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument("stan", help="The stan model file")
    cli.add_argument("out", help="The python3 pickle output file")
    options = cli.parse_args()

    compiled = StanModel(file=options.stan)
    with open(options.out, 'wb') as sink:
        pickle.dump(compiled, sink)
Example #24
0
    def build_stan_models(self):
        """Compile both growth models and pickle them under the build directory.

        The pickles are written to 'fbprophet/stan_models' below
        ``self.build_lib``; that directory is created via ``self.mkpath``.
        """
        from pystan import StanModel

        target_dir = os.path.join(self.build_lib, 'fbprophet/stan_models')
        self.mkpath(target_dir)

        for growth in ('linear', 'logistic'):
            source_path = 'stan/prophet_{}_growth.stan'.format(growth)
            with open(source_path) as src:
                program = src.read()
            compiled = StanModel(model_code=program)
            pickle_path = os.path.join(
                target_dir, '{}_growth.pkl'.format(growth))
            with open(pickle_path, 'wb') as out:
                pickle.dump(compiled, out)
Example #25
0
    def compile_stan_model(input_stan_filepath, output_model_filepath=None):
        """Compile a Stan program (verbosely) and pickle the compiled model.

        Args:
            input_stan_filepath: path of the Stan source file
            output_model_filepath: where to write the pickle; defaults to
                the input path with its extension replaced by '.pkl'
        """
        import os

        with open(input_stan_filepath) as f:
            model_code = f.read()

        model = StanModel(model_code=model_code, verbose=True)

        if output_model_filepath is None:
            # Swap only the final extension: a plain '.stan' -> '.pkl' string
            # replace leaves the path unchanged when there is no '.stan' in
            # it, which would overwrite the source with the pickle.
            output_model_filepath = (
                os.path.splitext(input_stan_filepath)[0] + '.pkl')
        with open(output_model_filepath, 'wb') as f:
            pickle.dump(model, f)
Example #26
0
def sparse_car_model(tobit_data: pd.DataFrame, ad_matrix):
    """Compile and sample the sparse tobit CAR model and print its summary.

    The data dict is built by ``get_sparse_modeldict(tobit_data, ad_matrix)``.

    Returns:
        tuple: (fit, compiled model)
    """
    sparse_dict = get_sparse_modeldict(tobit_data, ad_matrix)
    # Close the Stan source once compilation has read it.
    with Path('models/sparse_tobitcar_students.stan').open() as stan_file:
        sparse_model = StanModel(file=stan_file,
                                 extra_compile_args=["-w"])
    sparse_fit = sparse_model.sampling(sparse_dict,
                                       iter=4000,
                                       warmup=500,
                                       chains=4)
    print(sparse_fit.stansummary())
    return sparse_fit, sparse_model
Example #27
0
 def test_stanc_exception(self):
     """``stanc`` and ``StanModel`` must both raise ValueError with the _lpdf/_lpmf message."""
     model_code = 'parameters {real z;} model {z ~ no_such_distribution();}'
     expected_message = r'Probability function must end in _lpdf or _lpmf\. Found'
     # The assertion helper has a different name on Python 2.
     assert_raises_regex = getattr(
         self, 'assertRaisesRegexp' if PY2 else 'assertRaisesRegex')
     # distribution not found error
     for build in (stanc, StanModel):
         with assert_raises_regex(ValueError, expected_message):
             build(model_code=model_code)
Example #28
0
def compile_stan_models(target_dir, models_dir=MODELS_DIR):
    """Compile every '*.stan' file in ``models_dir`` and pickle it.

    Each model is compiled with ``model_name`` set to the file stem and
    pickled to '<stem>.pkl' in ``target_dir``.

    Args:
        target_dir: destination directory; must support the ``/`` operator
            (e.g. a ``pathlib.Path``)
        models_dir: directory to search; must provide ``glob``
    """
    from pystan import StanModel

    # Use the models_dir argument rather than the MODELS_DIR global, so a
    # caller-supplied directory is honoured. Sorted for a stable order.
    for model_path in sorted(models_dir.glob("*.stan")):
        model_type = model_path.stem
        target_path = target_dir / (model_type + ".pkl")
        with open(model_path) as f:
            model_code = f.read()
        model = StanModel(model_code=model_code, model_name=model_type)
        with open(target_path, "wb") as f:
            pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)
 def test_init_zero_exception_inf_grad(self):
     """Sampling this model with ``init='0'`` is expected to raise RuntimeError."""
     stan_program = """
     parameters {
         real x;
     }
     model {
         target += 1 / log(x);
     }
     """
     compiled = StanModel(model_code=stan_program)
     sampling_kwargs = dict(init='0', iter=1, chains=1)
     with self.assertRaises(RuntimeError):
         compiled.sampling(**sampling_kwargs)
Example #30
0
    def setUpClass(cls):
        """Compile a model that calls ``cov_exp_quad`` and keep it on ``cls.model``."""
        stan_program = """
          data {
          real rx1[5];
      }
      model {
          matrix[5,5] a;

          a = cov_exp_quad(rx1, 1, 1);
      }
          """
        compiled = StanModel(model_code=stan_program)
        cls.model = compiled