Exemple #1
0
def run_simulation_study(mi_in: MaudInput, true_params_raw):
    """Simulate measurements from known parameters, then fit the model to them.

    :param mi_in: A MaudInput object
    :param true_params_raw: dictionary of param name -> true param values
    :return: a SimulationStudyOutput built from both Stan inputs, the enriched
        true parameters, the simulation run, the updated MaudInput and the
        final samples

    """
    # The Stan program is located relative to this source file.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    stan_model = CmdStanModel(
        stan_file=os.path.join(module_dir, STAN_PROGRAM_RELATIVE_PATH)
    )

    # Stage 1: simulate measurements, starting from the true values
    # (enriched so that non-centered parameters are included as well).
    sim_input = get_input_data(mi_in)
    truth = enrich_true_values(true_params_raw, sim_input)
    sim_run = stan_model.sample(data=sim_input, inits=truth, **SIM_CONFIG)

    # Stage 2: put the simulated measurements into a new MaudInput and sample
    # again, this time with the sampler configuration carried by that input.
    mi_simulated = add_measurements_to_maud_input(mi_in, sim_run, sim_input)
    fit_input = get_input_data(mi_simulated)
    fit_run = stan_model.sample(
        data=fit_input,
        inits=truth,
        **mi_simulated.config.cmdstanpy_config,
    )

    return SimulationStudyOutput(
        sim_input, fit_input, truth, sim_run, mi_simulated, fit_run
    )
def main():
    """Fit the Bernoulli example model and print the mean of the theta draws.

    The Stan file ``bernoulli.stan`` is expected next to this script. Sampler
    output is written to an ``output_`` directory next to the script, which is
    deleted first if it already exists.
    """
    thisdir = os.path.dirname(os.path.realpath(__file__))

    model_fname = f'{thisdir}/bernoulli.stan'
    output_dir = f'{thisdir}/output_'
    num_draws = 400000

    # remove the output directory if it exists
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

    # Load the bernoulli model
    model = CmdStanModel(stan_file=model_fname)

    # Sample the posterior distribution conditioned on some data
    with Timer(label='fit', verbose=True):
        fit = model.sample(
            data={'N': 5, 'y': [0, 1, 0, 0, 0]},
            output_dir=output_dir,
            iter_sampling=num_draws,
        )

    # Report the average value of the theta parameter
    # (this is probably not theoretically correct, just trying to get familiar)
    with Timer(label='Load draws', verbose=True):
        # First dimension is the draw index, second is the chain index,
        # and third is the column index
        draws = fit.draws()
    print(f'{draws.shape[0]} draws; {draws.shape[1]} chains')

    # Look the theta column up by name instead of hard-coding its position,
    # so the code keeps working if the set of sampler columns changes.
    theta_column = fit.column_names.index('theta')
    theta_draws = draws[:, :, theta_column]
    print(np.mean(theta_draws))
def sample(
    stan_file: str,
    input_json: str,
    coords: dict,
    dims: dict,
    sample_kwargs: dict,
    cpp_options: Optional[dict],
    stanc_options: Optional[dict],
) -> InferenceData:
    """Run cmdstanpy.CmdStanModel.sample and return an InferenceData.

    :param stan_file: path to the Stan program
    :param input_json: path to the JSON file holding the Stan input data; it
        must contain the keys ``ix_train`` and ``ix_test``
    :param coords: coordinates passed on to arviz; this dict is not modified
    :param dims: dims passed on to arviz
    :param sample_kwargs: keyword arguments forwarded to ``CmdStanModel.sample``
    :param cpp_options: forwarded to ``CmdStanModel``
    :param stanc_options: forwarded to ``CmdStanModel``
    :return: the InferenceData built by ``az.from_cmdstan`` from the csv files
    """
    model = CmdStanModel(
        stan_file=stan_file,
        cpp_options=cpp_options,
        stanc_options=stanc_options,
    )
    with open(input_json, "r") as f:
        stan_input = json.load(f)
    # Build a new dict rather than inserting into ``coords`` so the caller's
    # object is left untouched. The stored indexes are shifted down by one.
    full_coords = {
        **coords,
        "ix_train": [i - 1 for i in stan_input["ix_train"]],
        "ix_test": [i - 1 for i in stan_input["ix_test"]],
    }
    mcmc = model.sample(data=input_json, **sample_kwargs)
    return az.from_cmdstan(
        posterior=mcmc.runset.csv_files,
        log_likelihood="llik",
        posterior_predictive="yrep",
        observed_data=input_json,
        coords=full_coords,
        dims=dims,
    )
def main():
    """Sample the multi-normal example model inside a StanMonitor context."""
    # ---- adjustable parameters ----
    N = 400
    rho = 0.9  # rho should be <1
    iter_warmup = 20  # number of warmup iterations
    iter_sampling = 100  # number of sampling iterations
    # -------------------------------

    # The .stan file for this model sits next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    model_fname = f'{script_dir}/multi-normal.stan'

    # Parameters the monitor should track.
    monitored = [
        "lp__",
        "accept_stat__",
        "stepsize__",
        "treedepth__",
        "n_leapfrog__",
        "divergent__",
        "energy__",
    ]

    # StanMonitor is used as a context manager around the whole run.
    with StanMonitor(
        label='multi-normal-example',
        parameter_names=monitored,
    ) as monitor:
        model = CmdStanModel(stan_file=model_fname)

        # Sample the posterior, writing into the monitor's output directory.
        sampler_args = dict(
            data={'N': N, 'rho': rho},
            output_dir=monitor._output_dir,
            iter_sampling=iter_sampling,
            iter_warmup=iter_warmup,
            save_warmup=True,
        )
        fit = model.sample(**sampler_args)
Exemple #5
0
def run_model():
    """Fit eight schools twice (original and doubled sigma) and compare them.

    The two fits plus one set of extra reference values are handed to
    ``save_compare_parameters`` for the parameters ``mu`` and ``tau``.
    """
    model = CmdStanModel(stan_file="eight_schools.stan")

    # Sampler settings shared by both fits.
    sampler_args = dict(
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
    )

    data = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }
    fit1 = model.sample(data=data, **sampler_args)

    # Second fit: same data with every uncertainty doubled.
    data["sigma"] = [2 * s for s in data["sigma"]]
    fit2 = model.sample(data=data, **sampler_args)

    # Extra values to show next to the fits (optional).
    extra = [{"mu": 2.2, "tau": 1.3}]

    save_compare_parameters(
        [fit1, fit2],
        labels=['Original', 'Larger uncertainties', 'Extra'],
        extra_values=extra,
        type=CompareParametersType.TEXT,  # or GITLAB_LATEX
        param_names=['mu', 'tau'],
    )
Exemple #6
0
def run_stan(output_dir, settings: AnalysisSettings):
    """
    Sample the Stan model named in the settings and save an analysis of it.

    Parameters
    ----------
    output_dir: str
        Directory where Stan's output will be created
    settings: AnalysisSettings
        Analysis settings; ``stan_model_path``, ``data`` and
        ``max_treedepth`` are read from it.

    Returns
    -------
    cmdstanpy.CmdStanMCMC
        Stan's output containing samples of posterior distribution
        of parameters.
    """

    sampler_args = dict(
        data=settings.data,
        seed=333,
        adapt_delta=0.99,
        max_treedepth=settings.max_treedepth,
        iter_sampling=4000,
        iter_warmup=1000,
        chains=4,
        cores=4,
        show_progress=True,
        output_dir=output_dir,
    )
    stan_model = CmdStanModel(stan_file=settings.stan_model_path)
    posterior = stan_model.sample(**sampler_args)

    # Summaries and plots for the parameters of interest.
    save_analysis(posterior, param_names=["r", "sigma"])

    return posterior
Exemple #7
0
def main():
    """Sample the configured model and move its csv files to standard names.

    Model, input and output locations come from ``RELATIVE_PATHS`` and are
    resolved relative to this file. Sampler arguments come from
    ``SAMPLE_ARGS``.
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))
    model_path, input_path, output_dir = (
        os.path.join(base_dir, RELATIVE_PATHS[key])
        for key in ("model", "model_input", "model_output_dir")
    )

    mcmc = CmdStanModel(stan_file=model_path).sample(
        data=input_path, **SAMPLE_ARGS
    )

    # Move every csv file produced by the run to its standardised name.
    for csv_file in mcmc.runset.csv_files:
        target = standardise_csv_filename(csv_file, output_dir)
        os.replace(csv_file, target)
Exemple #8
0
def stan_sampler_advi(model_code):
    """Run full-rank ADVI and return the shuffled approximate draws of theta.

    :param model_code: passed to ``CmdStanModel`` as ``stan_file``
    :return: a pandas Series of the ``theta`` draws in random order, with a
        fresh 0..n-1 index

    The number of ADVI iterations and of output draws come from the
    module-level names ``svi_steps`` and ``iterations``.
    """
    sm = CmdStanModel(stan_file=model_code)
    fit = sm.variational(iter=svi_steps,
                         algorithm="fullrank",
                         output_samples=iterations,
                         tol_rel_obj=10)
    # The variational sample is laid out as (draws x columns). Indexing it
    # with the bare column number would pick a single draw (a row), so select
    # theta's column explicitly.
    theta_column = fit.column_names.index("theta")
    samples = pd.Series(np.asarray(fit.variational_sample)[:, theta_column])
    return samples.iloc[np.random.permutation(
        len(samples))].reset_index(drop=True)
Exemple #9
0
def stan_sampler(model_code):
    """Sample the model with NUTS and return the shuffled draws of theta.

    :param model_code: passed to ``CmdStanModel`` as ``stan_file``
    :return: the ``theta`` draws in random order with a fresh 0..n-1 index

    Chain count, warmup length and total number of draws come from the
    module-level names ``num_chains``, ``warmup`` and ``iterations``.
    """
    sampler_args = dict(
        iter_sampling=iterations // num_chains,
        iter_warmup=warmup,
        chains=num_chains,
    )
    fit = CmdStanModel(stan_file=model_code).sample(**sampler_args)

    # NOTE(review): `.theta` assumes stan_variable returns a DataFrame-like
    # object with a `theta` column - confirm against the cmdstanpy version.
    theta = fit.stan_variable("theta").theta

    shuffled_positions = np.random.permutation(len(theta))
    return theta.iloc[shuffled_positions].reset_index(drop=True)
Exemple #10
0
def run_stan(output_dir, mydata):
    """Sample the eight-schools model on ``mydata`` and return the fit.

    ``output_dir`` is forwarded to the sampler as its output directory.
    """
    sampler_args = dict(
        data=mydata,
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
        output_dir=output_dir,  # make sure to pass this in
    )
    stan_model = CmdStanModel(stan_file="eight_schools.stan")
    return stan_model.sample(**sampler_args)
Exemple #11
0
def run_model1_divorse_age(data, output_dir, sampling_iters, warmup_iters):
    """Sample the ``divorse1_divorse_age`` Stan model and return the fit.

    The iteration counts and the output directory are supplied by the caller.
    The run always uses 4 chains, 2 cores and seed 1.
    """
    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a05_divorse/stan_model/divorse1_divorse_age.stan"
    )
    sampler_args = dict(
        data=data,
        chains=4,
        cores=2,
        sampling_iters=sampling_iters,
        warmup_iters=warmup_iters,
        output_dir=output_dir,
        seed=1,
    )
    return stan_model.sample(**sampler_args)
Exemple #12
0
def run_model():
    """Fit the eight-schools model and save a tree plot of mu, tau and eta."""
    stan_model = CmdStanModel(stan_file="eight_schools.stan")

    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }
    sampler_args = dict(
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
    )
    posterior = stan_model.sample(data=schools, **sampler_args)

    save_tree_plot([posterior], param_names=['mu', 'tau', 'eta'])
Exemple #13
0
def run_model():
    """Fit eight schools once and save the tree plot under several InfoPaths.

    Each entry of ``info_path_variants`` holds the keyword arguments for one
    ``InfoPath``. The comment on each entry gives the intended output
    location.
    """
    stan_model = CmdStanModel(stan_file="eight_schools.stan")

    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }
    fit = stan_model.sample(
        data=schools,
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
    )

    info_path_variants = [
        # Change all path components:
        #   ~/tarpan/analysis/model1/normal.png
        dict(path='~/tarpan',
             dir_name="analysis",
             sub_dir_name="model1",
             base_name="normal",
             extension="png"),
        # Change the file name:
        #   model_info/custom_location/my_summary.pdf
        dict(base_name="my_summary"),
        # Change the file type:
        #   model_info/custom_location/summary.png
        dict(extension="png"),
        # Change the sub-directory name:
        #   model_info/custom/summary.pdf
        dict(sub_dir_name="custom"),
        # Do not create sub-directory:
        #   model_info/summary.pdf
        dict(sub_dir_name=InfoPath.DO_NOT_CREATE),
        # Change the default top directory name from `model_info`:
        #   my_files/custom_location/summary.pdf
        dict(dir_name='my_files'),
        # Change the root path to "tarpan" in your user's home directory:
        #   ~/tarpan/model_info/custom_location/summary.pdf
        dict(path='~/tarpan'),
    ]

    # One plot per variant, in the order listed above.
    for variant in info_path_variants:
        save_tree_plot([fit], info_path=InfoPath(**variant))
Exemple #14
0
def run_stan_model(stan_file, data, **kwargs):
    """
    Compile a Stan model, sample from it and run the fit's diagnose step.

    Extra keyword arguments are forwarded unchanged to ``model.sample``.
    Returns the ``(model, fit)`` pair.

    Notes
    -----
    For prior predictive sampling (or to otherwise
    simulate data), pass `fixed_param=True`.
    https://cmdstanpy.readthedocs.io/en/latest/sample.html#example-generate-data-fixed-param-true
    """
    stan_model = CmdStanModel(stan_file=stan_file)
    stan_model.compile()

    posterior = stan_model.sample(data=data, **kwargs)
    posterior.diagnose()

    return stan_model, posterior
Exemple #15
0
def run_model():
    """Fit the eight-schools model and save a summary with custom HPDIs."""
    stan_model = CmdStanModel(stan_file="eight_schools.stan")

    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }
    sampler_args = dict(
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
    )
    posterior = stan_model.sample(data=schools, **sampler_args)

    # Summary restricted to three parameters, using custom HPDI values.
    custom_hpdis = SummaryParams(hpdis=[0.05, 0.99])
    save_summary(
        posterior,
        param_names=['mu', 'tau', 'eta.1'],
        summary_params=custom_hpdis,
    )
Exemple #16
0
def run_model(data, output_dir):
    """
    Sample the eight-schools test model and return the fit.

    ``output_dir`` is forwarded to the sampler as its output directory.
    """

    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a01_eight_schools/eight_schools.stan"
    )
    sampler_args = dict(
        data=data,
        chains=4,
        cores=4,
        sampling_iters=1000,
        warmup_iters=1000,
        seed=1,
        output_dir=output_dir,
    )
    return stan_model.sample(**sampler_args)
Exemple #17
0
def myfunc(output_dir, data):
    """Bump ``data["count"]`` and sample the eight-schools test model.

    The incoming ``data`` is only used for its ``"count"`` entry, which is
    incremented in place. The model itself is always fitted to the fixed
    eight-schools values defined below.
    """
    data["count"] += 1

    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }

    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a01_eight_schools/eight_schools.stan"
    )
    sampler_args = dict(
        data=schools,
        chains=1,
        cores=1,
        sampling_iters=1000,
        warmup_iters=1000,
        output_dir=output_dir,
    )
    return stan_model.sample(**sampler_args)
Exemple #18
0
def run_model3_treatment(data, output_dir, sampling_iters, warmup_iters):
    """
    Sample the ``height3_treatment`` Stan model and return the fit.

    The iteration counts and the output directory are supplied by the caller.
    The run always uses a single chain, a single core and seed 1.
    """

    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a04_height/stan_model/height3_treatment.stan"
    )
    sampler_args = dict(
        data=data,
        chains=1,
        cores=1,
        sampling_iters=sampling_iters,
        warmup_iters=warmup_iters,
        output_dir=output_dir,
        seed=1,
    )
    return stan_model.sample(**sampler_args)
Exemple #19
0
def run_model(data, output_dir):
    """
    Sample the ``cars`` Stan model and return the fit.

    ``output_dir`` is forwarded to the sampler as its output directory.
    """

    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a03_cars/stan_model/cars.stan"
    )
    sampler_args = dict(
        data=data,
        chains=1,
        cores=1,
        sampling_iters=1000,
        warmup_iters=500,
        output_dir=output_dir,
        seed=1,
    )
    return stan_model.sample(**sampler_args)
Exemple #20
0
 def _compile_model(self, name):
     """Build the CmdStanModel for ``name``.

     Returns a ``(model, hash)`` pair, where the hash is the one computed
     by ``StanCacheMixin._get_file_hash`` for the model's Stan file.
     """
     stan_file = self._get_stan_file_loc(name)
     file_hash = StanCacheMixin._get_file_hash(stan_file)
     compiled = CmdStanModel(stan_file=stan_file, model_name=name)
     return compiled, file_hash
Exemple #21
0
def run_bernoulli_fit():
    """Fit the Bernoulli example shipped with CmdStan and print its summary."""
    # The Stan file lives in the examples folder of the CmdStan installation.
    stan_file = os.path.join(
        cmdstan_path(), 'examples', 'bernoulli', 'bernoulli.stan'
    )
    model = CmdStanModel(stan_file=stan_file)
    model.compile()

    # Ten observations, two of which are successes.
    observations = {'N': 10, 'y': [0, 1, 0, 0, 0, 0, 0, 0, 0, 1]}

    # Fit the model with progress reporting switched on.
    fit = model.sample(
        chains=4,
        cores=2,
        data=observations,
        show_progress=True,
    )

    # summarize the results (wraps CmdStan `bin/stansummary`):
    print(fit.summary())
Exemple #22
0
def run_stan_model(*, posterior, config):
    """
    Compile the posterior's Stan program and sample from it.

    The data comes from ``posterior.data.values()``. The sampler settings
    (warmups, iterations, thin, chains, seed) are read from ``config``.
    Returns the fit object produced by ``model.sample``.
    """
    stan_model = CmdStanModel(
        stan_file=posterior.model.code_file_path(framework="stan")
    )
    sampler_args = dict(
        data=posterior.data.values(),
        iter_warmup=config.warmups,
        iter_sampling=config.iterations,
        thin=config.thin,
        chains=config.chains,
        seed=config.seed,
    )
    return stan_model.sample(**sampler_args)
Exemple #23
0
def run_stan(observed_values, uncertainties):
    """Sample the Gaussian mixture model for the given observations.

    The Stan input holds the values as ``y``, their uncertainties as
    ``uncertainties`` and the number of values as ``N``.
    """
    stan_input = dict(
        y=observed_values,
        uncertainties=uncertainties,
        N=len(observed_values),
    )

    stan_model = CmdStanModel(stan_file="stan_model/gaussian_mixture.stan")

    sampler_args = dict(
        data=stan_input,
        seed=333,
        adapt_delta=0.90,
        max_treedepth=5,
        sampling_iters=500,
        warmup_iters=500,
        chains=4,
        cores=4,
    )
    return stan_model.sample(**sampler_args)
Exemple #24
0
def run_model(data, output_dir):
    """
    Sample the Gaussian mixture test model and return the fit.

    ``output_dir`` is forwarded to the sampler as its output directory.
    """

    stan_model = CmdStanModel(
        stan_file="tarpan/testutils/a02_gaussian_mixture/stan_model/gaussian_mixture.stan"
    )
    sampler_args = dict(
        data=data,
        seed=333,
        adapt_delta=0.90,
        max_treedepth=5,
        sampling_iters=1000,
        warmup_iters=1000,
        chains=4,
        cores=4,
        output_dir=output_dir,
    )
    return stan_model.sample(**sampler_args)
Exemple #25
0
def run_model():
    """Fit the eight-schools model and save the default analysis for it."""
    stan_model = CmdStanModel(stan_file="eight_schools.stan")

    schools = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }
    sampler_args = dict(
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
    )
    posterior = stan_model.sample(data=schools, **sampler_args)

    # Creates summaries, traceplots and histograms in `model_info` directory
    save_analysis(posterior)
Exemple #26
0
def generate_samples(
    study_name: str,
    measurements: pd.DataFrame,
    model_configurations: List[ModelConfiguration],
) -> None:
    """Fit every model configuration, print diagnostics and save results.

    For each configuration the Stan input is written as JSON, the model is
    sampled, and the inference data and psis-loo results are written to
    disk. When more than one model was fitted, a loo comparison is printed
    and saved as csv.

    :param study_name: a string, used as prefix of every fit name
    :param measurements: dataframe handed to each configuration's
        ``stan_input_function`` and ``infd_kwargs_function``
    :param model_configurations: the configurations to fit, in order
    """
    fitted = {}
    for config in model_configurations:
        fit_name = f"{study_name}-{config.name}"
        print(f"Fitting model {fit_name}...")

        # Output locations for this fit.
        loo_file = os.path.join(LOO_DIR, f"loo_{fit_name}.pkl")
        infd_file = os.path.join(INFD_DIR, f"infd_{fit_name}.ncdf")
        json_file = os.path.join(JSON_DIR, f"input_data_{fit_name}.json")

        # Build the Stan input and keep a JSON copy of it.
        stan_input = config.stan_input_function(measurements)
        print(f"Writing input data to {json_file}")
        jsondump(json_file, stan_input)

        # Sample.
        stan_model = CmdStanModel(
            model_name=fit_name, stan_file=config.stan_file
        )
        print(f"Writing csv files to {SAMPLES_DIR}...")
        mcmc = stan_model.sample(
            data=stan_input,
            output_dir=SAMPLES_DIR,
            **config.sample_kwargs,
        )
        # Collapse blank lines in the diagnostic text before printing it.
        print(mcmc.diagnose().replace("\n\n", "\n"))

        # Convert to inference data, report and persist.
        infd_kwargs = config.infd_kwargs_function(measurements)
        infd = az.from_cmdstanpy(mcmc, **infd_kwargs)
        print(az.summary(infd))
        fitted[fit_name] = infd
        print(f"Writing inference data to {infd_file}")
        infd.to_netcdf(infd_file)
        print(f"Writing psis-loo results to {loo_file}\n")
        az.loo(infd, pointwise=True).to_pickle(loo_file)

    # A comparison only makes sense with at least two fitted models.
    if len(fitted) <= 1:
        return
    comparison = az.compare(fitted)
    print(f"Loo comparison:\n{comparison}")
    comparison.to_csv(os.path.join(LOO_DIR, "loo_comparison.csv"))
def test_ordered_ragged_array():
    """Check the output of the ordered_ragged_array test model.

    The test ragged array: [[2], [3, 2], [5, 3, 2]]
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    stan_model = CmdStanModel(
        stan_file=os.path.join(
            test_dir, "stan/ordered_ragged_array_test_model.stan"
        ),
        stanc_options={
            "include_paths": [os.path.join(test_dir, "../src/stan")]
        },
    )

    stan_input = dict(
        N=3,
        O=6,
        first_vals=[2, 3, 5],
        n_elems=[1, 2, 3],
        diffs=[1, 2, 1],
    )
    # A single fixed-parameter draw is all the test needs.
    run = stan_model.sample(data=stan_input, fixed_param=True, iter_sampling=1)

    first_draw = run.get_drawset(params=["actual"]).loc[0]
    assert first_draw.tolist() == [2., 3., 2., 5., 3., 2.]
Exemple #28
0
def run_model():
    """Fit eight schools twice and save one tree plot comparing both fits.

    The second fit uses doubled uncertainties. One set of extra reference
    values is plotted alongside, and legend labels are supplied through
    ``TreePlotParams``.
    """
    stan_model = CmdStanModel(stan_file="eight_schools.stan")

    # Sampler settings shared by both fits.
    sampler_args = dict(
        chains=4,
        cores=4,
        seed=1,
        sampling_iters=1000,
        warmup_iters=1000,
    )

    data = {
        "J": 8,
        "y": [28, 8, -3, 7, -1, 1, 18, 12],
        "sigma": [15, 10, 16, 11, 9, 11, 10, 18],
    }
    fit1 = stan_model.sample(data=data, **sampler_args)

    # Second fit: same data with every uncertainty doubled.
    data["sigma"] = [2 * s for s in data["sigma"]]
    fit2 = stan_model.sample(data=data, **sampler_args)

    # Show extra markers in tree plot for comparison (optional)
    reference_values = [{"mu": 2.2, "tau": 1.3}]

    # Supply legend labels (optional)
    plot_settings = TreePlotParams()
    plot_settings.labels = ["Model 1", "Model 2", "Exact"]

    save_tree_plot(
        fits=[fit1, fit2],
        extra_values=reference_values,
        param_names=['mu', 'tau'],
        tree_params=plot_settings,
    )
def generate_fake_measurements(real_data: pd.DataFrame) -> pd.DataFrame:
    """Build a fake measurement table by simulating from the true model.

    You will need to customise this function to make sure it matches the data
    generating process you want to simulate from.

    :param real_data: dataframe of real data to copy
    :return: a copy of ``real_data`` with a ``true_ability`` column added and
        ``score`` replaced by simulated values

    """
    # Draw one standard-normal ability per distinct name.
    truth = TRUE_PARAM_VALUES.copy()
    n_names = real_data["name"].nunique()
    truth["ability"] = np.random.normal(0, 1, n_names)

    fake = real_data.copy()
    fake["score"] = 0

    # Pair each distinct name with one drawn ability and attach it to the rows.
    _, distinct_names = pd.factorize(fake["name"])
    ability_by_name = dict(zip(distinct_names, truth["ability"]))
    fake["true_ability"] = fake["name"].map(ability_by_name)

    # One fixed-parameter draw from the true model, started at the true values.
    stan_model = CmdStanModel(stan_file=TRUE_MODEL_CONFIG.stan_file)
    stan_input = TRUE_MODEL_CONFIG.stan_input_function(fake)
    mcmc = stan_model.sample(
        stan_input,
        inits=truth,
        fixed_param=True,
        iter_sampling=1,
    )

    # The first draw of `yrep`, shifted down by 6, becomes the fake score.
    simulated_score = mcmc.stan_variable("yrep")[0] - 6
    return fake.assign(score=simulated_score)
Exemple #30
0
from cmdstanpy import CmdStanModel
from tarpan.cmdstanpy.summary import print_summary

# Compile the Stan program for this example.
model = CmdStanModel(stan_file="stan_model/ex02.06.stan")

# Input data handed to the Stan program.
data = {"w": 6, "l": 3}

# Sample with a progress display, then print a summary of the fit.
fit = model.sample(data=data, show_progress=True)
print_summary(fit)