Example #1
def get_model_results_dict():
    df = get_model_input_df()
    model_res_dict = {}

    # fixed ('fema') and random ('rema') effects meta-analyses,
    # each fit for both lnVR and lnCVR
    for model in ['fema', 'rema']:
        stan_model = compile_model(os.path.join(stan_model_path,
                                                f'{model}.stan'),
                                   model_name=model)
        for effect_statistic in ['lnVR', 'lnCVR']:
            data_dict = get_data_dict(df, effect_statistic)

            fit = stan_model.sampling(data=data_dict,
                                      iter=4000,
                                      warmup=1000,
                                      chains=3,
                                      control={'adapt_delta': 0.99},
                                      check_hmc_diagnostics=True,
                                      seed=1)

            data = az.from_pystan(
                posterior=fit,
                posterior_predictive=['Y_pred'],
                observed_data=['Y'],
                log_likelihood='log_lik',
            )

            model_res_dict[f'{model}_{effect_statistic}'] = data

    model = 'remr'
    stan_model = compile_model(os.path.join(stan_model_path, f'{model}.stan'),
                               model_name=model)
    effect_statistic = 'lnVR'
    data_dict = get_data_dict(df, effect_statistic)

    fit = stan_model.sampling(data=data_dict,
                              iter=4000,
                              warmup=1000,
                              chains=3,
                              control={'adapt_delta': 0.99},
                              check_hmc_diagnostics=True,
                              seed=1)
    pystan.check_hmc_diagnostics(fit)

    data = az.from_pystan(
        posterior=fit,
        posterior_predictive=['Y_pred'],
        observed_data=['Y_meas', 'X_meas'],
        log_likelihood='log_lik',
    )
    model_res_dict[f'{model}_{effect_statistic}'] = data
    return model_res_dict
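
A minimal usage sketch (assuming each stored InferenceData carries the log_lik group added above): the dictionary returned here can go straight into az.compare to rank the models.

# Hypothetical follow-up, not part of the original function.
model_res_dict = get_model_results_dict()
comparison = az.compare(model_res_dict, ic='loo')
print(comparison)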
Example #2
def fit_models(stan_data, stan_models, **hmc_args):

    fits = []
    az_fits = {k: None for k in stan_models}
    for fit_model in stan_models:

        print('Fitting %s model' % fit_model)
        # load the pre-compiled Stan model pickled under the .stan filename
        with open(CODE_DIR + 'assignment_errors/%s_model.stan' % fit_model,
                  'rb') as f:
            model = pkl.load(f)

        fit = model.sampling(data=stan_data, **hmc_args)

        model_params = {
            'observed_data': 'y',
            'log_likelihood': {
                'y': 'log_lik'
            },
            'posterior_predictive': 'err_hat'
        }
        fit_az = az.from_pystan(posterior=fit, **model_params)

        fits.append(fit)
        az_fits[fit_model] = fit_az

    comps = az.compare(az_fits)

    return comps, fits
Example #3
def get_varying_intercept_model_results():
    # read in Cipriani data
    df = get_model_input_df()
    data_dict = {
        'N': df.shape[0],
        'Y_meas': df['lnSD'].values,
        'X_meas': df['lnMean'].values,
        'SD_Y': np.sqrt(df['var_lnSD'].values),
        'SD_X': np.sqrt(df['var_lnMean'].values),
        'K': len(df.scale.unique()),
        'scale_group': df.scale_rank.values
    }
    varying_intercept_stan_model = compile_model(
        os.path.join(stan_model_path, 'varying_intercept_regression.stan'),
        model_name='varying_intercept_regression')
    fit = varying_intercept_stan_model.sampling(data=data_dict,
                                                iter=4000,
                                                warmup=1000,
                                                chains=3,
                                                control={'adapt_delta': 0.99},
                                                check_hmc_diagnostics=True,
                                                seed=1)
    pystan.check_hmc_diagnostics(fit)
    data = az.from_pystan(
        posterior=fit,
        posterior_predictive=['Y_pred'],
        observed_data=['X_meas', 'Y_meas'],
        log_likelihood='log_lik',
    )
    return data
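
Because this InferenceData pairs the observed Y_meas with the predictive Y_pred, a posterior predictive check is one natural next step. A sketch, assuming an ArviZ version whose plot_ppc accepts data_pairs to map observed to predicted variable names:

# Hypothetical follow-up for the fit above.
idata = get_varying_intercept_model_results()
az.plot_ppc(idata, data_pairs={'Y_meas': 'Y_pred'})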
Example #4
def print_fit(*args: TaskModel, ic: str = 'looic') -> pd.DataFrame:
    """Print model-fits (mean LOOIC or WAIC values) of hbayesdm models.

    Parameters
    ----------
    args
        Output instances of running hbayesdm model functions.
    ic
        Information criterion (defaults to 'looic').

    Returns
    -------
    pd.DataFrame
        Model-fit info for each hbayesdm output given as an argument.
    """
    ic_options = ('looic', 'waic')
    if ic not in ic_options:
        raise RuntimeError('Information Criterion (ic) must be one of ' +
                           repr(ic_options))
    dataset_dict = {
        model_data.model: az.from_pystan(model_data.fit,
                                         log_likelihood='log_lik')
        for model_data in args
    }

    ic = 'loo' if ic == 'looic' else 'waic'
    return az.compare(dataset_dict=dataset_dict, ic=ic)
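
A usage sketch, assuming two fitted hbayesdm outputs; the model functions and their arguments below are illustrative, not prescribed by print_fit:

# Hypothetical usage with two hbayesdm model outputs.
from hbayesdm.models import gng_m1, gng_m2
output1 = gng_m1(data='example', niter=2000, nwarmup=1000)
output2 = gng_m2(data='example', niter=2000, nwarmup=1000)
print_fit(output1, output2, ic='waic')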
Example #5
    def test_empty_parameter(self):
        if pystan_version() == 2:
            model_code = """
                parameters {
                    real y;
                    vector[3] x;
                    vector[0] a;
                    vector[2] z;
                }
                model {
                    y ~ normal(0,1);
                }
            """
            from pystan import StanModel  # pylint: disable=import-error

            model = StanModel(model_code=model_code)
            fit = model.sampling(iter=10,
                                 chains=2,
                                 check_hmc_diagnostics=False)
            posterior = from_pystan(posterior=fit)
            test_dict = {
                "posterior": ["y", "x", "z"],
                "sample_stats": ["diverging"]
            }
            fails = check_multiple_attrs(test_dict, posterior)
            assert not fails
Example #6
def get_waic_and_loo(fit):
    """Compute WAIC and LOO from a fit instance"""
    idata = az.from_pystan(fit, log_likelihood="llx")
    result = {}
    result.update(dict(az.loo(idata, scale='deviance')))
    result.update(dict(az.waic(idata, scale='deviance')))
    return result
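
A usage sketch, assuming fit is a PyStan fit whose generated quantities include the llx log likelihood referenced above; note that the keys of the returned dict depend on the installed ArviZ version (older releases report 'loo'/'waic', newer ones 'elpd_loo'/'elpd_waic'):

# Hypothetical usage; inspect both criteria at once.
metrics = get_waic_and_loo(fit)
print(metrics)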
Example #7
    def test_index_order(self, data, eight_schools_params):
        """Test 0-indexed data."""
        # Skip test if pystan not installed
        pystan = importorskip("pystan")  # pylint: disable=import-error

        fit = data.model.sampling(data=eight_schools_params)
        if pystan.__version__ >= "2.18":
            # make 1-indexed to 0-indexed
            for holder in fit.sim["samples"]:
                new_chains = OrderedDict()
                for i, (key, values) in enumerate(holder.chains.items()):
                    if "[" in key:
                        name, *shape = key.replace("]", "").split("[")
                        shape = [
                            str(int(item) - 1) for items in shape
                            for item in items.split(",")
                        ]
                        key = name + "[{}]".format(",".join(shape))
                    new_chains[key] = np.full_like(values, fill_value=float(i))
                setattr(holder, "chains", new_chains)
            fit.sim["fnames_oi"] = list(fit.sim["samples"][0].chains.keys())
        idata = from_pystan(posterior=fit)
        assert idata is not None
        for j, fpar in enumerate(fit.sim["fnames_oi"]):
            if fpar == "lp__":
                continue
            par, *shape = fpar.replace("]", "").split("[")
            assert hasattr(idata.posterior, par)
            if shape:
                shape = [slice(None), slice(None)] + list(map(int, shape))
                assert idata.posterior[par][tuple(
                    shape)].values.mean() == float(j)
            else:
                assert idata.posterior[par].values.mean() == float(j)
Example #8
 def test_invalid_fit(self, data):
     if pystan_version() == 2:
         model = data.model
         model_data = {
             "J": 8,
             "y": np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0]),
             "sigma": np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0]),
         }
         fit_test_grad = model.sampling(
             data=model_data, test_grad=True, check_hmc_diagnostics=False
         )
         with pytest.raises(AttributeError):
             _ = from_pystan(posterior=fit_test_grad)
         fit = model.sampling(data=model_data, iter=100, chains=1, check_hmc_diagnostics=False)
         del fit.sim["samples"]
         with pytest.raises(AttributeError):
             _ = from_pystan(posterior=fit)
Example #9
 def __init__(self, data, dataset, ci=95.):
     if az is None:
         raise ValueError("ArviZ package must be installed in order to work"
                          " with the BayesianMetaRegressionResults class.")
     if data.__class__.__name__ == 'StanFit4Model':
         data = az.from_pystan(data)
     self.data = data
     self.dataset = dataset
     self.ci = ci
Example #10
def fit_bhv_model(data,
                  model_path=bmp,
                  targ_field='LABthetaTarget',
                  dist_field='LABthetaDist',
                  resp_field='LABthetaResp',
                  prior_dict=None,
                  stan_iters=2000,
                  stan_chains=4,
                  arviz=mixture_arviz,
                  adapt_delta=.9,
                  diagnostics=True,
                  **stan_params):
    if prior_dict is None:
        prior_dict = default_prior_dict
    targs_is = data[targ_field]
    session_list = np.array(data[['animal', 'date']])
    mapping_list = []
    session_nums = np.array([], dtype=int)
    for i, x in enumerate(targs_is):
        sess = np.ones(len(x), dtype=int) * (i + 1)
        session_nums = np.concatenate((session_nums, sess))
        indices = x.index
        sess_info0 = (str(session_list[i, 0]), ) * len(x)
        sess_info1 = (str(session_list[i, 1]), ) * len(x)
        mapping_list = mapping_list + list(zip(indices, sess_info0,
                                               sess_info1))
    mapping_dict = {i: mapping_list[i] for i in range(len(session_nums))}
    targs = np.concatenate(targs_is, axis=0)
    dists = np.concatenate(data[dist_field], axis=0)
    resps = np.concatenate(data[resp_field], axis=0)
    errs = u.normalize_periodic_range(targs - resps)
    dist_errs = u.normalize_periodic_range(dists - resps)
    dists_per = u.normalize_periodic_range(dists - targs)
    stan_data = dict(T=dist_errs.shape[0],
                     S=len(targs_is),
                     err=errs,
                     dist_err=dist_errs,
                     run_ind=session_nums,
                     dist_loc=dists_per,
                     **prior_dict)
    control = {
        # fall back to the adapt_delta argument (the original hard-coded .8
        # left the function's adapt_delta parameter unused)
        'adapt_delta': stan_params.pop('adapt_delta', adapt_delta),
        'max_treedepth': stan_params.pop('max_treedepth', 10)
    }
    sm = pickle.load(open(model_path, 'rb'))
    fit = sm.sampling(data=stan_data,
                      iter=stan_iters,
                      chains=stan_chains,
                      control=control,
                      **stan_params)
    if diagnostics:
        diag = ps.diagnostics.check_hmc_diagnostics(fit)
    else:
        diag = None
    fit_av = az.from_pystan(posterior=fit, **arviz)
    return fit, diag, fit_av, stan_data, mapping_dict
Example #11
 def get_inference_data4(self, data):
     """multiple vars as lists."""
     return from_pystan(
         posterior=data.obj,
         posterior_predictive=None,
         prior=data.obj,
         prior_predictive=None,
         observed_data="y",
         coords=None,
         dims=None,
     )
Example #12
 def get_inference_data4(self, data):
     """minimal input."""
     return from_pystan(
         posterior=data.obj,
         posterior_predictive=None,
         prior=data.obj,
         prior_predictive=None,
         coords=None,
         dims=None,
         posterior_model=data.model,
         prior_model=data.model,
     )
Example #13
 def get_inference_data3(self, data, eight_schools_params):
     """multiple vars as lists."""
     return from_pystan(
         posterior=data.obj,
         posterior_predictive=["y_hat", "log_lik"],
         prior=data.obj,
         prior_predictive=["y_hat", "log_lik"],
         observed_data="y",
         coords={"school": np.arange(eight_schools_params["J"])},
         dims={"theta": ["school"], "y": ["school"], "y_hat": ["school"], "eta": ["school"]},
         posterior_model=data.model,
         prior_model=data.model,
     )
Example #14
 def get_inference_data(self):
     return from_pystan(fit=self.obj,
                        posterior_predictive='y_hat',
                        observed_data=['y'],
                        log_likelihood='log_lik',
                        coords={'school': np.arange(self.data['J'])},
                        dims={
                            'theta': ['school'],
                            'y': ['school'],
                            'log_lik': ['school'],
                            'y_hat': ['school'],
                            'theta_tilde': ['school']
                        })
Example #15
def create_diagnostic_plots(idx, pdf_filename, fit, diag_pars, niter, nchain):

    # Converting the Stan fit object to an ArviZ InferenceData
    samples   = fit.extract(permuted=True)  # extracting parameter samples
    data      = az.from_pystan(fit)
    tmp       = data.posterior
    var_names = list(tmp.data_vars)

    # Filtering the list of parameters to plot
    unwanted  = {'losvd','spec','conv_spec','poly','bestfit','losvd_','losvd_mod','spec_pred','log_likelihood'}
    vars_main = [e for e in var_names if e not in unwanted]
   
    # Reading diagnostic parameters
    accept_stat, stepsize,  treedepth = np.zeros((niter,nchain)), np.zeros((niter,nchain)) , np.zeros((niter,nchain))
    n_leapfrog,  divergent, energy    = np.zeros((niter,nchain)), np.zeros((niter,nchain)) , np.zeros((niter,nchain))  
    for j in range(nchain):
        accept_stat[:,j] = diag_pars[j]['accept_stat__']
        stepsize[:,j]    = diag_pars[j]['stepsize__']
        treedepth[:,j]   = diag_pars[j]['treedepth__']
        n_leapfrog[:,j]  = diag_pars[j]['n_leapfrog__']
        divergent[:,j]   = diag_pars[j]['divergent__']
        energy[:,j]      = diag_pars[j]['energy__']    
 
    # Creating the plot across multiple PDF pages
    pdf_pages = PdfPages(pdf_filename)

    print(" - Sampler params")
    plot_sampler_params(idx,accept_stat,stepsize,treedepth,n_leapfrog,divergent,energy)
    pdf_pages.savefig()
    print(" - Chains")
    plot_chains(samples,vars_main)
    pdf_pages.savefig()
   #  print(" - Trace plot [Main params]")
   #  az.plot_trace(data, var_names=vars_main)
   #  pdf_pages.savefig()
   #  print(" - Trace plot [LOSVD]")
   #  az.plot_trace(data, var_names=['losvd'])
   #  pdf_pages.savefig()
    print(" - Pair plot")
    az.plot_pair(data, var_names=vars_main, divergences=True, kind='kde', fill_last=False)
    pdf_pages.savefig()
    print(" - Autocorr plot")
    az.plot_autocorr(data, var_names=vars_main)
    pdf_pages.savefig()
    print(" - Energy plot")
    az.plot_energy(data)
    pdf_pages.savefig()
    pdf_pages.close()   

    return
Example #16
 def get_inference_data5(self, data):
     """minimal input."""
     return from_pystan(
         posterior=data.obj,
         posterior_predictive=None,
         prior=data.obj,
         prior_predictive=None,
         coords=None,
         dims=None,
         posterior_model=data.model,
         log_likelihood=False,
         prior_model=data.model,
         save_warmup=pystan_version() == 2,
     )
Example #17
 def get_inference_data(self):
     return from_pystan(
         fit=self.obj,
         posterior_predictive="y_hat",
         observed_data=["y"],
         log_likelihood="log_lik",
         coords={"school": np.arange(self.data["J"])},
         dims={
             "theta": ["school"],
             "y": ["school"],
             "log_lik": ["school"],
             "y_hat": ["school"],
             "theta_tilde": ["school"],
         },
     )
Example #18
    def fit(self, y1, y2, w1=None, w2=None):
        self.mcmc_ = get_mcmc(self.model, y1, y2, w1, w2, **self.kwargs)

        self.data_ = az.from_pystan(
            posterior=self.mcmc_,
            posterior_predictive=['y1_pred', 'y2_pred'],
            observed_data=['y1', 'y2'],
            log_likelihood='log_lik',
            coords={
                'group_mu': ['Group 1', 'Group 2'],
                'group_sigma': ['Group 1', 'Group 2']
            },
            dims={
                'mu': ['group_mu'],
                'sigma': ['group_sigma']
            })
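
Once fit() has stored the InferenceData in self.data_, standard ArviZ tools apply directly. A sketch, where estimator stands for an instance of the class above:

# Hypothetical usage of the fitted estimator.
estimator.fit(y1, y2)
az.summary(estimator.data_, var_names=['mu', 'sigma'])
az.plot_forest(estimator.data_, var_names=['mu'])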
Example #19
 def get_inference_data2(self):
     # dictionary
     observed_data = {'y_hat': self.data['y']}
     # ndarray
     log_likelihood = self.obj.extract('log_lik', permuted=False)['log_lik']
     return from_pystan(fit=self.obj,
                        posterior_predictive='y_hat',
                        observed_data=observed_data,
                        log_likelihood=log_likelihood,
                        coords={'school': np.arange(self.data['J'])},
                        dims={
                            'theta': ['school'],
                            'y': ['school'],
                            'log_lik': ['school'],
                            'y_hat': ['school'],
                            'theta_tilde': ['school']
                        })
Example #20
def get_subgroup_models():
    df = get_formatted_data()

    # drug class subgroup analysis
    model_res_dict = {}

    for drug_class in DRUG_CLASSES:
        study_ids = df.query(f'drug_class == "{drug_class}"').study_id.unique()
        df_sub = df[(df.study_id.isin(study_ids))
                    & (df.drug_class.isin([drug_class, 'placebo']))].copy()
        placebo_controlled_study_ids = set(df_sub.query('is_active == 1')['study_id']) \
            .intersection(df_sub.query('is_active == 0')['study_id'])
        df_sub = df_sub[df_sub.study_id.isin(placebo_controlled_study_ids)]

        for column in ['study_id', 'scale', 'drug_class']:
            df_sub = add_rank_column(df_sub, column)

        df_sub = aggregate_treatment_arms(df_sub)
        df_sub = get_variability_effect_sizes(df_sub)

        model = 'remr'
        stan_model = compile_model(os.path.join(stan_model_path,
                                                f'{model}.stan'),
                                   model_name=model)

        data_dict = get_data_dict(df_sub, 'lnVR')

        fit = stan_model.sampling(data=data_dict,
                                  iter=4000,
                                  warmup=1000,
                                  chains=3,
                                  control={'adapt_delta': 0.99},
                                  check_hmc_diagnostics=True,
                                  seed=1)
        pystan.check_hmc_diagnostics(fit)

        data = az.from_pystan(
            posterior=fit,
            posterior_predictive=['Y_pred'],
            observed_data=['Y_meas', 'X_meas'],
            log_likelihood='log_lik',
        )

        model_res_dict[drug_class] = data
    return model_res_dict
Example #21
 def get_inference_data3(self, data, eight_schools_params):
     """log_likelihood as a ndarray."""
     # ndarray
     log_likelihood = pystan_extract_normal(data.obj, "log_lik")["log_lik"]
     return from_pystan(
         fit=data.obj,
         posterior_predictive=["y_hat"],
         observed_data=["y"],
         log_likelihood=log_likelihood,
         coords={"school": np.arange(eight_schools_params["J"])},
         dims={
             "theta": ["school"],
             "y": ["school"],
             "log_lik": ["school"],
             "y_hat": ["school"],
             "theta_tilde": ["school"],
         },
     )
Example #22
 def get_inference_data(self, data, eight_school_params):
     """vars as str."""
     return from_pystan(
         posterior=data.obj,
         posterior_predictive="y_hat",
         prior=data.obj,
         prior_predictive="y_hat",
         observed_data="y",
         log_likelihood="log_lik",
         coords={"school": np.arange(eight_school_params["J"])},
         dims={
             "theta": ["school"],
             "y": ["school"],
             "log_lik": ["school"],
             "y_hat": ["school"],
             "theta_tilde": ["school"],
         },
     )
Example #23
 def get_inference_data2(self):
     # dictionary
     observed_data = {"y_hat": self.data["y"]}
     # ndarray
     log_likelihood = self.obj.extract("log_lik", permuted=False)["log_lik"]
     return from_pystan(
         fit=self.obj,
         posterior_predictive="y_hat",
         observed_data=observed_data,
         log_likelihood=log_likelihood,
         coords={"school": np.arange(self.data["J"])},
         dims={
             "theta": ["school"],
             "y": ["school"],
             "log_lik": ["school"],
             "y_hat": ["school"],
             "theta_tilde": ["school"],
         },
     )
Example #24
 def get_inference_data(self, data, eight_school_params):
     """log_likelihood as a var."""
     prior = pystan_extract_unpermuted(data.obj)
     prior = {"theta_test": prior["theta"]}
     return from_pystan(
         fit=data.obj,
         prior=prior,
         posterior_predictive="y_hat",
         observed_data=["y"],
         log_likelihood="log_lik",
         coords={"school": np.arange(eight_school_params["J"])},
         dims={
             "theta": ["school"],
             "y": ["school"],
             "log_lik": ["school"],
             "y_hat": ["school"],
             "theta_tilde": ["school"],
         },
     )
Example #25
    def run(self, samples=1000, chains=1, **kwargs):  # pylint: disable=arguments-differ
        """
        Run the Stan sampler.

        Parameters
        ----------
        samples : int
            Number of samples to obtain (in each chain).
        chains : int
            Number of chains to use.
        kwargs : dict
            Optional keyword arguments passed onto the PyStan StanModel.sampling() call.

        Returns
        -------
        arviz.InferenceData
            An ArviZ InferenceData instance.
        """
        self.fit = self.stan_model.sampling(data=self.X, iter=samples, chains=chains, **kwargs)
        return from_pystan(self.fit)
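
A usage sketch, where sampler stands for an instance of the class above; any extra keyword arguments are forwarded to StanModel.sampling():

# Hypothetical usage of run().
import arviz as az
idata = sampler.run(samples=2000, chains=4, control={'adapt_delta': 0.95})
az.summary(idata)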
Example #26
    def test_empty_parameter(self):
        if pystan_version() == 2:
            model_code = """
                parameters {
                    real y;
                    vector[0] z;
                }
                model {
                    y ~ normal(0,1);
                }
            """
            from pystan import StanModel

            model = StanModel(model_code=model_code)
            fit = model.sampling(iter=10, chains=2, check_hmc_diagnostics=False)
            posterior = from_pystan(posterior=fit)
            assert hasattr(posterior, "posterior")
            assert hasattr(posterior.posterior, "y")
            assert not hasattr(posterior.posterior, "z")
Example #27
def get_baseline_severity_model():
    df = prepare_data()

    effect_statistic = 'lnVR'
    data_dict = {
        'N': len(df.study_id.unique()),
        'Y_meas': df.groupby(['study_id']).agg({effect_statistic: 'first'}).reset_index()[effect_statistic].values,
        'X_meas': df.groupby(['study_id']).agg({'lnRR': 'first'}).reset_index()['lnRR'].values,
        'SD_Y': np.sqrt(df.groupby(['study_id']).agg(
            {f'var_{effect_statistic}': 'first'}).reset_index()[f'var_{effect_statistic}'].values),
        'SD_X': np.sqrt(df.groupby(['study_id']).agg(
            {'var_lnRR': 'first'}).reset_index()['var_lnRR'].values),
        'X0': df.groupby(['study_id']).apply(
            lambda x: np.sum(x['baseline'] * x['N']) / np.sum(x['N'])
        ).reset_index()[0].values,
        'run_estimation': 1
    }

    stan_model = compile_model(
        os.path.join(stan_model_path, 'remr_bs.stan'),
        model_name='remr_bs'
    )

    fit = stan_model.sampling(
        data=data_dict,
        iter=4000,
        warmup=1000,
        chains=3,
        control={'adapt_delta': 0.99},
        check_hmc_diagnostics=True,
        seed=1
    )
    pystan.check_hmc_diagnostics(fit)

    data = az.from_pystan(
        posterior=fit,
        posterior_predictive=['Y_pred'],
        observed_data=['Y_meas', 'X_meas', 'X0'],
        log_likelihood='log_lik',
    )
    return data
Example #28
def get_simulation_results():
    data_dict = {
        'N': 1000,
        'rho': -0.4,
        'sd_te': 6.5,
        'sd_m': 0.001,
        'lambda': 0.2,
        'theta': 0.9
    }
    simulation_stan_model = compile_model(os.path.join(stan_model_path,
                                                       'simulation.stan'),
                                          model_name='simulation')
    fit = simulation_stan_model.sampling(data=data_dict,
                                         warmup=500,
                                         iter=2500,
                                         chains=2,
                                         check_hmc_diagnostics=True,
                                         seed=1)
    pystan.check_hmc_diagnostics(fit)
    data = az.from_pystan(posterior=fit)
    return data
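
A brief follow-up sketch for inspecting the simulation draws:

# Hypothetical follow-up: trace plots and a summary of the simulation fit.
idata = get_simulation_results()
az.plot_trace(idata)
az.summary(idata)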
Example #29
 def get_inference_data3(self, data, eight_schools_params):
     """multiple vars as lists."""
     return from_pystan(
         posterior=data.obj,
         posterior_predictive=["y_hat", "log_lik"],  # wrong, but fine for testing
         predictions=["y_hat", "log_lik"],  # wrong, but fine for testing
         prior=data.obj,
         prior_predictive=["y_hat", "log_lik"],  # wrong, but fine for testing
         constant_data=["sigma", "y"],  # wrong, but fine for testing
         predictions_constant_data=["sigma", "y"],  # wrong, but fine for testing
         coords={"school": np.arange(eight_schools_params["J"])},
         dims={
             "theta": ["school"],
             "y": ["school"],
             "sigma": ["school"],
             "y_hat": ["school"],
             "eta": ["school"],
         },
         posterior_model=data.model,
         prior_model=data.model,
     )
Example #30
def run_model(rankings,
              survey_head_to_heads,
              stan_file=STAN_FILE,
              model_config=None):
    # NOTE: model_config is assumed to be a dict of extra Stan data flags
    # (e.g. selecting ranking-only, survey-only or combined likelihoods);
    # the original default, the string 'combined', would fail to unpack
    # into the data dict passed to sampling() below.
    if model_config is None:
        model_config = {}
    n_episode_contestant = rankings.groupby(
        'episode_id')['contestant_id'].nunique()
    episode_rank_counts = (rankings.groupby(
        ['episode_id', 'rank']).size().unstack().fillna(0).astype(int))
    contestants = rankings.groupby('contestant_id').first()
    contestants['id_stan'] = range(1, len(contestants) + 1)
    rankings = rankings.join(contestants['id_stan'], on='contestant_id')
    survey_head_to_heads = (survey_head_to_heads.join(
        contestants['id_stan'].rename('id_stan_own'),
        on='own').join(contestants['id_stan'].rename('id_stan_opp'), on='opp'))
    input_data = {
        'N': len(rankings),
        'K': len(PREDICTORS),
        'C': len(contestants),
        'E': rankings['episode_id'].nunique(),
        'X': contestants[PREDICTORS].fillna(0).values,
        'N_episode_contestant': n_episode_contestant.values,
        'episode_rank': rankings['rank'].values,
        'contestant': rankings['id_stan'].values,
        'N_survey': len(survey_head_to_heads),
        'survey_contestant': survey_head_to_heads['id_stan_own'].values,
        'survey_opponent': survey_head_to_heads['id_stan_opp'].values,
        'survey_count': survey_head_to_heads['count'].values,
        'survey_wins': survey_head_to_heads['wins'].values
    }
    model = StanModel_cache(file=stan_file)
    fit = model.sampling(data={**input_data, **model_config})
    return arviz.from_pystan(fit,
                             coords={
                                 'contestant': contestants.index,
                                 'predictor': PREDICTORS
                             },
                             dims={
                                 'ability': ['contestant'],
                                 'beta': ['predictor']
                             })
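
A usage sketch, assuming rankings and survey_head_to_heads dataframes shaped as the function expects:

# Hypothetical usage: summarize contestant abilities and predictor effects.
idata = run_model(rankings, survey_head_to_heads)
arviz.summary(idata, var_names=['ability', 'beta'])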