def test_nested_model_to_netcdf(self, tmp_path):
    with pm.Model("scope") as model:
        b = pm.Normal("var")
        trace = pm.sample(100, tune=0)
        az.to_netcdf(trace, tmp_path / "trace.nc")
    trace1 = az.from_netcdf(tmp_path / "trace.nc")
    assert "scope::var" in trace1.posterior
def bayes_multiple_detector_each_sigma(t, s, n):
    scala = 1000
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=0.02 * scala, sigma=0.015 * scala)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -1.5 * scala, -1.4 * scala)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())
        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -1.6 * scala, -1.4 * scala)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        tau1 = abrupt_model["tau1"]
        tau2 = abrupt_model["tau2"]
        dtau = pm.DiscreteUniform('dtau', tau1 + 500, tau2)
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    g = pm.model_to_graphviz(abrupt_model)
    g.view()

    with abrupt_model:
        # pm.find_MAP()
        trace = pm.sample(20000, tune=5000)

    az.plot_trace(trace)
    az.to_netcdf(trace, getpath('tracepath') + 'bd9_4_add_new_rule')
    plt.show()
    pm.summary(trace)
    return trace
def bayes_multiple_detector_I(t, s, n, tracename):
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=30, sigma=5)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -30, 30)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())
        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -100, 0)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        # add random walk
        # sigma_rw = pm.Uniform("sigma_rw", 0, 10)
        g_rw = pm.GaussianRandomWalk("g_rw", tau=1, shape=len(s))
        s_obs = pm.Normal("s_obs", mu=g_rw + mu, sigma=sigma, observed=s)

    # g = pm.model_to_graphviz(abrupt_model)
    # g.view()

    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(5000, tune=1000)

    az.plot_trace(trace)
    plt.show()
    az.plot_autocorr(trace)
    plt.show()
    az.to_netcdf(trace, getpath('tracepath') + tracename)
    pm.summary(trace)
    return trace
def test_io_function(self, data, eight_schools_params):
    # create inference data and assert all attributes are present
    inference_data = self.get_inference_data(  # pylint: disable=W0612
        data, eight_schools_params)
    test_dict = {
        "posterior": ["eta", "theta", "mu", "tau"],
        "posterior_predictive": ["eta", "theta", "mu", "tau"],
        "sample_stats": ["eta", "theta", "mu", "tau"],
        "prior": ["eta", "theta", "mu", "tau"],
        "prior_predictive": ["eta", "theta", "mu", "tau"],
        "sample_stats_prior": ["eta", "theta", "mu", "tau"],
        "observed_data": ["J", "y", "sigma"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails

    # check filename does not exist and save InferenceData
    here = os.path.dirname(os.path.abspath(__file__))
    data_directory = os.path.join(here, "..", "saved_models")
    filepath = os.path.join(data_directory, "io_function_testfile.nc")
    # arviz function
    to_netcdf(inference_data, filepath)

    # assert InferenceData has been saved correctly
    assert os.path.exists(filepath)
    assert os.path.getsize(filepath) > 0
    inference_data2 = from_netcdf(filepath)
    fails = check_multiple_attrs(test_dict, inference_data2)
    assert not fails
    os.remove(filepath)
    assert not os.path.exists(filepath)
def test_io_function(self, data, eight_schools_params):
    inference_data = self.get_inference_data(  # pylint: disable=W0612
        data, eight_schools_params)
    assert hasattr(inference_data, "posterior")

    here = os.path.dirname(os.path.abspath(__file__))
    data_directory = os.path.join(here, "saved_models")
    filepath = os.path.join(data_directory, "io_function_testfile.nc")
    # arviz function
    to_netcdf(inference_data, filepath)

    assert os.path.exists(filepath)
    assert os.path.getsize(filepath) > 0
    inference_data2 = from_netcdf(filepath)
    assert hasattr(inference_data2, "posterior")
    os.remove(filepath)
    assert not os.path.exists(filepath)
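# The two tests above depend on the suite's fixtures (`data`,
# `eight_schools_params`). A minimal, self-contained sketch of the same
# save/load round trip, assuming only arviz and numpy:
import numpy as np
import arviz as az

# build a toy InferenceData: 2 chains, 100 draws of a scalar "mu"
idata = az.from_dict(posterior={"mu": np.random.randn(2, 100)})
az.to_netcdf(idata, "toy_trace.nc")
idata2 = az.from_netcdf("toy_trace.nc")
assert "mu" in idata2.posterior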
def hierarchical_reg_reference(samples=2000, target_df=None):
    """Runs a hierarchical model over the reference data set."""
    _, _, dataframe = load_data()
    # record whether we run on the reference set only, since target_df is
    # replaced by an empty DataFrame below and can no longer be None when the
    # trace is saved at the end
    is_reference_run = target_df is None
    if target_df is None:
        target_df = pd.DataFrame({})
    else:
        del target_df['bmrb_code']
    dataframe = dataframe[dataframe.protein != '1UBQ']
    dataframe = pd.concat([dataframe, target_df], ignore_index=True)

    mean_teo = dataframe["ca_teo"].mean()
    mean_exp = dataframe["ca_exp"].mean()
    std_teo = dataframe["ca_teo"].std()
    std_exp = dataframe["ca_exp"].std()
    ca_exp = (dataframe.ca_exp - mean_exp) / std_exp
    ca_teo = (dataframe.ca_teo - mean_teo) / std_teo
    categories = pd.Categorical(dataframe["res"])
    index = categories.codes
    N = len(np.unique(index))

    with pm.Model() as model:
        # hyper-priors
        alpha_sd = pm.HalfNormal("alpha_sd", 1.0)
        beta_sd = pm.HalfNormal("beta_sd", 1.0)
        sigma_sd = pm.HalfNormal("sigma_sd", 1.0)
        # priors
        α = pm.Normal("α", 0, alpha_sd, shape=N)
        β = pm.HalfNormal("β", beta_sd, shape=N)
        σ = pm.HalfNormal("σ", sigma_sd, shape=N)
        # linear model
        μ = pm.Deterministic("μ", α[index] + β[index] * ca_teo)
        # likelihood
        cheshift = pm.Normal("cheshift", mu=μ, sigma=σ[index], observed=ca_exp)
        idata = pm.sample(samples, tune=2000, random_seed=18759,
                          target_accept=0.9, return_inferencedata=True)
        pps = pm.sample_posterior_predictive(
            idata,
            samples=samples * idata.posterior.dims["chain"],
            random_seed=18759)

    idata.add_groups(
        {"posterior_predictive": {"cheshift": pps["cheshift"][None, :, :]}})

    if is_reference_run:
        az.to_netcdf(idata,
                     os.path.join("data", "trace_reference_structures.nc"))
    return dataframe, idata
def sample_model(model, data, outprefix=None, **kwargs):
    """
    Sample a Stan model and write the posterior samples to a file.

    :param model: Stan model to sample from
    :param data: data to pass to the model
    :param outprefix: prefix for the output file; samples are written to
        '<outprefix>fit.hdf5' (a NetCDF file, which is HDF5-based)

    All other keyword arguments are passed to model.sampling(). The result
    is cached.
    """
    fit = _sample_model(model, data, **kwargs)
    if outprefix is not None:
        arviz.to_netcdf(fit, outprefix + 'fit.hdf5')
    return fit
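# A hypothetical call (sketch only: `model` and `data` stand in for a compiled
# Stan model and its data dict, and `_sample_model` is the cached wrapper
# assumed above; the extra kwargs are forwarded to model.sampling()):
#
#   fit = sample_model(model, data, outprefix='run1_', iter=2000, chains=4)
#   idata = arviz.from_netcdf('run1_fit.hdf5')  # read the samples back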
def main(model_label, session='7t2', bids_folder='/data'):
    if model_label not in ['model1', 'certainty', 'certainty_full']:
        raise NotImplementedError(f'Not implemented {model_label}')

    df = get_all_behavior(sessions=session, bids_folder=bids_folder)
    print(df)

    if model_label == 'model1':
        model = EvidenceModel(df)

    if model_label.startswith('certainty'):
        from scipy.stats import zscore
        df['z_certainty'] = df.groupby(['subject']).certainty.apply(zscore)
        df['z_certainty'] = df['z_certainty'].fillna(0.0)

        if model_label == 'certainty':
            model = EvidenceModelRegression(df, regressors={
                'evidence_sd1': 'z_certainty',
                'evidence_sd2': 'z_certainty'})
        elif model_label == 'certainty_full':
            model = EvidenceModelRegression(df, regressors={
                'evidence_sd1': 'z_certainty',
                'evidence_sd2': 'z_certainty',
                'risky_prior_mu': 'z_certainty',
                'risky_prior_sd': 'z_certainty'})

    model.build_model()
    trace = model.sample(500, 500)

    target_folder = op.join(bids_folder, 'derivatives', 'evidence_models')
    if not op.exists(target_folder):
        os.makedirs(target_folder)

    az.to_netcdf(
        trace,
        op.join(target_folder, f'evidence_ses-{session}_model-{model_label}.nc'))
def main(model_type, session='7t2', bids_folder='/data'):
    if model_type not in ['model1', 'model2']:
        raise NotImplementedError(f'Not implemented {model_type}')

    df = get_all_behavior(sessions=session, bids_folder=bids_folder)
    model = ProbitModel(df, model_type, bids_folder)
    model.build_model()
    trace = model.sample(500, 500)

    target_folder = op.join(bids_folder, 'derivatives', 'probit_models')
    if not op.exists(target_folder):
        os.makedirs(target_folder)

    az.to_netcdf(
        trace,
        op.join(target_folder, f'evidence_ses-{session}_model-{model_type}.nc'))
def bayes_multiple_detector(t, s, n):
    scala = 1000
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=0.02 * scala, sigma=0.015 * scala)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -1.5 * scala, -1.4 * scala)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())
        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -1.6 * scala, -1.4 * scala)
            mu = T.switch(tau >= t, mu, _mu)
            if i < (n + 1):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(20000, tune=5000)

    az.plot_trace(trace)
    az.to_netcdf(trace, getpath('tracepath') + 'bd9_4')
    plt.show()
    pm.summary(trace)
    return trace
def run(i, bin_list, runname, niter, nchain, adapt_delta, max_treedepth,
        verbose, save_chains, save_plots):

    idx = bin_list[i]
    stridx = str(idx)
    misc.printRUNNING(runname + " - Bin: " + stridx)

    try:
        # Checking the desired bin exists
        input_file = "../results/" + runname + "/" + runname + "_results.hdf5"
        struct = h5py.File(input_file, 'r+')
        check_bin = struct.get('out/' + stridx)
        if check_bin is None:
            misc.printFAILED("Bin " + stridx + " does not exist in " + input_file)
            return 'ERROR'

        # Defining the version of the code to use
        codefile = 'stan_model/bayes-losvd_ghfit.stan'
        if not os.path.exists(codefile):
            misc.printFAILED(codefile + " does not exist.")
            sys.exit()

        # Defining output names and directories
        outdir = "../results/" + runname
        pdf_filename = outdir + "/" + runname + "_gh_diagnostics_bin" + stridx + ".pdf"
        summary_filename = outdir + "/" + runname + "_gh_Stan_summary_bin" + stridx + ".txt"
        arviz_filename = outdir + "/" + runname + "_gh_chains_bin" + stridx + ".netcdf"
        sample_filename = outdir + "/" + runname + "_gh_progress_bin" + stridx + ".csv"
        outhdf5 = outdir + "/" + runname + "_gh_results_bin" + stridx + ".hdf5"

        # Creating the structure with the data for Stan
        # -------
        # NOTE: losvd_obs, sigma_losvd is what goes into the GH fit
        #       losvd is the processed output of bayes_losvd_run.py
        #       losvd_obs = losvd[2,:]
        #       sigma_losvd is an averaged version of the true 1sigma
        #       uncertainties from the bayes_losvd_run.py fit
        # -------
        losvd = struct['out/' + stridx + '/losvd'][2, :]
        sigma = np.zeros((len(losvd), 2))
        sigma[:, 0] = np.fabs(struct['out/' + stridx + '/losvd'][1, :] - losvd)
        sigma[:, 1] = np.fabs(struct['out/' + stridx + '/losvd'][3, :] - losvd)
        sigma_losvd = np.mean(sigma, axis=1)

        data = {'nvel': struct['in/nvel'],
                'xvel': struct['in/xvel'],
                'losvd_obs': losvd,
                'sigma_losvd': sigma_losvd}

        # Creating a temporary file adding the input data to the input HDF5 file info
        temp = tempfile.NamedTemporaryFile()
        struct2 = h5py.File(temp.name, 'w')
        struct.copy('in', struct2)
        struct2.create_dataset("out/" + stridx + "/losvd",
                               data=np.array(struct['out/' + stridx + '/losvd']),
                               compression="gzip")

        # Running the model
        with open(codefile, 'r') as myfile:
            code = myfile.read()
        model = stan_cache(model_code=code, codefile=codefile)
        fit = model.sampling(data=data, iter=niter, chains=nchain,
                             control={'adapt_delta': adapt_delta,
                                      'max_treedepth': max_treedepth},
                             sample_file=sample_filename,
                             check_hmc_diagnostics=True)
        samples = fit.extract(permuted=True)
        diag_pars = fit.get_sampler_params()

        # If requested, saving sample chains
        if save_chains:
            print("")
            print("# Saving chains in Arviz (NETCDF) format: " + arviz_filename)
            arviz_data = az.from_pystan(posterior=fit)
            az.to_netcdf(arviz_data, arviz_filename)

        # Saving Stan's summary of main parameters on disk
        print("")
        print("# Saving Stan summary: " + summary_filename)
        unwanted = {'losvd_mod'}
        misc.save_stan_summary(fit, unwanted=unwanted, verbose=verbose,
                               summary_filename=summary_filename)

        # Processing output and saving results
        print("")
        print("# Processing and saving results: " + outhdf5)
        misc.process_stan_output_hdp(struct2, samples, outhdf5, stridx)

        # Creating diagnostic plots
        if save_plots:
            if os.path.exists(pdf_filename):
                os.remove(pdf_filename)
            print("")
            print("# Saving diagnostic plots: " + pdf_filename)
            create_diagnostic_plots(idx, pdf_filename, fit, diag_pars,
                                    niter, nchain)

        # Removing progress files
        print("")
        print("# Deleting progress files")
        misc.delete_files(sample_filename, 'csv')
        misc.delete_files(sample_filename, 'png')

        # If we are here, we are DONE!
        struct.close()
        struct2.close()
        misc.printDONE(runname + " - Bin: " + stridx)

        return 'OK'

    except Exception:
        misc.printFAILED()
        traceback.print_exc()
        return 'ERROR'
y_op = Deterministic('y_op', r * (1 / mu - 1))

# phi
phi = Deterministic(
    'phi',
    (s * mu + n_sim * r) / (s + n_sim * r + n_sim * y_mean))  # , shape=n_sample)

# Define likelihood
likelihood = NegativeBinomial("y",
                              alpha=r,
                              mu=r * (1 / phi - 1),
                              observed=y_shared)  # attention

# Inference!
idata = sample(1000, cores=4, progressbar=True, chains=4, tune=2000,
               return_inferencedata=False)
az.to_netcdf(idata, filename)
print(az.summary(idata, var_names=['r', 'gam', 's']))

'''
with model:
    idata = az.from_netcdf(filename)
    # az.plot_trace(idata, var_names=['r','gam','s','beta0'])
    print(az.summary(idata, var_names=['r', 'gam', 's']))
    # print('')
'''
def plot_reference_densities(residue_list, text_size=12, figsize=None, save=False):
    """Plot the reference densities of CS differences for high quality protein structures."""
    remainder = len(residue_list) % 3
    if remainder == 0:
        plot_length = len(residue_list) // 3
    else:
        plot_length = len(residue_list) // 3 + 1

    if not figsize:
        figsize = (13, plot_length * 2)

    _, ax = plt.subplots(
        plot_length,
        3,
        figsize=figsize,
        sharex=False,
        sharey=True,
        constrained_layout=True,
    )
    ax = ax.ravel()

    if os.path.isfile(os.path.join("data", "dataframe_reference_structures.csv")):
        dataframe_all = pd.read_csv(
            os.path.join("data", "dataframe_reference_structures.csv"))
    else:
        # hierarchical_reg_reference already returns an InferenceData
        dataframe_all, trace_all = hierarchical_reg_reference()
        az.to_netcdf(trace_all,
                     os.path.join("data", "trace_reference_structures.nc"))
        dataframe_all.to_csv(
            os.path.join("data", "dataframe_reference_structures.csv"))

    categories_all = pd.Categorical(dataframe_all["res"])
    index_all = categories_all.codes
    perct_dict = {}
    if "CYS" in residue_list:
        dataframe_all = dataframe_all[dataframe_all.res != "CYS"]

    for i, residue in enumerate(residue_list):
        ca_teo = dataframe_all[dataframe_all.res == residue].y_pred.values
        ca_exp = dataframe_all[dataframe_all.res == residue].ca_exp.values
        difference_dist = ca_teo - ca_exp
        _, density = az.stats.density_utils.kde(difference_dist)
        x0, x1 = np.min(difference_dist), np.max(difference_dist)
        x_range = np.linspace(x0, x1, len(density))
        perct = np.percentile(difference_dist, [0, 5, 20, 80, 95, 100])
        perct_dict[residue] = perct
        idx0 = 0
        for index, p in enumerate(perct):
            ax[i].tick_params(labelsize=16)
            idx1 = np.argsort(np.abs(x_range - p))[0]
            ax[i].fill_between(
                x_range[idx0:idx1],
                density[idx0:idx1],
                color="C0",
                zorder=0,
                alpha=0.3,
            )
            idx0 = idx1
        ax[i].set_title(residue, fontsize=text_size)

    [
        ax[idy].spines[position].set_visible(False)
        for position in ["left", "top", "right"]
        for idy in range(len(ax))
    ]
    [ax_.set_yticks([]) for ax_ in ax]
    [ax_.set_xlim(-6, 6) for ax_ in ax]
    for i in range(1, len(ax) - len(residue_list) + 1):
        ax[-i].axis("off")
    if save:
        plt.savefig("reference.png", dpi=300, transparent=True)
    return _, ax, perct_dict
["left palm", "right palm"])], "site", baseline_index=None) for n in range(n_chains): result_temp = model_palms.sample_hmc(num_results=int(20000), n_burnin=5000) results.append(result_temp) #%% res_all = az.concat(results, dim="chain") print(res_all.posterior) #%% az.to_netcdf(res_all, write_path + "/multi_chain_50_len20000_all") #%% acc_probs = pd.DataFrame( pd.concat([r.effect_df.loc[:, "Inclusion probability"] for r in results])) acc_probs["chain_no"] = np.concatenate( [np.repeat(i + 1, 21) for i in range(n_chains)]) acc_probs.index = acc_probs.index.droplevel(0) acc_probs = acc_probs.reset_index() print(acc_probs)
     'gamma': gamma,
     'xs': randn(nobs, 3)}

f = m.sampling(data=d, iter=2 * args.iter, thin=args.thin, init=init)
fit = az.convert_to_inference_data(f)

print(f)

# Now that we're done with sampling, let's draw some pretty lines.
lines = (('H0', {}, true_params['H0']),
         ('Om', {}, true_params['Om']),
         ('w0', {}, true_params['w']),
         ('R0_30', {}, true_params['R0_30']),
         ('MMin', {}, true_params['MMin']),
         ('MMax', {}, true_params['MMax']),
         ('smooth_min', {}, true_params['smooth_min']),
         ('smooth_max', {}, true_params['smooth_max']),
         ('alpha', {}, true_params['alpha']),
         ('beta', {}, true_params['beta']),
         ('gamma', {}, true_params['gamma']),
         ('neff_det', {}, 4 * nobs))

az.plot_trace(fit,
              var_names=['H0', 'Om', 'w0', 'R0_30', 'MMax', 'smooth_max',
                         'alpha', 'beta', 'gamma', 'neff_det'],
              lines=lines)
savefig(args.tracefile)

az.to_netcdf(fit, args.chainfile)
                  f0, fdot, fddot, sigma, hbin,
                  log(args.Amin), log(args.Amax),
                  N, start_pt=start_pt)

rstate = np.random.get_state()

with model:
    trace = pm.sample(draws=args.draws,
                      tune=n_tune,
                      chains=args.chains,
                      cores=args.cores,
                      step=pm.NUTS(
                          potential=QuadPotentialFullAdapt(
                              model.ndim, zeros(model.ndim)),
                          target_accept=args.target_accept),
                      start=start_pt,
                      init=init)

fit = az.from_pymc3(trace)

# write atomically: save to a temporary file, then rename over the target
ofile = args.outfile + '.tmp'
if op.exists(ofile):
    os.remove(ofile)
az.to_netcdf(fit, ofile)
os.rename(ofile, args.outfile)
                  verbose=True,
                  iter=2000,
                  chains=4,
                  n_jobs=-1,
                  sample_file='stanwound_sample_file.csv',
                  init='random',
                  init_r=0.1)

print(fit.stansummary(pars=('phif', 'phif_sigma', 'b', 'mu',
                            'von_mises_prob_sigma', 'kv', 'k0', 'kf', 'k2',
                            'stress_sigma')))

data = az.from_pystan(posterior=fit)
az.to_netcdf(data, 'save_arviz_data_stanwound')

with open('stanwound_model_pickle.pkl', 'wb') as f:
    pickle.dump(multilevel_model, f, protocol=pickle.HIGHEST_PROTOCOL)

# pandas
dataframe = fit.to_dataframe(pars=('kv', 'k0', 'kf', 'k2', 'b', 'mu',
                                   'phif', 'phif_scaled'),
                             permuted=True)
dataframe.to_csv('stanwound_fit_permuted.csv')

predictive_dataframe = fit.to_dataframe(pars=(
    'stress_mean_predicted_phif_4',
    'stress_predicted_phif_4',
    'stress_mean_predicted_phif_20',
    'stress_predicted_phif_20',
with model:
    check = pm.sample_prior_predictive(samples=3)

plt.plot(x.flatten(), y, label="data", color='red')
for i in range(check['pr'].shape[0]):
    plt.plot(x.flatten(), check['pr'][i], alpha=0.3)
plt.legend()
plt.savefig("experiments/plots/gp_time_prior.png", format='png')
plt.show()

with model:
    y_ = pm.Normal("y", mu=pr, sigma=sigma, observed=y)

with model:
    mp = pm.find_MAP(maxeval=300)
    trace = pm.sample(
        200,
        n_init=200,
        tune=100,
        chains=2,
        cores=2,
        return_inferencedata=True,
    )

arviz.to_netcdf(trace, 'experiments/results/gp_time_trace')

n_nonconverged = int(
    np.sum(arviz.rhat(trace)[["sigma", "pr_rotated_"]].to_array() > 1.03).values)
print("%i variables' MCMC chains appear not to have converged." % n_nonconverged)
def fit_multilevel_model(data_df: pd.DataFrame, op_dir: pl.Path, n_trials: int = 200):
    """
    Constructs the model from Gillan et al. eLife 2016;5:e11305.
    DOI: 10.7554/eLife.11305, pg. 19-20, using implementational details from
    the supplementary information of Otto et al. PNAS 2013;
    DOI: 10.1073/pnas.1312011110. Runs MAP estimation and NUTS sampling.
    Results are written into <op_dir>. Output of NUTS sampling is stored in a
    NetCDF file that can be read and analyzed using the package `arviz`.

    :param pandas.DataFrame data_df: concatenation of DataFrames returned by
        input_related.read_single_data_file
    :param pathlib.Path op_dir: path of output folder; must exist
    :param int n_trials: number of trials to fit
    :return: None
    """
    unique_subjects = pd.unique(data_df["subject_id"])
    n_subjects = unique_subjects.shape[0]
    data_df.sort_values(by=["subject_id", "trial_number"], inplace=True)
    print(f"Using data of {n_subjects} subjects: {unique_subjects.tolist()}")

    with pm.Model() as multilevel_model:
        # Priors for group-level params
        mu_alpha = pm.Uniform(name='mu_alpha', lower=0, upper=1, transform=None)
        sigma_alpha_log = pm.Exponential(name="sigma_alpha_log", lam=1.5,
                                         transform=None)
        sigma_alpha = pm.Deterministic(name='sigma_alpha',
                                       var=pm.math.exp(sigma_alpha_log))
        mu_beta = pm.Normal(name="mu_beta", mu=0, sigma=100)
        sigma_beta_log = pm.Exponential(name="sigma_beta_log", lam=1.5,
                                        transform=None)
        sigma_beta = pm.Deterministic(name='sigma_beta',
                                      var=pm.math.exp(sigma_beta_log))

        for subject_ind, (subject_id, subject_df) in enumerate(
                data_df.groupby("subject_id")):
            alpha = pm.Beta(name=f'alpha_{subject_id}',
                            alpha=mu_alpha * sigma_alpha,
                            beta=(1 - mu_alpha) * sigma_alpha)
            beta = pm.Normal(name=f"beta_{subject_id}", mu=mu_beta,
                             sigma=sigma_beta, shape=5)
            beta2, beta_mb, beta_mf0, beta_mf1, beta_st = (
                beta[0], beta[1], beta[2], beta[3], beta[4])

            print(f"{datetime.datetime.now()} Doing {subject_id}")

            choice1_repeated = subject_df["choice1"].astype(
                bool).diff().fillna(False).astype(int)
            data_df.loc[subject_df.index.values,
                        "choice1_repeated"] = choice1_repeated

            # Value function: first dimension state2, second dimension choice2
            q2_arr = np.zeros((2, 2), dtype=object)
            # dimension is choice1
            qmb_arr = np.zeros((2,), dtype=object)
            qmf0_arr = np.zeros((2,), dtype=object)
            qmf1_arr = np.zeros((2,), dtype=object)

            for trial_number, trial_df in subject_df.groupby("trial_number"):
                if trial_number > n_trials:
                    continue
                subject_trial_row = trial_df.iloc[0]
                st = subject_trial_row["state2"]
                c2t = subject_trial_row["choice2"]
                c1t = subject_trial_row["choice1"]
                rt = subject_trial_row["reward"]
                repeated_choice = choice1_repeated[trial_df.index.values[0]]

                p1 = logisitic(beta2 * get_value_diff_0m1(q2_arr[st, :]))
                # observations
                c2t_rv = pm.Bernoulli(name=f'c2_{trial_number}_{subject_ind}',
                                      observed=c2t, p=p1)
                q2_arr[st, c2t] = (1 - alpha) * q2_arr[st, c2t] + rt

                p1 = logisitic(beta_mb * get_value_diff_0m1(qmb_arr) +
                               beta_mf0 * get_value_diff_0m1(qmf0_arr) +
                               beta_mf1 * get_value_diff_0m1(qmf1_arr) +
                               beta_st * repeated_choice)
                c1t_rv = pm.Bernoulli(name=f'c1_{trial_number}_{subject_ind}',
                                      observed=c1t, p=p1)

                qmb_arr[0] = pm.math.maximum(q2_arr[0, 0], q2_arr[0, 1])
                qmb_arr[1] = pm.math.maximum(q2_arr[1, 0], q2_arr[1, 1])
                qmf0_arr[c1t] = (1 - alpha) * qmf0_arr[c1t] + q2_arr[st, c2t]
                qmf1_arr[c1t] = (1 - alpha) * qmf1_arr[c1t] + rt

            gc.collect()

    start = {"mu_alpha": 0.5, "sigma_alpha_log": 1,
             "mu_beta": 0.5, "sigma_beta_log": 1}

    print(f"{datetime.datetime.now()} MAP estimation started")
    map_estimate = pm.find_MAP(model=multilevel_model, progressbar=False,
                               start=start)
    print(f"{datetime.datetime.now()} MAP estimation done")
    print(map_estimate)
    with open(
            op_dir /
            f"map_estimate_multilevel_{n_subjects}subjects_{n_trials}trials.json",
            "w") as fp:
        json.dump({k: v.tolist() for k, v in map_estimate.items()}, fp)

    print(f"{datetime.datetime.now()} sampling started")
    trace = pm.sample(draws=2000, tune=1000, return_inferencedata=False,
                      compute_convergence_checks=True, progressbar=False,
                      cores=4, start=start, model=multilevel_model)
    print(f"{datetime.datetime.now()} sampling done")
    print(arviz.summary(trace, round_to=2))
    arviz.to_netcdf(
        data=trace,
        filename=op_dir /
        f"sampling_results_multilevel_{n_subjects}subjects_{n_trials}trials.nc")
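# The docstring above notes that the NUTS output can be re-read with arviz.
# A minimal sketch of that downstream step (the filename mirrors the pattern
# used above; the subject/trial counts here are placeholders for your run):
import arviz
idata = arviz.from_netcdf(
    "sampling_results_multilevel_10subjects_200trials.nc")
print(arviz.summary(idata, var_names=["mu_alpha", "mu_beta"], round_to=2))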
data_file = 'DATA/processed/dataset.csv'
x, y, x_new = get_data(data_file)

with pm.Model() as model:
    mt = pm.gp.cov.Matern32(2, ls=0.1)
    gp = pm.gp.Latent(cov_func=mt)
    pr = gp.prior('pr', X=x)
    sigma = pm.HalfNormal('sigma', sigma=2)
    f_star = gp.conditional("f_star", x_new)

with model:
    check = pm.sample_prior_predictive(samples=1)

with model:
    y_ = pm.Normal("y", mu=pr, sigma=sigma, observed=y)

with model:
    trace = pm.sample(200, n_init=100, tune=100, chains=2, cores=2,
                      return_inferencedata=True)

arviz.to_netcdf(trace, 'experiments/results/gp_spatial_trace')

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
ax1.hexbin(x_new[:, 0], x_new[:, 1],
           C=trace.posterior['f_star'][0, :, :].mean(axis=0),
           gridsize=30, cmap='rainbow')
plot = ax1.scatter(x[:, 0], x[:, 1], c=y, s=300, cmap='rainbow')
ax2.scatter(x[:, 0], x[:, 1], c=check['pr'], s=300, cmap='rainbow')
ax1.set_title("Data + Posterior")
ax2.set_title("Prior")
plt.colorbar(plot)
plt.show()
def save_posterior(self, filename: str) -> None:
    posterior = self.posterior
    arviz.to_netcdf(posterior, filename)
    logger.info(f"Saved posterior samples to {filename}.")
def main():
    parser = argparse.ArgumentParser(
        description='Combine the individual posteriors for each S value.')
    parser.add_argument('datafile', type=str,
                        help='path to csv containing beta values')
    parser.add_argument('patientinfofile', type=str,
                        help='path to csv containing patientinfo')
    parser.add_argument('outputdir', type=str, default='~',
                        help='path to folder in which to store output')
    parser.add_argument('sample', type=str,
                        help='samplename of beta array (must be a column in '
                             'datafile and an index in patientinfo)')

    args = parser.parse_args()
    datafile = args.datafile
    patientinfofile = args.patientinfofile
    outputdir = args.outputdir
    sample = args.sample

    outsamplesdir = os.path.join(outputdir, sample, 'posterior')
    outfinaldir = os.path.join(outputdir, sample, 'outfinal')
    os.makedirs(outfinaldir, exist_ok=True)

    beta_values = pd.read_csv(datafile, index_col=0)
    patientinfo = pd.read_csv(patientinfofile, keep_default_na=False,
                              index_col=0)
    beta = beta_values[sample].dropna().values
    age = patientinfo.loc[sample, 'age']

    outsampleslist = glob.glob(os.path.join(outsamplesdir, 'sample_*.pkl'))
    S = list()
    results = dict()
    for outsamples in outsampleslist:
        s = int(outsamples.split('.pkl')[0].split('_')[-1])
        try:
            with open(outsamples, 'rb') as f:
                res = joblib.load(f)
            results[s] = res
            S.append(s)
            print(s)
        except EOFError:
            print('sample_{}.pkl is not a correctly formatted pickle file'.format(s))
    S.sort()

    n = len(beta)
    logZs = np.empty(len(S))
    logZerrs = np.empty(len(S))
    Nsamples = np.empty(len(S), dtype=int)
    for index, s in enumerate(S):
        try:
            logZs[index] = results[s].logz[-1]
            logZerrs[index] = results[s].logzerr[-1]
            Nsamples[index] = results[s].niter
        except AttributeError:
            # some runs store plain dicts instead of dynesty result objects
            logZs[index] = results[s]['logz']
            logZerrs[index] = results[s]['logzerr']
            Nsamples[index] = results[s]['niter']

    logZs_bootstrap = np.random.normal(loc=logZs, scale=logZerrs,
                                       size=(10000, len(logZs)))
    prob_s = softmax(logZs)
    prob_s_bootstrap = softmax(logZs_bootstrap, axis=1)
    prob_s_err = np.std(prob_s_bootstrap, axis=0)

    print('\nS:P(S)')
    for i, s in enumerate(S):
        print('{}:{:.3e}'.format(s, prob_s[i]))

    df = pd.DataFrame({'S': S, 'prob': prob_s, 'prob_err': prob_s_err})
    df['S'] = df.S.astype(int)
    df.to_csv(os.path.join(outfinaldir, "prob_of_S.csv"), index=False)

    sns.set_style('white')
    sns.set_context("paper", font_scale=1.6)
    fig, ax = plt.subplots()
    ax.bar(S, prob_s, yerr=prob_s_err, color=sns.xkcd_rgb["denim blue"])
    sns.despine()
    plt.xlabel("Stem Cell Number (S)")
    plt.ylabel("Probability")
    plt.tight_layout()
    plt.savefig(os.path.join(outfinaldir, "probability_S.png"), dpi=300)
    plt.close()

    Ndraws = 3000
    Ssamples = np.random.choice(S, size=Ndraws, p=prob_s)
    final_posterior = np.empty((Ndraws, 8))
    final_posterior[:, -1] = Ssamples
    beta_hat = np.empty((1, Ndraws, n))
    LL = np.empty((1, Ndraws, n))

    progress_ints = (np.arange(0.1, 1.1, 0.1) * Ndraws - 1).astype(int)
    counter = 10
    for i in range(Ndraws):
        if i in progress_ints:
            print('{}% complete'.format(counter))
            counter += 10
        s = Ssamples[i]
        try:
            posterior = dynesty.utils.resample_equal(
                results[s].samples, softmax(results[s].logwt))
        except AttributeError:
            posterior = results[s]['samples']
        random_row = np.random.randint(posterior.shape[0])
        final_posterior[i, :7] = posterior[random_row, :7]
        lamsample, musample, gammasample, deltasample, etasample = \
            final_posterior[i, :5]
        kappasample = posterior[random_row, 7:]
        LL[0, i, :] = flipflop.loglikelihood_perpoint(
            posterior[random_row, :], beta, s, age)
        ProbDist = flipflop.runModel(s, lamsample, musample, gammasample, age)
        k_sample = np.random.choice(np.arange(0, 2 * s + 1), size=n, p=ProbDist)
        beta_sample = k_sample / (2 * s)
        beta_sample = flipflop.rescale_beta(beta_sample, deltasample, etasample)
        beta_hat[0, i, :] = flipflop.beta_rvs(beta_sample, kappasample[k_sample])

    with open(os.path.join(outfinaldir, "finalposterior.pkl"), 'wb') as f:
        joblib.dump(final_posterior, f)

    df = pd.DataFrame({'lam': final_posterior[:, 0],
                       'mu': final_posterior[:, 1],
                       'gamma': final_posterior[:, 2],
                       'delta': final_posterior[:, 3],
                       'eta': final_posterior[:, 4],
                       'kappamean': final_posterior[:, 5],
                       'kappadisp': final_posterior[:, 6],
                       'S': Ssamples})
    df.to_csv(os.path.join(outfinaldir, "finalposterior.csv"), index=False)

    fig, ax = plt.subplots()
    plt.hist(beta, np.linspace(0, 1, 100), density=True, alpha=0.4, linewidth=0)
    plt.hist(np.ravel(beta_hat), np.linspace(0, 1, 100), density=True,
             alpha=0.4, linewidth=0)
    plt.legend(("Data", "Posterior predictive"))
    plt.xlabel("Fraction Methylated (Beta)")
    plt.ylabel("Probability density")
    sns.despine()
    plt.tight_layout()
    plt.savefig("{}/posterior_predictive.png".format(outfinaldir), dpi=300)
    plt.close()

    inference = az.from_dict(
        posterior={'lam': final_posterior[:, 0],
                   'mu': final_posterior[:, 1],
                   'gamma': final_posterior[:, 2],
                   'delta': final_posterior[:, 3],
                   'eta': final_posterior[:, 4],
                   'kappamean': final_posterior[:, 5],
                   'kappadisp': final_posterior[:, 6],
                   'S': Ssamples},
        observed_data={'beta': beta},
        posterior_predictive={'beta_hat': beta_hat},
        sample_stats={"log_likelihood": LL})
    az.to_netcdf(inference, "{}/inference.nc".format(outfinaldir))

    pairs = az.plot_pair(inference,
                         var_names=('lam', 'mu', 'gamma', 'delta', 'eta',
                                    'kappamean', 'kappadisp'))
    plt.savefig('{}/plot_pairs.png'.format(outfinaldir), dpi=300)
    plt.close()

    az.plot_loo_pit(inference, y='beta', y_hat='beta_hat', ecdf=True)
    plt.savefig('{}/plot_loo_pit_ecdf.png'.format(outfinaldir), dpi=300)
    plt.close()

    sns.set_context("paper", font_scale=1.0)
    az.plot_loo_pit(inference, y='beta', y_hat='beta_hat')
    plt.ylabel('Leave One Out - Probability Integral Transform')
    plt.xlabel('Cumulative Density Function')
    plt.savefig('{}/plot_loo_pit.png'.format(outfinaldir), dpi=300)
    plt.close()
with pm.Model() as model:
    l_ = pm.Gamma("l", alpha=2, beta=1)
    eta = pm.HalfCauchy("eta", beta=1)

    cov = eta ** 2 * pm.gp.cov.Matern52(1, l_)
    gp = pm.gp.Latent(cov_func=cov)
    f = gp.prior("f", X=X)

    sigma = pm.HalfCauchy("sigma", beta=5)
    nu = pm.Gamma("nu", alpha=2, beta=0.1)
    y_ = pm.StudentT("y", mu=f, lam=1.0 / sigma, nu=nu, observed=y)

    trace = pm.sample(200, n_init=100, tune=100, chains=2, cores=2,
                      return_inferencedata=True)

az.to_netcdf(trace, 'src/experiments/results/lat_gp_trace')

# check Rhat, values above 1 may indicate convergence issues
n_nonconverged = int(
    np.sum(az.rhat(trace)[["eta", "l", "f_rotated_"]].to_array() > 1.03).values)
print("%i variables' MCMC chains appear not to have converged." % n_nonconverged)

# plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, trace.posterior["f"][0, :, :], X)

# plot the data and the true latent function
def run_model(
    model_func,
    data,
    ep,
    num_samples=500,
    num_warmup=500,
    num_chains=4,
    target_accept=0.75,
    max_tree_depth=15,
    save_results=True,
    output_fname=None,
    model_kwargs=None,
    save_json=False,
    chain_method="parallel",
    heuristic_step_size=True,
):
    """
    Model run utility.

    :param model_func: numpyro model
    :param data: PreprocessedData object
    :param ep: EpidemiologicalParameters object
    :param num_samples: number of samples
    :param num_warmup: number of warmup samples
    :param num_chains: number of chains
    :param target_accept: target acceptance probability
    :param max_tree_depth: maximum tree depth
    :param save_results: whether to save full results
    :param output_fname: output filename
    :param model_kwargs: extra keyword arguments for the model function
    :param save_json: whether to save json
    :param chain_method: Numpyro chain method to use
    :param heuristic_step_size: whether to find a heuristic step size
    :return: posterior_samples, warmup_samples, info_dict (dict with assorted
        diagnostics), Numpyro mcmc object
    """
    print(
        f"Running {num_chains} chains, {num_samples} per chain with "
        f"{num_warmup} warmup steps")

    nuts_kernel = NUTS(
        model_func,
        init_strategy=init_to_median,
        target_accept_prob=target_accept,
        max_tree_depth=max_tree_depth,
        find_heuristic_step_size=heuristic_step_size,
    )
    mcmc = MCMC(
        nuts_kernel,
        num_samples=num_samples,
        num_warmup=num_warmup,
        num_chains=num_chains,
        chain_method=chain_method,
    )
    rng_key = random.PRNGKey(0)

    # hmcstate = nuts_kernel.init(rng_key, 1, model_args=(data, ep))
    # nRVs = hmcstate.adapt_state.inverse_mass_matrix.size
    # inverse_mass_matrix = init_diag_inv_mass_mat * jnp.ones(nRVs)
    # mass_matrix_sqrt_inv = np.sqrt(inverse_mass_matrix)
    # mass_matrix_sqrt = 1. / mass_matrix_sqrt_inv
    # hmcstate = hmcstate._replace(adapt_state=hmcstate.adapt_state._replace(inverse_mass_matrix=inverse_mass_matrix))
    # hmcstate = hmcstate._replace(adapt_state=hmcstate.adapt_state._replace(mass_matrix_sqrt_inv=mass_matrix_sqrt_inv))
    # hmcstate = hmcstate._replace(adapt_state=hmcstate.adapt_state._replace(mass_matrix_sqrt=mass_matrix_sqrt))
    # mcmc.post_warmup_state = hmcstate

    info_dict = {
        "model_name": model_func.__name__,
    }

    start = time.time()

    if model_kwargs is None:
        model_kwargs = {}
    info_dict["model_kwargs"] = model_kwargs

    # also collect some extra information for better diagnostics!
    print(f"Warmup Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    mcmc.warmup(
        rng_key,
        data,
        ep,
        **model_kwargs,
        collect_warmup=True,
        extra_fields=["num_steps", "mean_accept_prob", "adapt_state"],
    )
    mcmc.get_extra_fields()["num_steps"].block_until_ready()

    info_dict["warmup"] = {}
    info_dict["warmup"]["num_steps"] = np.array(
        mcmc.get_extra_fields()["num_steps"]).tolist()
    info_dict["warmup"]["step_size"] = np.array(
        mcmc.get_extra_fields()["adapt_state"].step_size).tolist()
    info_dict["warmup"]["inverse_mass_matrix"] = {}

    all_mass_mats = jnp.array(
        jnp.array_split(
            mcmc.get_extra_fields()["adapt_state"].inverse_mass_matrix,
            num_chains,
            axis=0,
        ))
    print(all_mass_mats.shape)
    for i in range(num_chains):
        info_dict["warmup"]["inverse_mass_matrix"][
            f"chain_{i}"] = all_mass_mats[i, -1, :].tolist()

    info_dict["warmup"]["mean_accept_prob"] = np.array(
        mcmc.get_extra_fields()["mean_accept_prob"]).tolist()

    warmup_samples = mcmc.get_samples()

    print(f"Sample Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    mcmc.run(
        rng_key,
        data,
        ep,
        **model_kwargs,
        extra_fields=["num_steps", "mean_accept_prob", "adapt_state"],
    )
    posterior_samples = mcmc.get_samples()
    # if you don't block this, the timer won't quite work properly
    posterior_samples[list(posterior_samples.keys())[0]].block_until_ready()
    print(f"Sample Finished: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    end = time.time()
    time_per_sample = float(end - start) / num_samples
    divergences = int(mcmc.get_extra_fields()["diverging"].sum())

    info_dict["time_per_sample"] = time_per_sample
    info_dict["total_runtime"] = float(end - start)
    info_dict["divergences"] = divergences
    info_dict["sample"] = {}
    info_dict["sample"]["num_steps"] = np.array(
        mcmc.get_extra_fields()["num_steps"]).tolist()
    info_dict["sample"]["mean_accept_prob"] = np.array(
        mcmc.get_extra_fields()["mean_accept_prob"]).tolist()
    info_dict["sample"]["step_size"] = np.array(
        mcmc.get_extra_fields()["adapt_state"].step_size).tolist()

    print(f"Sampling {num_samples} samples per chain took {end - start:.2f}s")
    print(f"There were {divergences} divergences.")

    grouped_posterior_samples = mcmc.get_samples(True)

    all_ess = np.array([])
    for k in grouped_posterior_samples.keys():
        ess = numpyro.diagnostics.effective_sample_size(
            np.asarray(grouped_posterior_samples[k]))
        all_ess = np.append(all_ess, ess)
    print(f"{np.sum(np.isnan(all_ess))} ESS were nan")
    all_ess = all_ess[np.logical_not(np.isnan(all_ess))]

    info_dict["ess"] = {
        "med": float(np.percentile(all_ess, 50)),
        "lower": float(np.percentile(all_ess, 2.5)),
        "upper": float(np.percentile(all_ess, 97.5)),
        "min": float(np.min(all_ess)),
        "max": float(np.max(all_ess)),
    }
    print(
        f"Mean ESS: {info_dict['ess']['med']:.2f} "
        f"[{info_dict['ess']['lower']:.2f} ... {info_dict['ess']['upper']:.2f}]")

    if num_chains > 1:
        all_rhat = np.array([])
        for k in grouped_posterior_samples.keys():
            rhat = numpyro.diagnostics.gelman_rubin(
                np.asarray(grouped_posterior_samples[k]))
            all_rhat = np.append(all_rhat, rhat)
        print(f"{np.sum(np.isnan(all_rhat))} Rhat were nan")
        all_rhat = all_rhat[np.logical_not(np.isnan(all_rhat))]

        info_dict["rhat"] = {
            "med": float(np.percentile(all_rhat, 50)),
            "upper": float(np.percentile(all_rhat, 97.5)),
            "lower": float(np.percentile(all_rhat, 2.5)),
            "min": float(np.min(all_rhat)),
            "max": float(np.max(all_rhat)),
        }
        print(
            f"Rhat: {info_dict['rhat']['med']:.2f} "
            f"[{info_dict['rhat']['lower']:.2f} ... {info_dict['rhat']['upper']:.2f}]")

    if save_results:
        print("Saving .netcdf")
        try:
            inf_data = az.from_numpyro(mcmc)
            if output_fname is None:
                output_fname = (
                    f'{model_func.__name__}-'
                    f'{datetime.now(tz=None).strftime("%d-%m;%H-%M-%S")}.netcdf')
            az.to_netcdf(inf_data, output_fname)
            json_fname = output_fname.replace(".netcdf", ".json")
            if save_json:
                print("Saving Json")
                with open(json_fname, "w") as f:
                    json.dump(info_dict, f, ensure_ascii=False, indent=4)
        except Exception as e:
            print(e)

    return posterior_samples, warmup_samples, info_dict, mcmc
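# A hypothetical downstream read of what run_model saves (sketch only: the
# filenames are whatever run_model printed, or whatever was passed as
# output_fname with save_json=True):
import json
import arviz as az

inf_data = az.from_netcdf("my_model-01-01;12-00-00.netcdf")
print(az.summary(inf_data))
with open("my_model-01-01;12-00-00.json") as f:
    info = json.load(f)
print(info["divergences"], info["ess"]["med"])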
def run(i, bin_list, runname, niter, nchain, adapt_delta, max_treedepth,
        verbose=False, save_chains=False, save_plots=False, fit_type=None):

    idx = bin_list[i]
    stridx = str(idx)
    misc.printRUNNING(runname + " - Bin: " + stridx + " - Fit type: " + fit_type)

    try:
        # Defining the version of the code to use
        codefile, extrapars = misc.read_code(fit_type)

        # Defining output names and directories
        rootname = runname + "-" + fit_type
        outdir = "../results/" + rootname
        pdf_filename = outdir + "/" + rootname + "_diagnostics_bin" + stridx + ".pdf"
        summary_filename = outdir + "/" + rootname + "_Stan_summary_bin" + stridx + ".txt"
        arviz_filename = outdir + "/" + rootname + "_chains_bin" + stridx + ".netcdf"
        sample_filename = outdir + "/" + rootname + "_progress_bin" + stridx + ".csv"
        outhdf5 = outdir + "/" + rootname + "_results_bin" + stridx + ".hdf5"

        # Creating the basic structure with the data for Stan
        struct = h5py.File("../preproc_data/" + runname + ".hdf5", "r")
        data = {'npix_obs': np.array(struct['in/npix_obs']),
                'ntemp': np.array(struct['in/ntemp']),
                'nvel': np.array(struct['in/nvel']),
                'npix_temp': np.array(struct['in/npix_temp']),
                'mask': np.array(struct['in/mask']),
                'nmask': np.array(struct['in/nmask']),
                'porder': np.array(struct['in/porder']),
                'spec_obs': np.array(struct['in/spec_obs'][:, idx]),
                'sigma_obs': np.array(struct['in/sigma_obs'][:, idx]),
                'templates': np.array(struct['in/templates']),
                'mean_template': np.array(struct['in/mean_template']),
                'velscale': np.array(struct['in/velscale']),
                'xvel': np.array(struct['in/xvel'])}

        # Adding any extra parameter needed for that particular fit_type
        for key, val in extrapars.items():
            data[key] = val

        # Running the model
        with open(codefile, 'r') as myfile:
            code = myfile.read()
        model = stan_cache(model_code=code, codefile=codefile)
        fit = model.sampling(data=data, iter=niter, chains=nchain,
                             control={'adapt_delta': adapt_delta,
                                      'max_treedepth': max_treedepth},
                             sample_file=sample_filename,
                             check_hmc_diagnostics=True)
        samples = fit.extract(permuted=True)   # Extracting parameter samples
        diag_pars = fit.get_sampler_params()   # Getting sampler diagnostic params

        # If requested, saving sample chains
        if save_chains:
            print("")
            print("# Saving chains in Arviz (NETCDF) format: " + arviz_filename)
            arviz_data = az.from_pystan(
                posterior=fit,
                observed_data=['mask', 'spec_obs', 'sigma_obs'])
            az.to_netcdf(arviz_data, arviz_filename)

        # Saving Stan's summary of main parameters on disk
        print("")
        print("# Saving Stan summary: " + summary_filename)
        unwanted = {'spec', 'conv_spec', 'poly', 'bestfit', 'a', 'losvd_'}
        misc.save_stan_summary(fit, unwanted=unwanted, verbose=verbose,
                               summary_filename=summary_filename)

        # Processing output and saving results
        print("")
        print("# Processing and saving results: " + outhdf5)
        misc.process_stan_output_hdp(struct, samples, outhdf5, stridx)

        # Creating diagnostic plots
        if save_plots:
            if os.path.exists(pdf_filename):
                os.remove(pdf_filename)
            print("")
            print("# Saving diagnostic plots: " + pdf_filename)
            create_diagnostic_plots(idx, pdf_filename, fit, diag_pars,
                                    niter, nchain)

        # Removing progress files
        print("")
        print("# Deleting progress files")
        misc.delete_files(sample_filename, 'csv')
        misc.delete_files(sample_filename, 'png')

        # If we are here, we are DONE!
        struct.close()
        misc.printDONE(runname + " - Bin: " + stridx + " - Fit type: " + fit_type)

        return 'OK'

    except Exception:
        misc.printFAILED()
        traceback.print_exc()
        return 'ERROR'