Example 1
    def from_netcdf(cls, *file_name, fast_open=False):
        # Load one or more NetCDF files into InferenceData objects and
        # wrap them in an instance of this class.
        if len(file_name) == 1:
            inference_data = [av.from_netcdf(file_name[0])]
        else:
            inference_data = [av.from_netcdf(f) for f in file_name]
        return cls(*inference_data, fast_open=fast_open)
Example 2
 def test_nested_model_to_netcdf(self, tmp_path):
     with pm.Model("scope") as model:
         b = pm.Normal("var")
         trace = pm.sample(100, tune=0)
     az.to_netcdf(trace, tmp_path / "trace.nc")
     trace1 = az.from_netcdf(tmp_path / "trace.nc")
     assert "scope::var" in trace1.posterior
Example 3
def do_predict(data_path: str):
    """Generate MCMC samples given a Maud output folder at train_path.

    This function creates a new directory in output_dir with a name starting
    with "maud-predict-output". It first copies the testing directory at
    train_path into the new this directory at new_dir/user_input, then runs the
    running_stan.predict_out_of_sample function to write samples in
    new_dir/oos_samples.

    The trained output is stored in the new_dir/trained_samples folder along
    with the user input required to generate the trained samples.

    """
    idata_train = az.from_netcdf(os.path.join(data_path, "idata.nc"))
    mi = load_maud_input(os.path.join(data_path, "user_input"))
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"maud-predict_output-{mi.config.name}-{now}"
    output_path = os.path.join(data_path, output_name)
    test_samples_path = os.path.join(output_path, "test_samples")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(test_samples_path)
    idata_predict = predict(mi, output_path, idata_train)
    # delete attrs hack to make netcdf save work:
    # https://github.com/arviz-devs/arviz/issues/1554
    idata_predict.sample_stats.attrs = {}  # type: ignore
    idata_predict.posterior.attrs = {}  # type: ignore
    idata_predict.to_netcdf(os.path.join(output_path, "idata_predict.nc"))
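
A minimal usage sketch, assuming do_predict is exposed as a script entry point; the argparse wiring below is illustrative and not part of the original module.

import argparse

if __name__ == "__main__":
    # Hypothetical CLI wrapper; the argument mirrors do_predict's signature.
    parser = argparse.ArgumentParser(
        description="Out-of-sample prediction for a trained Maud run")
    parser.add_argument("data_path",
                        help="folder containing idata.nc and user_input/")
    args = parser.parse_args()
    do_predict(args.data_path)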
Example 4
    def test_io_function(self, data, eight_schools_params):
        # create inference data and assert all attributes are present
        inference_data = self.get_inference_data(  # pylint: disable=W0612
            data, eight_schools_params)
        test_dict = {
            "posterior": ["eta", "theta", "mu", "tau"],
            "posterior_predictive": ["eta", "theta", "mu", "tau"],
            "sample_stats": ["eta", "theta", "mu", "tau"],
            "prior": ["eta", "theta", "mu", "tau"],
            "prior_predictive": ["eta", "theta", "mu", "tau"],
            "sample_stats_prior": ["eta", "theta", "mu", "tau"],
            "observed_data": ["J", "y", "sigma"],
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails

        # check filename does not exist and save InferenceData
        here = os.path.dirname(os.path.abspath(__file__))
        data_directory = os.path.join(here, "..", "saved_models")
        filepath = os.path.join(data_directory, "io_function_testfile.nc")
        # save with arviz's to_netcdf function
        to_netcdf(inference_data, filepath)

        # Assert InferenceData has been saved correctly
        assert os.path.exists(filepath)
        assert os.path.getsize(filepath) > 0
        inference_data2 = from_netcdf(filepath)
        fails = check_multiple_attrs(test_dict, inference_data2)
        assert not fails
        os.remove(filepath)
        assert not os.path.exists(filepath)
Example 5
 def test_get_scale_factor(self, filename):
     fp = pathlib.Path(pathlib.Path(__file__).parent, "testdata", filename)
     idata = arviz.from_netcdf(str(fp))
     assert isinstance(idata, arviz.InferenceData)
     scale_factor = model.get_scale_factor(idata)
     assert isinstance(scale_factor, xarray.DataArray)
     assert scale_factor.coords.dims == ("sample", )
     assert 1000 < scale_factor.mean() < 200_000
Example 6
def get_example_results() -> az.InferenceData:
    """Get example inference results data.

    Returns:
        arviz.InferenceData: Inference data object.
    """

    return az.from_netcdf(
        os.path.join(PACKAGE_DIR, "data", "example_results.nc"))
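
A brief usage sketch of the helper above; az.summary and az.plot_trace are standard ArviZ calls applied to the packaged example results.

# Usage sketch: load the packaged example results and inspect them.
idata = get_example_results()
print(az.summary(idata))  # tabular posterior summary
az.plot_trace(idata)      # quick visual diagnostics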
Example 7
 def test_plot_details(self, filename, plot_positive):
     fp = pathlib.Path(pathlib.Path(__file__).parent, "testdata", filename)
     idata = arviz.from_netcdf(str(fp))
     assert isinstance(idata, arviz.InferenceData)
     fig, axs = plotting.plot_details(
         idata,
         plot_positive=plot_positive
     )
     pyplot.close()
Example 8
 def test_get_case_curves(self, filename):
     fp = pathlib.Path(pathlib.Path(__file__).parent, "testdata", filename)
     idata = arviz.from_netcdf(str(fp))
     assert isinstance(idata, arviz.InferenceData)
     case_curves = model.get_case_curves(idata)
     assert isinstance(case_curves, tuple)
     for obj in case_curves:
         assert isinstance(obj, xarray.DataArray)
         assert obj.coords.dims == ("date", "sample")
Example 9
    def load_idata(self, path):
        """Load a previously saved idata object.

        Args:
            path (str): Path to load the idata from. The .nc file extension
                is appended automatically (hard-coded).
        """
        self.m_idata = az.from_netcdf(f'{path}.nc')
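
A matching save method is not shown in the snippet; a minimal counterpart sketch, assuming self.m_idata is an arviz.InferenceData, could look like:

    def save_idata(self, path):
        """Hypothetical counterpart: save self.m_idata, appending the .nc extension."""
        self.m_idata.to_netcdf(f'{path}.nc')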
Example 10
def main():
    infd = az.from_netcdf(NCDF_FILE)
    scores = pd.read_csv(PREPARED_DATA_CSV)
    # true_abilities = scores.groupby("name")["true_ability"].first()

    f, ax = plt.subplots(figsize=[6, 16])
    ax = plot_marginals(infd, "ability", ax)
    f.savefig(os.path.join(PLOT_DIR, "marginals.png"), bbox_inches="tight")

    f, ax = plt.subplots(figsize=[20, 10])
    ax = plot_ppc(infd, scores, ax)
    f.savefig(os.path.join(PLOT_DIR, "ppc.png"), bbox_inches="tight")
Example 11
 def from_netcdf(cls, netcdf_path: str, restart: bool = False):
     logger.info(f"Loading NetCDF chain; restart = {restart}")
     samples = arviz.from_netcdf(netcdf_path)
     # if we're restarting sampling, take the last position
     if restart:
         last = samples.posterior.isel(draw=-1).mean(
             dim=["chain"]).to_array()
         initial = np.array(last)[0]
     # generate the initial values from the mean of the posterior
     else:
         initial = np.array(
             samples.posterior.mean(dim=["chain", "draw"]).to_array())[0]
     helper_obj = cls(initial)
     helper_obj.chain = np.array(samples.posterior.to_array()).squeeze()
     return helper_obj
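
A hedged usage sketch of the classmethod above; ChainHelper is a hypothetical stand-in for the class that defines it, and the file name is illustrative.

# Hypothetical usage; "ChainHelper" and "chain.nc" are placeholders.
helper = ChainHelper.from_netcdf("chain.nc", restart=True)
print(helper.chain.shape)  # stacked posterior samples as a NumPy array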
Example 12
def task_render_region_result(country: str, region: str,
                              run_date: pd.Timestamp):
    """ Render a CSV with summary output for a given region """
    az.rcParams["data.load"] = "eager"

    with tempfile.NamedTemporaryFile() as fp:
        s3.Bucket(S3_BUCKET).download_file(
            get_inference_data_key(run_date, region, country=country), fp.name)
        fp.seek(0)
        inference_data = az.from_netcdf(fp.name)

    summary = summarize_inference_data(inference_data)
    key = get_state_output_key(run_date, region, country=country)
    with fs.open(f"{S3_BUCKET}/{key}", "w") as file:
        summary.to_csv(file)
Example 13
 def test_io_function(self, data, eight_schools_params):
     inference_data = self.get_inference_data(  # pylint: disable=W0612
         data, eight_schools_params)
     assert hasattr(inference_data, "posterior")
     here = os.path.dirname(os.path.abspath(__file__))
     data_directory = os.path.join(here, "saved_models")
     filepath = os.path.join(data_directory, "io_function_testfile.nc")
      # save with arviz's to_netcdf function
     to_netcdf(inference_data, filepath)
     assert os.path.exists(filepath)
     assert os.path.getsize(filepath) > 0
     inference_data2 = from_netcdf(filepath)
     assert hasattr(inference_data2, "posterior")
     os.remove(filepath)
     assert not os.path.exists(filepath)
Example 14
def trace_export_4_mat(srcname, destname, n):
    trace = az.from_netcdf(srcname)
    data = {}
    size = trace.posterior.sigma.data.size
    data['sigma'] = trace.posterior.sigma.data.reshape(size, 1)
    data['dtau'] = trace.posterior.dtau.data.reshape(size, 1)

    for i in np.arange(1, n + 1):
        data['mu' + str(i)] = trace.posterior['mu' + str(i)].data.reshape(
            size, 1)
        data['tau' + str(i)] = trace.posterior['tau' + str(i)].data.reshape(
            size, 1)
    data['mu' + str(n + 1)] = trace.posterior['mu' + str(n + 1)].data.reshape(
        size, 1)
    io.savemat(destname, data, oned_as='column')
Example 15
def post_process(f, select_subset=None):
    c = add_constrained_cosmo(az.from_netcdf(f))

    print('Minimum effective sample size is')
    es = az.ess(c).min()
    print(min([es[k] for k in es.keys()]))

    print('Number of constrained samples is {:.1f}'.format(
        np.sum(np.exp(c.posterior.constrained_cosmo_log_wts.values))))

    traceplot(c)

    figure()
    sampled_variables_scatterplot(c)

    figure()
    neff_det_check_plot(c)

    figure()
    Hz_plot(c)

    cosmo_corner_plot(c)
    pop_corner_plot(c)

    figure()
    H0_plot(c)

    figure()
    pure_DE_w_plot(c)

    figure()
    constrained_versus_w0_plot(c)

    figure()
    MMax_plot(c)
    title(
        interval_string(c.posterior['MMax'].values.flatten(),
                        prefix=r'$M_\mathrm{max} = ',
                        postfix=r' \, M_\odot$'))

    figure()
    mass_correction_plot(c)

    return c
Example 16
def create_azid(model,
                save=False,
                dir_traces=[],
                fn='azid',
                prior=None,
                trace=None,
                ppc=None):
    """ Convenience: create azid structure """
    print(
        'Will deprecate this in v0.2.0. Functionality to extend now exists in arviz'
    )

    azid = az.from_pymc3(model=model,
                         prior=prior,
                         trace=trace,
                         posterior_predictive=ppc)
    if save:
        azid.to_netcdf(os.path.join(*dir_traces, f'{fn}.netcdf'))
        del azid
        azid = az.from_netcdf(os.path.join(*dir_traces, f'{fn}.netcdf'))
    return azid
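
An illustrative call, assuming model and trace come from a completed PyMC3 sampling run; the folder and file names are placeholders.

# Hypothetical usage; `model` and `trace` are assumed to exist from pm.sample().
azid = create_azid(model, save=True, dir_traces=['traces'], fn='fit01',
                   trace=trace)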
Example 17
                     coords=coords)

# more coords
coords["param"] = ["alpha", "beta"]
coords["param_bis"] = ["alpha", "beta"]

### compile the model (this cannot be saved, only the idata) ###
m = fm.covariation(t=t_train,
                   idx=idx_train,
                   y=y_train,
                   coords=coords,
                   dims=dims,
                   sigma=0.5)

# load idata #
m_idata = az.from_netcdf("../models_python/idata_covariation_generic.nc")

### Predictions ###
# load test data
test = pd.read_csv("../data/test.csv")

# get unique values for shared.
t_unique_test = np.unique(test.t.values)
idx_unique_test = np.unique(test.idx.values)

# get n unique for shapes.
n_time_test = len(t_unique_test)
n_idx_test = len(idx_unique_test)

# new coords as well
prediction_coords = {'idx': idx_unique_test, 't': t_unique_test}
Example 18
    'CA': r'C$_\alpha$',
    'CB': r'C$_\beta$',
    'C': 'C',
    'N': 'N',
}
state_to_name = {
    'like_o' : 'Open',
    'like_fo' : 'Fully Open',
    'like_c' : 'Closed',
}
for nucleus in nuclei_to_name.keys():
    for method in method_to_name.keys():
        for state in state_to_name.keys():
            print(nucleus,method,state)
            model_path = models_dir + f"{model_name}_{method}_{nucleus}.nc"
            my_model = az.from_netcdf(model_path)
            resids = my_model.posterior.resid
            n = resids.shape[0]
            fig, ax = plt.subplots(n // 6 + 1, 6, figsize=(13,15))
            for i in range(6 - n % 6):
                fig.delaxes(ax[-1,-i-1])
            ax = fig.axes
            with az.rc_context(rc={'plot.max_subplots': None}):
                az.plot_ppc(my_model, flatten=['step'], var_names=[state], random_seed=RANDOM_SEED, ax=ax)
            for r, a in zip(resids.to_index(), ax):
                a.set_title(f'{r}', size=12)
                a.set_xlabel('')
                a.legend_.set_visible(False)
            fig.suptitle(f'Posterior Predictive Check {method_to_name[method]} {nuclei_to_name[nucleus]} {state_to_name[state]} (ppm)', y=1.0, size=20)
            fig.tight_layout()
            plt.savefig(f'{reports_dir}for_print/ppc_{state_to_name[state]}_{nucleus}_{method}.png')
Example 19
def plot_cs_differences(
    protein_code,
    target_accept=0.9,
    save=False,
    bmrb_code=None,
    residues=None,
    pymol_session=False,
    ax=None,
    marker="o",
    perct_dict=None,
    plot_kwargs=None,
):
    """Plot the reference densities of CS differences for target protein structures."""

    _, _, reference_df = load_data()
    mean_exp = reference_df["ca_exp"].mean()
    std_exp = reference_df["ca_exp"].std()

    if not plot_kwargs:
        plot_kwargs = {}
    plot_kwargs.setdefault("s", 10)
    plot_kwargs.setdefault("alpha", 1)

    dataframe_full = get_biomolecular_data(protein_code, bmrb_code=bmrb_code)
    if f'idata_{protein_code}.nc' in os.listdir('./data/'):
        idata_target = az.from_netcdf(f'data/idata_{protein_code}.nc')
    else:
        dataframe_reference, idata = hierarchical_reg_reference(target_df=dataframe_full)
        idata_target = idata.sel(
            cheshift_dim_0=slice(
                dataframe_reference.shape[0] - dataframe_full.shape[0],
                dataframe_reference.shape[0]))

    idata_target.posterior_predictive = idata_target.posterior_predictive * std_exp + mean_exp

    if residues is None:
        residues = np.unique(dataframe_full.res.values)

    if ax is None:
        _, ax, perct_dict = plot_reference_densities(residues)

    param_list = []

    differences = idata_target.posterior_predictive['cheshift'].values.mean(axis=(0, 1)) - dataframe_full.ca_exp

    len_residues = len(differences)
    red_residues = 0
    yellow_residues = 0
    green_residues = 0
    for a, res in enumerate(residues):

        idx = np.array(dataframe_full.res.values == res).ravel()
        residue_indexes = np.array([dataframe_full.index + 1]).ravel()[idx]

        difference = differences[dataframe_full.res == res]
        n = len(difference)
        jitter = np.linspace(-0.15, 0.0015, n)

        for z, diff in enumerate(difference):

            if diff > 5:
                diff = 5
            if diff < -5:
                diff = -5

            perct = perct_dict[res]

            if diff < perct[1] or diff > perct[-2]:
                color = ["C1", "yellow"]
                red_residues += 1
            elif diff < perct[2] or diff > perct[-3]:
                color = ["C6", "orange"]
                yellow_residues += 1
            else:
                color = ["C2", "green"]
                green_residues += 1

            if res in dataframe_full.res.values:
                ax[a].scatter(
                    diff,
                    jitter[z],
                    marker=marker,
                    c=color[0],
                    linewidth=5,
                    **plot_kwargs,
                )

                param_list.append((residue_indexes[z], color[1], res))
            else:
                print(f"Residue {res} not in protein {protein_code}")

        annot = ax[a].annotate(
            "",
            xy=(0, 0),
            xytext=(7, 7),
            textcoords="offset points",
            bbox=dict(boxstyle="round", fc="k", alpha=0.1),
        )

    print(
        np.round(
            np.array([red_residues, yellow_residues, green_residues])
            / len_residues
            * 100
        )
    )

    if save:
        plt.savefig(os.path.join("images", f"{protein_code}_differences.png"), dpi=600)

    if pymol_session:

        create_pymol_session(
            protein_code,
            param_list,
        )

        print(f"Search working directory for a PyMol session of protein {protein_code}")
        """
    def update_annot(ind):
        pos = sc.get_offsets()[ind["ind"][0]]
        annot.xy = pos
        text = f"{pos[0]:.2f}"
        annot.set_text(text)

    def hover(event):
        vis = annot.get_visible()
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_annot(ind)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    # _.canvas.mpl_connect("motion_notify_event", hover)
        """
    # if residue_list is None:
    #    dataframe_full["colors"] = color_list

    return ax, dataframe_full, perct_dict, idata_target
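
An illustrative call of the function above; the PDB and BMRB codes are placeholders. When ax is None, the reference densities and perct_dict are built internally, as in the code above.

# Hypothetical call; "1UBQ" and "5387" are placeholder codes.
ax, dataframe_full, perct_dict, idata_target = plot_cs_differences(
    "1UBQ", bmrb_code="5387", save=False)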
Example 20
def hierarchical_reg_target(dataframe, target_accept=0.9, samples=2000):
    """
    Runs a hierarchical model over the target structure CS data set.

    Parameters:
    ----------
    dataframe : contains experimental and theoretical CS data

    """
    _, _, reference_dataframe = load_data()
    mean_teo = reference_dataframe["ca_teo"].mean()
    mean_exp = reference_dataframe["ca_exp"].mean()
    std_teo = reference_dataframe["ca_teo"].std()
    std_exp = reference_dataframe["ca_exp"].std()

    ca_exp = (dataframe.ca_exp - mean_exp) / std_exp
    ca_teo = (dataframe.ca_teo - mean_exp) / std_exp

    categories = pd.Categorical(dataframe["res"])
    index = categories.codes
    N = len(np.unique(index))

    if os.path.isfile(os.path.join("data", "trace_reference_structures.nc")):
        trace_all_proteins = az.from_netcdf(os.path.join("data", "trace_reference_structures.nc"))
        print(f"Loaded reference trace from {os.path.join('data', 'trace_reference_structures.nc')}")
    else:
        print(f"could not find reference trace from {os.path.join('data', 'trace_reference_structures.nc')}")
        print("Running model for reference structures")
        dataframe_all_proteins, trace_all_proteins = hierarchical_reg_reference()
        #trace_all_proteins = az.from_pymc3(trace_all_proteins)
        #az.to_netcdf(trace_all_proteins, os.path.join("data", "trace_reference_structures.nc"))
        #dataframe_all_proteins.to_csv(os.path.join("data", "dataframe_reference_structures.csv"))

    learnt_alpha_sd_mean = trace_all_proteins.posterior.alpha_sd.mean(
        dim=["chain", "draw"]
    ).values
    learnt_beta_sd_mean = trace_all_proteins.posterior.beta_sd.mean(
        dim=["chain", "draw"]
    ).values
    learnt_sigma_sd_mean = trace_all_proteins.posterior.sigma_sd.mean(
        dim=["chain", "draw"]
    ).values

    with pm.Model() as model:
        # hyper-priors
        alpha_sd = pm.HalfNormal("alpha_sd", learnt_alpha_sd_mean)
        beta_sd = pm.HalfNormal("beta_sd", learnt_beta_sd_mean)
        sigma_sd = pm.HalfNormal("sigma_sd", learnt_sigma_sd_mean)
        # priors
        α = pm.Normal("α", 0, alpha_sd, shape=N)
        β = pm.HalfNormal("β", beta_sd, shape=N)
        σ = pm.HalfNormal("σ", sigma_sd, shape=N)
        # linear model
        μ = pm.Deterministic("μ", α[index] + β[index] * ca_teo)
        # likelihood
        cheshift = pm.Normal("cheshift", mu=μ, sigma=σ[index], observed=ca_exp)
        idata = pm.sample(samples, tune=2000, random_seed=18759, target_accept=target_accept, return_inferencedata=True)
        pps = pm.sample_posterior_predictive(idata, samples=samples * idata.posterior.dims["chain"], random_seed=18759)
        idata.add_groups({"posterior_predictive":{"cheshift":pps["cheshift"][None,:,:]}})

    return dataframe, idata
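
A hedged usage sketch; target_df is a hypothetical DataFrame providing the 'res', 'ca_exp' and 'ca_teo' columns the function reads.

# Hypothetical call; target_df must provide 'res', 'ca_exp' and 'ca_teo'.
df, idata = hierarchical_reg_target(target_df, target_accept=0.95, samples=1000)
print(az.summary(idata, var_names=["α", "β"]))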
Example 21
extension_data_20 = np.loadtxt('extension_data_20.csv', delimiter=',')
stress_data_40 = np.loadtxt('stress_data_40.csv', delimiter=',')
extension_data_40 = np.loadtxt('extension_data_40.csv', delimiter=',')
stress_data_RS = np.loadtxt('stress_data_RS.csv', delimiter=',')
extension_data_RS = np.loadtxt('extension_data_RS.csv', delimiter=',')

mean_stress_data_4 = np.loadtxt('mean_stress_data_4.csv', delimiter=',')
mean_stress_data_20 = np.loadtxt('mean_stress_data_20.csv', delimiter=',')
mean_stress_data_40 = np.loadtxt('mean_stress_data_40.csv', delimiter=',')
mean_stress_data_RS = np.loadtxt('mean_stress_data_RS.csv', delimiter=',')
std_stress_data_4 = np.loadtxt('std_stress_data_4.csv', delimiter=',')
std_stress_data_20 = np.loadtxt('std_stress_data_20.csv', delimiter=',')
std_stress_data_40 = np.loadtxt('std_stress_data_40.csv', delimiter=',')
std_stress_data_RS = np.loadtxt('std_stress_data_RS.csv', delimiter=',')

data = az.from_netcdf('save_arviz_data_stanwound')

az.style.use("default")

az.rhat(data, var_names=['kv', 'k0', 'kf', 'k2', 'b', 'mu', 'phif'])

extra_kwargs = {"color": "lightsteelblue"}

az.plot_ess(data,
            kind="local",
            var_names=['kv', 'k0', 'kf', 'k2', 'b', 'mu', 'phif'],
            figsize=(18, 18),
            color="royalblue",
            extra_kwargs=extra_kwargs,
            textsize=20)
Example 22
                         header=0)

# run_modality, either:
# - 'testing' : without grid-search => quick;
# - 'production' : with grid-search => slow;
run_modality = pd.read_csv(os.path.sep.join([INPUT_FOLDER,
                                             input_file_name_05]),
                           header=None,
                           dtype='str')

# load model
varying_intercept_slope_noncentered = joblib.load(
    os.path.sep.join([BASE_DIR_INPUT, output_file_name_21]))

# load ArviZ NetCDF data-set containing MCMC samples
arviz_inference = az.from_netcdf(
    filename=os.path.sep.join([BASE_DIR_OUTPUT, output_file_name_22]))

################################################################################
## 4. PRE-PROCESSING
# drop rows with NaN
user_activities.dropna(axis=0, inplace=True)

# drop duplicates
user_activities.drop_duplicates(inplace=True)

# sort 'variant_description' in alphabetical order
variant_df.sort_values(by='variant_description',
                       axis=0,
                       inplace=True,
                       ascending=True)
Example 23
                    f'--chains {args.chains} '
                    f'--output-tensor {args.local_directory}/{feature_id}.nc'
                    # slurm logs
                    f' &> {args.local_directory}/{feature_id}.log\n')
                print(cmd_)
                fh.write(cmd_)
        ## Run disBatch with the SLURM environmental parameters
        cmd = f'disBatch {task_fp}'
        cmd = f'{args.job_extra}; {cmd}'
        slurm_env = os.environ.copy()
        print(cmd)
        try:
            output = subprocess.run(cmd, env=slurm_env, check=True, shell=True)
        except subprocess.CalledProcessError as exc:
            print("Status : FAIL", exc.returncode, exc.output)
        else:
            print("Output: \n{}\n".format(output))

    # Aggregate results
    inference_files = [
        f'{args.local_directory}/{feature_id}.nc'
        for feature_id in counts.columns
    ]
    inf_list = [az.from_netcdf(x) for x in inference_files]
    coords = {
        'features': counts.columns,
        'monte_carlo_samples': np.arange(args.monte_carlo_samples)
    }
    samples = merge_inferences(inf_list, 'y_predict', 'log_lhood', coords)
    samples.to_netcdf(args.output_inference)
Example 24
    #if start_ind != 0 and end_ind < 3400:
    #    sys.exit(
    print('we want to calibrate for all Alpine glaciers at once, so all glaciers are selected, even if start_ind or end_ind are given')
    for mb_type in ['mb_monthly', 'mb_pseudo_daily', 'mb_real_daily']:
        for grad_type in ['cte', 'var_an_cycle']:
            # compute apparent mb from any mb ... 
            print(mb_type, grad_type)
            
            if glen_a == 'single':
                for gdir in gdirs:
                    try:
                        # in this case a-factor calibrated individually for each glacier ... 
                        sample_path = '/home/users/lschuster/bayesian_calibration/WFDE5_ISIMIP/burned_trace_plus200samples/'
                        burned_trace = az.from_netcdf(sample_path + '{}_burned_trace_plus200samples_WFDE5_CRU_{}_{}_meltfpriorfreq_bayesian.nc'.format(gdir.rgi_id, mb_type, grad_type))

                        melt_f_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                            'mean', burned_trace.posterior.melt_f.stack(
                                draws=("chain", "draw"))).values
                        pf_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                            'mean',
                            burned_trace.posterior.pf.stack(draws=("chain", "draw"))).values

                        mb = TIModel(gdir, melt_f_point_estimate, mb_type=mb_type,
                                     grad_type=grad_type, baseline_climate=dataset,
                                     residual=0, prcp_fac=pf_point_estimate)
                        mb.historical_climate_qc_mod(gdir)

                        climate.apparent_mb_from_any_mb(gdir, mb_model=mb,
                                                        mb_years=np.arange(y0, ye, 1))
Example 25
def write_summary_table(map_estimate_file: str, sampling_estimate_file: str,
                        subjects: list,
                        output_filename_without_extension: str):
    """
    Reads the results of MAP estimation and NUTS sampling and writes a summary table in CSV format.
    :param pathlib.Path|str map_estimate_file: JSON file containing results of MAP estimation
    :param pathlib.Path|str sampling_estimate_file: NetCDF(.NC) file containing results of NUTS sampling
    :param list subjects: list of subject labels used when fitting
    :param pathlib.Path|str output_filename_without_extension: output will be written to this path with .CSV extension
    :return: None
    """

    with open(map_estimate_file) as fp:
        map_estimates = json.load(fp)

    traces = arviz.from_netcdf(sampling_estimate_file)

    traces_summary = arviz.summary(traces)

    group_level_parameters = [
        "mu_alpha", "sigma_alpha", "mu_beta", "sigma_beta"
    ]

    subject_level_parameters = {
        "alpha": lambda subject: (f"alpha_{subject}", None),
        "beta2": lambda subject: (f'beta_{subject}', 0),
        "beta_mb": lambda subject: (f'beta_{subject}', 1),
        "beta_mf0": lambda subject: (f'beta_{subject}', 2),
        "beta_mf1": lambda subject: (f'beta_{subject}', 3),
        "beta_st": lambda subject: (f'beta_{subject}', 4)
    }

    metadata_df = pd.DataFrame(dtype=str)
    data_df = pd.DataFrame(dtype=float)

    for glp in group_level_parameters:

        temp_s = pd.Series(dtype=str)
        temp_s["Parameter"] = glp
        temp_s["Subject/Group-level"] = "Group-level"

        metadata_df = metadata_df.append(pd.DataFrame(temp_s).T,
                                         ignore_index=True)

        temp_f = pd.Series(dtype=float)
        temp_f['Map Estimate'] = map_estimates[glp]
        temp_f['Sampling Estimate:\nPosterior Mean'] = traces_summary.loc[
            glp, "mean"]
        temp_f[
            'Sampling Estimate:\nPosterior Standard Deviation'] = traces_summary.loc[
                glp, "sd"]
        temp_f['Gelman-Rubin diagnostic'] = traces_summary.loc[glp, "r_hat"]

        data_df = data_df.append(pd.DataFrame(temp_f).T, ignore_index=True)

    for subject in subjects:

        for slp, slp_name_func in subject_level_parameters.items():
            temp_s = pd.Series(dtype=str)
            temp_s["Parameter"] = slp
            temp_s["Subject/Group-level"] = subject

            metadata_df = metadata_df.append(pd.DataFrame(temp_s).T,
                                             ignore_index=True)

            temp_f = pd.Series(dtype=float)
            var_name, index = slp_name_func(subject)
            if index is not None:
                traces_var_name = f"{var_name}[{index}]"
                temp_f['Map Estimate'] = map_estimates[var_name][index]
            else:
                traces_var_name = var_name
                temp_f["Map Estimate"] = map_estimates[var_name]

            temp_f['Sampling Estimate:\nPosterior Mean'] = \
                traces_summary.loc[traces_var_name, "mean"]
            temp_f['Sampling Estimate:\nPosterior Standard Deviation'] = \
                traces_summary.loc[traces_var_name, "sd"]
            temp_f['Gelman-Rubin diagnostic'] = \
                traces_summary.loc[traces_var_name, "r_hat"]

            data_df = data_df.append(pd.DataFrame(temp_f).T, ignore_index=True)

    op_df = pd.concat([metadata_df, data_df], axis=1).set_index("Parameter")
    op_df.to_csv(f"{output_filename_without_extension}.csv",
                 float_format='%.3f')
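
An illustrative invocation of the function above; file names and subject labels are placeholders.

# Hypothetical call; paths and subject labels are placeholders.
write_summary_table(
    map_estimate_file="map_estimates.json",
    sampling_estimate_file="nuts_trace.nc",
    subjects=["subj01", "subj02", "subj03"],
    output_filename_without_extension="fit_summary",
)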
Example 26
        i += 1
print(gdirs[0].rgi_id)
# only first 5 yet ...
# if first_run:

# make an execute_entity_task out of this for the logs?
if ice_thickness_calibration:
    if start_ind != 0 and end_ind < 3400:
        sys.exit(
            'we want to calibrate for all Alpine glaciers at once, so all glaciers have to be selected!'
        )
    # first need to compute the apparent_mb_from_any_mb using the medium best pf/melt_f combination ...
    for gdir in gdirs:
        try:
            burned_trace = az.from_netcdf(
                'alps/{}_burned_trace_plus200samples_{}_{}_{}_meltfprior{}.nc'.
                format(gdir.rgi_id, dataset, mb_type, grad_type, melt_f_prior))
            melt_f_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                'mean',
                burned_trace.posterior.melt_f.stack(draws=("chain",
                                                           "draw"))).values
            pf_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                'mean', burned_trace.posterior.pf.stack(draws=("chain",
                                                               "draw"))).values

            mb = TIModel(gdir,
                         melt_f_point_estimate,
                         mb_type=mb_type,
                         grad_type=grad_type,
                         residual=0,
                         prcp_fac=pf_point_estimate)
Example 27
            print(typ)
            print('missing: {}'.format(len(miss_samples[typ])))
            print('existing: {}'.format(len(exist_samples[typ])))
            print('missing glaciers that have geodetic measurements: ')
            to_retry = list(set(geod_ind.values) & set(miss_samples[typ]))
            print(len(to_retry))
            print(to_retry)
            gdirs = workflow.init_glacier_directories(to_retry)
        elif specific_gdirs is not None:
            gdirs = workflow.init_glacier_directories(specific_gdirs)
        else:
            gdirs = workflow.init_glacier_directories(
                pd_geodetic_comp_alps.dropna().index[start_ind:end_ind])

        burned_trace = az.from_netcdf(
            path +
            'burned_trace_alps_regression_pf_{}_{}.nc'.format(dataset, typ))

        # predict_data = xr.open_dataset('predict_alps_regression_pf_{}.nc'.format(dataset))
        predict_data = burned_trace.predictions
        # melt_f_prior == 'bayesian'
        if melt_f_prior == 'frequentist':
            # this is actually not used anymore, because we only need it for frequentist
            filepath = "/home/users/lschuster/bayesian_calibration/all/dict_calib_opt_pf_allrefglaciers_difftypes.pkl"
            dict_pd_calib_opt = pd.read_pickle(filepath)
            typ = '{}_{}'.format(mb_type, grad_type)
            pd_nonan = dict_pd_calib_opt[typ].loc[
                dict_pd_calib_opt[typ].pf_opt.dropna().index]
            pd_nonan_alps = pd_nonan[pd_nonan.O1Region == '11'][[
                'pf_opt', 'solid prcp mean nopf weighted', 'melt_f_opt_pf',
                'amount_glacmsm'
Example 28
acc_probs.index = acc_probs.index.droplevel(0)

acc_probs = acc_probs.reset_index()

print(acc_probs)

#%%
with open(write_path + "/multi_chain_50_len20000_acc", "wb") as file:
    pkl.dump(acc_probs, file)

#%%
with open(write_path + "/multi_chain_50_len20000_acc", "rb") as file:
    acc_probs = pkl.load(file)

res_all = az.from_netcdf(write_path + "/multi_chain_50_len20000_all")

#%%
coords = {"cell_type": "k__Bacteria;p__Proteobacteria"}
az.plot_trace(res_all, var_names="beta", coords=coords)
plt.show()

#%%
sns.set(style="ticks", font_scale=1)

n_chains = 50
col = [cm.tab20(i % 20) for i in range(n_chains)]

g = sns.FacetGrid(data=acc_probs.loc[acc_probs["Cell Type"].isin([
    "k__Bacteria;p__Fusobacteria", "k__Bacteria;p__Firmicutes",
    "k__Bacteria;p__Tenericutes"
Example 29
# %%
# Plots:
# ~~~~~~
#

# %%
import arviz as az

# %%
# Load data
# ^^^^^^^^^
#

# %%
data = az.from_netcdf('australia')

# %%
data.prior['depth_0'] = data.prior['depths'][0, :, 0]
data.prior['depth_1'] = data.prior['depths'][0, :, 1]
data.prior['depth_2'] = data.prior['depths'][0, :, 2]
data.prior['depth_3'] = data.prior['depths'][0, :, 3]

# %%
data.posterior['depth_0'] = data.posterior['depths'][0, :, 0]
data.posterior['depth_1'] = data.posterior['depths'][0, :, 1]
data.posterior['depth_2'] = data.posterior['depths'][0, :, 2]
data.posterior['depth_3'] = data.posterior['depths'][0, :, 3]

# %%
az.plot_trace(
Example 30
    def load_idata(self, path):
        # Load a previously saved idata; the .nc extension is appended automatically.
        self.m_idata = az.from_netcdf(f'{path}.nc')