def from_netcdf(cls, *file_name, fast_open=False):
    if len(file_name) == 1:
        inference_data = [av.from_netcdf(file_name[0])]
    else:
        inference_data = [av.from_netcdf(f) for f in file_name]
    return cls(*inference_data, fast_open=fast_open)
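# Minimal sketch of the same loading pattern with plain ArviZ, independent of
# the wrapping class above. It assumes the files "run1.nc" and "run2.nc"
# exist on disk; the names are purely illustrative.
import arviz as az

paths = ["run1.nc", "run2.nc"]
inference_data = [az.from_netcdf(p) for p in paths]  # one InferenceData per file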
def test_nested_model_to_netcdf(self, tmp_path):
    with pm.Model("scope") as model:
        b = pm.Normal("var")
        trace = pm.sample(100, tune=0)
    az.to_netcdf(trace, tmp_path / "trace.nc")
    trace1 = az.from_netcdf(tmp_path / "trace.nc")
    assert "scope::var" in trace1.posterior
def do_predict(data_path: str):
    """Generate MCMC samples given a Maud output folder at data_path.

    This function creates a new directory in output_dir with a name starting
    with "maud-predict-output". It first copies the testing directory at
    data_path into the new directory at new_dir/user_input, then runs the
    running_stan.predict_out_of_sample function to write samples in
    new_dir/oos_samples. The trained output is stored in the
    new_dir/trained_samples folder along with the user input required to
    generate the trained samples.
    """
    idata_train = az.from_netcdf(os.path.join(data_path, "idata.nc"))
    mi = load_maud_input(os.path.join(data_path, "user_input"))
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"maud-predict_output-{mi.config.name}-{now}"
    output_path = os.path.join(data_path, output_name)
    test_samples_path = os.path.join(output_path, "test_samples")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(test_samples_path)
    idata_predict = predict(mi, output_path, idata_train)
    # delete attrs hack to make netcdf save work:
    # https://github.com/arviz-devs/arviz/issues/1554
    idata_predict.sample_stats.attrs = {}  # type: ignore
    idata_predict.posterior.attrs = {}  # type: ignore
    idata_predict.to_netcdf(os.path.join(output_path, "idata_predict.nc"))
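# Hedged usage sketch for do_predict above: the argument is assumed to be a
# Maud output folder containing "idata.nc" and a "user_input/" subdirectory;
# the path below is purely illustrative.
do_predict("results/maud-output-example-20230101000000")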
def test_io_function(self, data, eight_schools_params):
    # create inference data and assert all attributes are present
    inference_data = self.get_inference_data(  # pylint: disable=W0612
        data, eight_schools_params)
    test_dict = {
        "posterior": ["eta", "theta", "mu", "tau"],
        "posterior_predictive": ["eta", "theta", "mu", "tau"],
        "sample_stats": ["eta", "theta", "mu", "tau"],
        "prior": ["eta", "theta", "mu", "tau"],
        "prior_predictive": ["eta", "theta", "mu", "tau"],
        "sample_stats_prior": ["eta", "theta", "mu", "tau"],
        "observed_data": ["J", "y", "sigma"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails

    # check filename does not exist and save InferenceData
    here = os.path.dirname(os.path.abspath(__file__))
    data_directory = os.path.join(here, "..", "saved_models")
    filepath = os.path.join(data_directory, "io_function_testfile.nc")
    # az function
    to_netcdf(inference_data, filepath)

    # assert InferenceData has been saved correctly
    assert os.path.exists(filepath)
    assert os.path.getsize(filepath) > 0
    inference_data2 = from_netcdf(filepath)
    fails = check_multiple_attrs(test_dict, inference_data2)
    assert not fails
    os.remove(filepath)
    assert not os.path.exists(filepath)
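# Stand-alone round-trip sketch of the az.to_netcdf / az.from_netcdf pair that
# the test above exercises, using a bundled ArviZ example dataset and a
# temporary directory instead of the test fixtures.
import os
import tempfile
import arviz as az

idata = az.load_arviz_data("centered_eight")
with tempfile.TemporaryDirectory() as tmpdir:
    path = os.path.join(tmpdir, "roundtrip.nc")
    az.to_netcdf(idata, path)
    idata2 = az.from_netcdf(path)
    assert hasattr(idata2, "posterior")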
def test_get_scale_factor(self, filename):
    fp = pathlib.Path(pathlib.Path(__file__).parent, "testdata", filename)
    idata = arviz.from_netcdf(str(fp))
    assert isinstance(idata, arviz.InferenceData)
    scale_factor = model.get_scale_factor(idata)
    assert isinstance(scale_factor, xarray.DataArray)
    assert scale_factor.coords.dims == ("sample",)
    assert 1000 < scale_factor.mean() < 200_000
def get_example_results() -> az.InferenceData:
    """Get example inference results data.

    Returns:
        arviz.InferenceData: Inference data object.
    """
    return az.from_netcdf(
        os.path.join(PACKAGE_DIR, "data", "example_results.nc"))
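# Hedged usage sketch for get_example_results above: it simply loads the
# packaged example NetCDF file into an arviz.InferenceData object; the printed
# group list depends on whatever groups that file contains.
idata = get_example_results()
print(idata.groups())  # e.g. ['posterior', 'observed_data', ...]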
def test_plot_details(self, filename, plot_positive):
    fp = pathlib.Path(pathlib.Path(__file__).parent, "testdata", filename)
    idata = arviz.from_netcdf(str(fp))
    assert isinstance(idata, arviz.InferenceData)
    fig, axs = plotting.plot_details(idata, plot_positive=plot_positive)
    pyplot.close()
def test_get_case_curves(self, filename):
    fp = pathlib.Path(pathlib.Path(__file__).parent, "testdata", filename)
    idata = arviz.from_netcdf(str(fp))
    assert isinstance(idata, arviz.InferenceData)
    case_curves = model.get_case_curves(idata)
    assert isinstance(case_curves, tuple)
    for obj in case_curves:
        assert isinstance(obj, xarray.DataArray)
        assert obj.coords.dims == ("date", "sample")
def load_idata(self, path):
    """Load idata object obtained from saved idata.

    Args:
        path (str): String specifying the path for loading the idata.
            The file extension is inserted automatically and is hard-set to .nc.
    """
    self.m_idata = az.from_netcdf(f'{path}.nc')
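# Hedged usage sketch for load_idata above: the owning class is not shown, so
# "ModelWrapper" and the path are purely illustrative placeholders. Note that
# the ".nc" suffix is appended inside the method, so it is omitted here.
wrapper = ModelWrapper()                # hypothetical class exposing load_idata
wrapper.load_idata("models/m_idata")    # loads models/m_idata.nc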
def main():
    infd = az.from_netcdf(NCDF_FILE)
    scores = pd.read_csv(PREPARED_DATA_CSV)
    # true_abilities = scores.groupby("name")["true_ability"].first()
    f, ax = plt.subplots(figsize=[6, 16])
    ax = plot_marginals(infd, "ability", ax)
    f.savefig(os.path.join(PLOT_DIR, "marginals.png"), bbox_inches="tight")
    f, ax = plt.subplots(figsize=[20, 10])
    ax = plot_ppc(infd, scores, ax)
    f.savefig(os.path.join(PLOT_DIR, "ppc.png"), bbox_inches="tight")
def from_netcdf(cls, netcdf_path: str, restart: bool = False):
    logger.info(f"Loading NetCDF chain; restart = {restart}")
    samples = arviz.from_netcdf(netcdf_path)
    if restart:
        # if we're restarting sampling, take the last position
        last = samples.posterior.isel(draw=-1).mean(dim=["chain"]).to_array()
        initial = np.array(last)[0]
    else:
        # generate the initial values from the mean of the posterior
        initial = np.array(
            samples.posterior.mean(dim=["chain", "draw"]).to_array())[0]
    helper_obj = cls(initial)
    helper_obj.chain = np.array(samples.posterior.to_array()).squeeze()
    return helper_obj
def task_render_region_result(country: str, region: str, run_date: pd.Timestamp):
    """Render a CSV with summary output for a given region."""
    # load NetCDF data eagerly so nothing still references the temporary file
    # after it is deleted
    az.rcParams["data.load"] = "eager"
    with tempfile.NamedTemporaryFile() as fp:
        s3.Bucket(S3_BUCKET).download_file(
            get_inference_data_key(run_date, region, country=country), fp.name)
        fp.seek(0)
        inference_data = az.from_netcdf(fp.name)
        summary = summarize_inference_data(inference_data)

    key = get_state_output_key(run_date, region, country=country)
    with fs.open(f"{S3_BUCKET}/{key}", "w") as file:
        summary.to_csv(file)
def test_io_function(self, data, eight_schools_params):
    inference_data = self.get_inference_data(  # pylint: disable=W0612
        data, eight_schools_params)
    assert hasattr(inference_data, "posterior")
    here = os.path.dirname(os.path.abspath(__file__))
    data_directory = os.path.join(here, "saved_models")
    filepath = os.path.join(data_directory, "io_function_testfile.nc")
    # az function
    to_netcdf(inference_data, filepath)
    assert os.path.exists(filepath)
    assert os.path.getsize(filepath) > 0
    inference_data2 = from_netcdf(filepath)
    assert hasattr(inference_data2, "posterior")
    os.remove(filepath)
    assert not os.path.exists(filepath)
def trace_export_4_mat(srcname, destname, n):
    trace = az.from_netcdf(srcname)
    data = {}
    size = trace.posterior.sigma.data.size
    data['sigma'] = trace.posterior.sigma.data.reshape(size, 1)
    data['dtau'] = trace.posterior.dtau.data.reshape(size, 1)
    for i in np.arange(1, n + 1):
        data['mu' + str(i)] = trace.posterior['mu' + str(i)].data.reshape(size, 1)
        data['tau' + str(i)] = trace.posterior['tau' + str(i)].data.reshape(size, 1)
    data['mu' + str(n + 1)] = trace.posterior['mu' + str(n + 1)].data.reshape(size, 1)
    io.savemat(destname, data, oned_as='column')
def post_process(f, select_subset=None):
    c = add_constrained_cosmo(az.from_netcdf(f))

    print('Minimum effective sample size is')
    es = az.ess(c).min()
    print(min([es[k] for k in es.keys()]))

    print('Number of constrained samples is {:.1f}'.format(
        np.sum(np.exp(c.posterior.constrained_cosmo_log_wts.values))))

    traceplot(c)
    figure()
    sampled_variables_scatterplot(c)
    figure()
    neff_det_check_plot(c)
    figure()
    Hz_plot(c)
    cosmo_corner_plot(c)
    pop_corner_plot(c)
    figure()
    H0_plot(c)
    figure()
    pure_DE_w_plot(c)
    figure()
    constrained_versus_w0_plot(c)
    figure()
    MMax_plot(c)
    title(interval_string(c.posterior['MMax'].values.flatten(),
                          prefix=r'$M_\mathrm{max} = ',
                          postfix=r' \, M_\odot$'))
    figure()
    mass_correction_plot(c)

    return c
def create_azid(model, save=False, dir_traces=[], fn='azid',
                prior=None, trace=None, ppc=None):
    """ Convenience: create azid structure """
    print('Will deprecate this in v0.2.0. Functionality to extend now exists in arviz')
    azid = az.from_pymc3(model=model, prior=prior, trace=trace,
                         posterior_predictive=ppc)
    if save:
        azid.to_netcdf(os.path.join(*dir_traces, f'{fn}.netcdf'))
        del azid
        azid = az.from_netcdf(os.path.join(*dir_traces, f'{fn}.netcdf'))
    return azid
    coords=coords)

# more coords
coords["param"] = ["alpha", "beta"]
coords["param_bis"] = ["alpha", "beta"]

### compile the model (this cannot be saved, only the idata) ###
m = fm.covariation(t=t_train, idx=idx_train, y=y_train,
                   coords=coords, dims=dims, sigma=0.5)

# load idata #
m_idata = az.from_netcdf("../models_python/idata_covariation_generic.nc")

### Predictions ###
# load test data
test = pd.read_csv("../data/test.csv")

# get unique values for shared.
t_unique_test = np.unique(test.t.values)
idx_unique_test = np.unique(test.idx.values)

# get n unique for shapes.
n_time_test = len(t_unique_test)
n_idx_test = len(idx_unique_test)

# new coords as well
prediction_coords = {'idx': idx_unique_test, 't': t_unique_test}
    'CA': r'C$_\alpha$',
    'CB': r'C$_\beta$',
    'C': 'C',
    'N': 'N',
}
state_to_name = {
    'like_o': 'Open',
    'like_fo': 'Fully Open',
    'like_c': 'Closed',
}

for nucleus in nuclei_to_name.keys():
    for method in method_to_name.keys():
        for state in state_to_name.keys():
            print(nucleus, method, state)
            model_path = models_dir + f"{model_name}_{method}_{nucleus}.nc"
            my_model = az.from_netcdf(model_path)
            resids = my_model.posterior.resid
            n = resids.shape[0]
            fig, ax = plt.subplots(n // 6 + 1, 6, figsize=(13, 15))
            for i in range(6 - n % 6):
                fig.delaxes(ax[-1, -i - 1])
            ax = fig.axes
            with az.rc_context(rc={'plot.max_subplots': None}):
                az.plot_ppc(my_model, flatten=['step'], var_names=[state],
                            random_seed=RANDOM_SEED, ax=ax)
            for r, a in zip(resids.to_index(), ax):
                a.set_title(f'{r}', size=12)
                a.set_xlabel('')
                a.legend_.set_visible(False)
            fig.suptitle(f'Posterior Predictive Check {method_to_name[method]} '
                         f'{nuclei_to_name[nucleus]} {state_to_name[state]} (ppm)',
                         y=1.0, size=20)
            fig.tight_layout()
            plt.savefig(f'{reports_dir}for_print/ppc_{state_to_name[state]}_{nucleus}_{method}.png')
def plot_cs_differences(
    protein_code,
    target_accept=0.9,
    save=False,
    bmrb_code=None,
    residues=None,
    pymol_session=False,
    ax=None,
    marker="o",
    perct_dict=None,
    plot_kwargs=None,
):
    """Plot the reference densities of CS differences for target protein structures."""
    _, _, reference_df = load_data()
    mean_exp = reference_df["ca_exp"].mean()
    std_exp = reference_df["ca_exp"].std()

    if not plot_kwargs:
        plot_kwargs = {}
    plot_kwargs.setdefault("s", 10)
    plot_kwargs.setdefault("alpha", 1)

    dataframe_full = get_biomolecular_data(protein_code, bmrb_code=bmrb_code)

    if f'idata_{protein_code}.nc' in os.listdir('./data/'):
        idata_target = az.from_netcdf(f'data/idata_{protein_code}.nc')
    else:
        dataframe_reference, idata = hierarchical_reg_reference(target_df=dataframe_full)
        idata_target = idata.sel(
            cheshift_dim_0=slice(dataframe_reference.shape[0] - dataframe_full.shape[0],
                                 dataframe_reference.shape[0]))

    idata_target.posterior_predictive = idata_target.posterior_predictive * std_exp + mean_exp

    if residues is None:
        residues = np.unique(dataframe_full.res.values)
    if ax is None:
        _, ax, perct_dict = plot_reference_densities(residues)

    param_list = []
    differences = (idata_target.posterior_predictive['cheshift'].values.mean(axis=(0, 1))
                   - dataframe_full.ca_exp)
    len_residues = len(differences)
    red_residues = 0
    yellow_residues = 0
    green_residues = 0

    for a, res in enumerate(residues):
        idx = np.array(dataframe_full.res.values == res).ravel()
        residue_indexes = np.array([dataframe_full.index + 1]).ravel()[idx]
        difference = differences[dataframe_full.res == res]
        n = len(difference)
        jitter = np.linspace(-0.15, 0.0015, n)
        for z, diff in enumerate(difference):
            if diff > 5:
                diff = 5
            if diff < -5:
                diff = -5
            perct = perct_dict[res]
            if diff < perct[1] or diff > perct[-2]:
                color = ["C1", "yellow"]
                red_residues += 1
            elif diff < perct[2] or diff > perct[-3]:
                color = ["C6", "orange"]
                yellow_residues += 1
            else:
                color = ["C2", "green"]
                green_residues += 1
            if res in dataframe_full.res.values:
                ax[a].scatter(
                    diff,
                    jitter[z],
                    marker=marker,
                    c=color[0],
                    linewidth=5,
                    **plot_kwargs,
                )
                param_list.append((residue_indexes[z], color[1], res))
            else:
                print(f"Residue {res} not in protein {protein_code}")
        annot = ax[a].annotate(
            "",
            xy=(0, 0),
            xytext=(7, 7),
            textcoords="offset points",
            bbox=dict(boxstyle="round", fc="k", alpha=0.1),
        )

    print(
        np.round(
            np.array([red_residues, yellow_residues, green_residues]) / len_residues * 100
        )
    )

    if save:
        plt.savefig(os.path.join("images", f"{protein_code}_differences.png"), dpi=600)
    if pymol_session:
        create_pymol_session(protein_code, param_list)
        print(f"Search working directory for a PyMol session of protein {protein_code}")

    """
    def update_annot(ind):
        pos = sc.get_offsets()[ind["ind"][0]]
        annot.xy = pos
        text = f"{pos[0]:.2f}"
        annot.set_text(text)

    def hover(event):
        vis = annot.get_visible()
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_annot(ind)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    # _.canvas.mpl_connect("motion_notify_event", hover)
    """
    # if residue_list is None:
    #     dataframe_full["colors"] = color_list

    return ax, dataframe_full, perct_dict, idata_target
def hierarchical_reg_target(dataframe, target_accept=0.9, samples=2000):
    """
    Runs a hierarchical model over the target structure CS data set.

    Parameters
    ----------
    dataframe : contains experimental and theoretical CS data
    """
    _, _, reference_dataframe = load_data()
    mean_teo = reference_dataframe["ca_teo"].mean()
    mean_exp = reference_dataframe["ca_exp"].mean()
    std_teo = reference_dataframe["ca_teo"].std()
    std_exp = reference_dataframe["ca_exp"].std()

    ca_exp = (dataframe.ca_exp - mean_exp) / std_exp
    ca_teo = (dataframe.ca_teo - mean_exp) / std_exp

    categories = pd.Categorical(dataframe["res"])
    index = categories.codes
    N = len(np.unique(index))

    if os.path.isfile(os.path.join("data", "trace_reference_structures.nc")):
        trace_all_proteins = az.from_netcdf(os.path.join("data", "trace_reference_structures.nc"))
        print(f"Loaded reference trace from {os.path.join('data', 'trace_reference_structures.nc')}")
    else:
        print(f"Could not find reference trace at {os.path.join('data', 'trace_reference_structures.nc')}")
        print("Running model for reference structures")
        dataframe_all_proteins, trace_all_proteins = hierarchical_reg_reference()
        # trace_all_proteins = az.from_pymc3(trace_all_proteins)
        # az.to_netcdf(trace_all_proteins, os.path.join("data", "trace_reference_structures.nc"))
        # dataframe_all_proteins.to_csv(os.path.join("data", "dataframe_reference_structures.csv"))

    learnt_alpha_sd_mean = trace_all_proteins.posterior.alpha_sd.mean(
        dim=["chain", "draw"]).values
    learnt_beta_sd_mean = trace_all_proteins.posterior.beta_sd.mean(
        dim=["chain", "draw"]).values
    learnt_sigma_sd_mean = trace_all_proteins.posterior.sigma_sd.mean(
        dim=["chain", "draw"]).values

    with pm.Model() as model:
        # hyper-priors
        alpha_sd = pm.HalfNormal("alpha_sd", learnt_alpha_sd_mean)
        beta_sd = pm.HalfNormal("beta_sd", learnt_beta_sd_mean)
        # original passed learnt_beta_sd_mean here, presumably a typo
        sigma_sd = pm.HalfNormal("sigma_sd", learnt_sigma_sd_mean)
        # priors
        α = pm.Normal("α", 0, alpha_sd, shape=N)
        β = pm.HalfNormal("β", beta_sd, shape=N)
        σ = pm.HalfNormal("σ", sigma_sd, shape=N)
        # linear model
        μ = pm.Deterministic("μ", α[index] + β[index] * ca_teo)
        # likelihood
        cheshift = pm.Normal("cheshift", mu=μ, sigma=σ[index], observed=ca_exp)

        idata = pm.sample(samples, tune=2000, random_seed=18759,
                          target_accept=target_accept, return_inferencedata=True)
        pps = pm.sample_posterior_predictive(
            idata, samples=samples * idata.posterior.dims["chain"], random_seed=18759)

    idata.add_groups({"posterior_predictive": {"cheshift": pps["cheshift"][None, :, :]}})

    return dataframe, idata
extension_data_20 = np.loadtxt('extension_data_20.csv', delimiter=',')
stress_data_40 = np.loadtxt('stress_data_40.csv', delimiter=',')
extension_data_40 = np.loadtxt('extension_data_40.csv', delimiter=',')
stress_data_RS = np.loadtxt('stress_data_RS.csv', delimiter=',')
extension_data_RS = np.loadtxt('extension_data_RS.csv', delimiter=',')
mean_stress_data_4 = np.loadtxt('mean_stress_data_4.csv', delimiter=',')
mean_stress_data_20 = np.loadtxt('mean_stress_data_20.csv', delimiter=',')
mean_stress_data_40 = np.loadtxt('mean_stress_data_40.csv', delimiter=',')
mean_stress_data_RS = np.loadtxt('mean_stress_data_RS.csv', delimiter=',')
std_stress_data_4 = np.loadtxt('std_stress_data_4.csv', delimiter=',')
std_stress_data_20 = np.loadtxt('std_stress_data_20.csv', delimiter=',')
std_stress_data_40 = np.loadtxt('std_stress_data_40.csv', delimiter=',')
std_stress_data_RS = np.loadtxt('std_stress_data_RS.csv', delimiter=',')

data = az.from_netcdf('save_arviz_data_stanwound')

az.style.use("default")
az.rhat(data, var_names=['kv', 'k0', 'kf', 'k2', 'b', 'mu', 'phif'])

extra_kwargs = {"color": "lightsteelblue"}
az.plot_ess(data, kind="local",
            var_names=['kv', 'k0', 'kf', 'k2', 'b', 'mu', 'phif'],
            figsize=(18, 18), color="royalblue",
            extra_kwargs=extra_kwargs, textsize=20)
    header=0)

# run_modality, either:
# - 'testing'    : without grid-search => quick;
# - 'production' : with grid-search => slow;
run_modality = pd.read_csv(os.path.sep.join([INPUT_FOLDER, input_file_name_05]),
                           header=None, dtype='str')

# load model
varying_intercept_slope_noncentered = joblib.load(
    os.path.sep.join([BASE_DIR_INPUT, output_file_name_21]))

# load ArviZ NetCDF data-set containing MCMC samples
arviz_inference = az.from_netcdf(
    filename=os.path.sep.join([BASE_DIR_OUTPUT, output_file_name_22]))

################################################################################
## 4. PRE-PROCESSING

# drop rows with NaN
user_activities.dropna(axis=0, inplace=True)

# drop duplicates
user_activities.drop_duplicates(inplace=True)

# sort 'variant_description' in alphabetical order
variant_df.sort_values(by='variant_description', axis=0, inplace=True, ascending=True)
            f'--chains {args.chains} '
            f'--output-tensor {args.local_directory}/{feature_id}.nc'
            # slurm logs
            f' &> {args.local_directory}/{feature_id}.log\n')
        print(cmd_)
        fh.write(cmd_)

## Run disBatch with the SLURM environmental parameters
cmd = f'disBatch {task_fp}'
cmd = f'{args.job_extra}; {cmd}'
slurm_env = os.environ.copy()
print(cmd)
try:
    output = subprocess.run(cmd, env=slurm_env, check=True, shell=True)
except subprocess.CalledProcessError as exc:
    print("Status : FAIL", exc.returncode, exc.output)
else:
    print("Output: \n{}\n".format(output))

# Aggregate results
inference_files = [f'{args.local_directory}/{feature_id}.nc'
                   for feature_id in counts.columns]
inf_list = [az.from_netcdf(x) for x in inference_files]
coords = {'features': counts.columns,
          'monte_carlo_samples': np.arange(args.monte_carlo_samples)}
samples = merge_inferences(inf_list, 'y_predict', 'log_lhood', coords)
samples.to_netcdf(args.output_inference)
# if start_ind != 0 and end_ind < 3400:
#     sys.exit(
print('we want to calibrate for all Alpine glaciers at once, so all glaciers are selected, even if start_ind or end_ind are given')

for mb_type in ['mb_monthly', 'mb_pseudo_daily', 'mb_real_daily']:
    for grad_type in ['cte', 'var_an_cycle']:
        # compute apparent mb from any mb ...
        print(mb_type, grad_type)
        if glen_a == 'single':
            for gdir in gdirs:
                try:
                    # in this case a-factor calibrated individually for each glacier ...
                    sample_path = '/home/users/lschuster/bayesian_calibration/WFDE5_ISIMIP/burned_trace_plus200samples/'
                    burned_trace = az.from_netcdf(
                        sample_path +
                        '{}_burned_trace_plus200samples_WFDE5_CRU_{}_{}_meltfpriorfreq_bayesian.nc'.format(
                            gdir.rgi_id, mb_type, grad_type))
                    melt_f_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                        'mean',
                        burned_trace.posterior.melt_f.stack(draws=("chain", "draw"))).values
                    pf_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                        'mean',
                        burned_trace.posterior.pf.stack(draws=("chain", "draw"))).values
                    mb = TIModel(gdir, melt_f_point_estimate, mb_type=mb_type,
                                 grad_type=grad_type, baseline_climate=dataset,
                                 residual=0, prcp_fac=pf_point_estimate)
                    mb.historical_climate_qc_mod(gdir)
                    climate.apparent_mb_from_any_mb(gdir, mb_model=mb,
                                                    mb_years=np.arange(y0, ye, 1))
def write_summary_table(map_estimate_file: str, sampling_estimate_file: str,
                        subjects: list, output_filename_without_extension: str):
    """
    Reads results of MAP estimation and NUTS sampling and writes a summary table in CSV format.

    :param pathlib.Path|str map_estimate_file: JSON file containing results of MAP estimation
    :param pathlib.Path|str sampling_estimate_file: NetCDF (.nc) file containing results of NUTS sampling
    :param list subjects: list of subject labels used when fitting
    :param pathlib.Path|str output_filename_without_extension: output will be written to this path with .csv extension
    :return: None
    """
    with open(map_estimate_file) as fp:
        map_estimates = json.load(fp)
    traces = arviz.from_netcdf(sampling_estimate_file)
    traces_summary = arviz.summary(traces)

    group_level_parameters = ["mu_alpha", "sigma_alpha", "mu_beta", "sigma_beta"]
    subject_level_parameters = {
        "alpha": lambda subject: (f"alpha_{subject}", None),
        "beta2": lambda subject: (f'beta_{subject}', 0),
        "beta_mb": lambda subject: (f'beta_{subject}', 1),
        "beta_mf0": lambda subject: (f'beta_{subject}', 2),
        "beta_mf1": lambda subject: (f'beta_{subject}', 3),
        "beta_st": lambda subject: (f'beta_{subject}', 4),
    }

    metadata_df = pd.DataFrame(dtype=str)
    data_df = pd.DataFrame(dtype=float)

    for glp in group_level_parameters:
        temp_s = pd.Series(dtype=str)
        temp_s["Parameter"] = glp
        temp_s["Subject/Group-level"] = "Group-level"
        metadata_df = metadata_df.append(pd.DataFrame(temp_s).T, ignore_index=True)

        temp_f = pd.Series(dtype=float)
        temp_f['Map Estimate'] = map_estimates[glp]
        temp_f['Sampling Estimate:\nPosterior Mean'] = traces_summary.loc[glp, "mean"]
        temp_f['Sampling Estimate:\nPosterior Standard Deviation'] = traces_summary.loc[glp, "sd"]
        temp_f['Gelman-Rubin diagnostic'] = traces_summary.loc[glp, "r_hat"]
        data_df = data_df.append(pd.DataFrame(temp_f).T, ignore_index=True)

    for subject in subjects:
        for slp, slp_name_func in subject_level_parameters.items():
            temp_s = pd.Series(dtype=str)
            temp_s["Parameter"] = slp
            temp_s["Subject/Group-level"] = subject
            metadata_df = metadata_df.append(pd.DataFrame(temp_s).T, ignore_index=True)

            temp_f = pd.Series(dtype=float)
            var_name, index = slp_name_func(subject)
            if index is not None:
                traces_var_name = f"{var_name}[{index}]"
                temp_f['Map Estimate'] = map_estimates[var_name][index]
            else:
                traces_var_name = var_name
                temp_f["Map Estimate"] = map_estimates[var_name]
            temp_f['Sampling Estimate:\nPosterior Mean'] = \
                traces_summary.loc[traces_var_name, "mean"]
            temp_f['Sampling Estimate:\nPosterior Standard Deviation'] = \
                traces_summary.loc[traces_var_name, "sd"]
            temp_f['Gelman-Rubin diagnostic'] = \
                traces_summary.loc[traces_var_name, "r_hat"]
            data_df = data_df.append(pd.DataFrame(temp_f).T, ignore_index=True)

    op_df = pd.concat([metadata_df, data_df], axis=1).set_index("Parameter")
    op_df.to_csv(f"{output_filename_without_extension}.csv", float_format='%.3f')
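# Hedged usage sketch for write_summary_table above; all file names and
# subject labels are illustrative placeholders, not paths from the original
# project.
write_summary_table(
    map_estimate_file="map_estimates.json",
    sampling_estimate_file="nuts_samples.nc",
    subjects=["s01", "s02"],
    output_filename_without_extension="summary_table",
)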
        i += 1
    print(gdirs[0].rgi_id)
    # only first 5 yet ...
    # if first_run:

# make an execute_entity_task out of this for the logs?
if ice_thickness_calibration:
    if start_ind != 0 and end_ind < 3400:
        sys.exit('we want to calibrate for all Alpine glaciers at once, '
                 'so all glaciers have to be selected!')
    # first need to compute the apparent_mb_from_any_mb using the medium best pf/melt_f combination ...
    for gdir in gdirs:
        try:
            burned_trace = az.from_netcdf(
                'alps/{}_burned_trace_plus200samples_{}_{}_{}_meltfprior{}.nc'.format(
                    gdir.rgi_id, dataset, mb_type, grad_type, melt_f_prior))
            melt_f_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                'mean',
                burned_trace.posterior.melt_f.stack(draws=("chain", "draw"))).values
            pf_point_estimate = az.plots.plot_utils.calculate_point_estimate(
                'mean',
                burned_trace.posterior.pf.stack(draws=("chain", "draw"))).values
            mb = TIModel(gdir, melt_f_point_estimate, mb_type=mb_type,
                         grad_type=grad_type, residual=0,
                         prcp_fac=pf_point_estimate)
        print(typ)
        print('missing: {}'.format(len(miss_samples[typ])))
        print('existing: {}'.format(len(exist_samples[typ])))
        print('missing glaciers that have geodetic measurements: ')
        to_retry = list(set(geod_ind.values) & set(miss_samples[typ]))
        print(len(to_retry))
        print(to_retry)
        gdirs = workflow.init_glacier_directories(to_retry)
elif specific_gdirs is not None:
    gdirs = workflow.init_glacier_directories(specific_gdirs)
else:
    gdirs = workflow.init_glacier_directories(
        pd_geodetic_comp_alps.dropna().index[start_ind:end_ind])

burned_trace = az.from_netcdf(
    path + 'burned_trace_alps_regression_pf_{}_{}.nc'.format(dataset, typ))
# predict_data = xr.open_dataset('predict_alps_regression_pf_{}.nc'.format(dataset))
predict_data = burned_trace.predictions

# melt_f_prior == 'bayesian'
if melt_f_prior == 'frequentist':
    # this is actually not used anymore, because we only need it for the frequentist case
    filepath = "/home/users/lschuster/bayesian_calibration/all/dict_calib_opt_pf_allrefglaciers_difftypes.pkl"
    dict_pd_calib_opt = pd.read_pickle(filepath)
    typ = '{}_{}'.format(mb_type, grad_type)
    pd_nonan = dict_pd_calib_opt[typ].loc[
        dict_pd_calib_opt[typ].pf_opt.dropna().index]
    pd_nonan_alps = pd_nonan[pd_nonan.O1Region == '11'][[
        'pf_opt', 'solid prcp mean nopf weighted', 'melt_f_opt_pf', 'amount_glacmsm'
acc_probs.index = acc_probs.index.droplevel(0)
acc_probs = acc_probs.reset_index()
print(acc_probs)

#%%
with open(write_path + "/multi_chain_50_len20000_acc", "wb") as file:
    pkl.dump(acc_probs, file)

#%%
with open(write_path + "/multi_chain_50_len20000_acc", "rb") as file:
    acc_probs = pkl.load(file)

res_all = az.from_netcdf(write_path + "/multi_chain_50_len20000_all")

#%%
coords = {"cell_type": "k__Bacteria;p__Proteobacteria"}
az.plot_trace(res_all, var_names="beta", coords=coords)
plt.show()

#%%
sns.set(style="ticks", font_scale=1)
n_chains = 50
col = [cm.tab20(i % 20) for i in range(n_chains)]
g = sns.FacetGrid(data=acc_probs.loc[acc_probs["Cell Type"].isin([
    "k__Bacteria;p__Fusobacteria",
    "k__Bacteria;p__Firmicutes",
    "k__Bacteria;p__Tenericutes"
# %%
# Plots:
# ~~~~~~
#

# %%
import arviz as az

# %%
# Load data
# ^^^^^^^^^
#

# %%
data = az.from_netcdf('australia')

# %%
data.prior['depth_0'] = data.prior['depths'][0, :, 0]
data.prior['depth_1'] = data.prior['depths'][0, :, 1]
data.prior['depth_2'] = data.prior['depths'][0, :, 2]
data.prior['depth_3'] = data.prior['depths'][0, :, 3]

# %%
data.posterior['depth_0'] = data.posterior['depths'][0, :, 0]
data.posterior['depth_1'] = data.posterior['depths'][0, :, 1]
data.posterior['depth_2'] = data.posterior['depths'][0, :, 2]
data.posterior['depth_3'] = data.posterior['depths'][0, :, 3]

# %%
az.plot_trace(
def load_idata(self, path):
    self.m_idata = az.from_netcdf(f'{path}.nc')