def run(ff, prior, regularization_strength):
    """Cross-validate one (force field, prior, regularization) combination.

    Loads the experiment data for ``ff`` with ``ALA3.cross_val_stride``,
    splits the frames into ``ALA3.kfold`` contiguous index blocks, runs
    ``belt.cross_validated_mcmc`` and appends one CSV row with the mean
    train/test chi values to ``ALA3.cross_val_filename``.

    Parameters
    ----------
    ff : str
        Force-field identifier handed to ``experiment_loader.load``.
    prior : str
        One of ``"maxent"``, ``"dirichlet"`` or ``"MVN"``.
    regularization_strength : float
        Regularization strength forwarded to the BELT model constructor.

    Raises
    ------
    ValueError
        If ``prior`` is not one of the supported names.  The check runs
        before any data is loaded so a typo fails immediately.
    """
    known_priors = ("maxent", "dirichlet", "MVN")
    if prior not in known_priors:
        raise ValueError(
            "Unknown prior %r; expected one of: %s"
            % (prior, ", ".join(known_priors)))

    predictions, measurements, uncertainties = experiment_loader.load(
        ff, stride=ALA3.cross_val_stride)

    # The factory is invoked by cross_validated_mcmc with per-fold data, so
    # its arguments intentionally shadow the full-data names above.
    if prior == "maxent":
        def model_factory(predictions, measurements, uncertainties):
            return belt.MaxEntBELT(
                predictions, measurements, uncertainties,
                regularization_strength)
    elif prior == "dirichlet":
        def model_factory(predictions, measurements, uncertainties):
            return belt.DirichletBELT(
                predictions, measurements, uncertainties,
                regularization_strength)
    else:  # prior == "MVN"
        # NOTE(review): np.cov returns a covariance matrix, yet it is passed
        # as ``precision`` -- confirm this is what MVNBELT expects.
        precision = np.cov(predictions.values.T)

        def model_factory(predictions, measurements, uncertainties):
            return belt.MVNBELT(
                predictions, measurements, uncertainties,
                regularization_strength, precision=precision)

    bootstrap_index_list = np.array_split(
        np.arange(len(predictions)), ALA3.kfold)
    train_chi, test_chi = belt.cross_validated_mcmc(
        predictions.values, measurements.values, uncertainties.values,
        model_factory, bootstrap_index_list, ALA3.num_samples,
        thin=ALA3.thin)

    train_mean = train_chi.mean()
    test_mean = test_chi.mean()

    # Single-argument print call: same output under Python 2 and Python 3.
    print("%s %s %s" % (regularization_strength, train_mean, test_mean))

    # Context manager guarantees the results file is closed even if
    # formatting or writing the row raises.
    with open(ALA3.cross_val_filename, 'a') as F:
        F.write("%s,%s,%f,%d,%d,%f,%f \n" % (
            ff, prior, regularization_strength, ALA3.cross_val_stride,
            ALA3.num_samples, train_mean, test_mean))
def run(ff, prior, regularization_strength, bootstrap_index_list):
    """Sample one BELT model and save its trace and frame populations.

    The MCMC trace is written to an HDF5 file under
    ``ALA3.data_directory + "/models/"`` and the accumulated per-frame
    populations to a text file under
    ``ALA3.data_directory + "/frame_populations/"``.

    Parameters
    ----------
    ff : str
        Force-field identifier handed to ``experiment_loader.load``.
    prior : str
        One of ``"maxent"``, ``"dirichlet"`` or ``"MVN"``.
    regularization_strength : float
        Regularization strength forwarded to the BELT model constructor.
    bootstrap_index_list
        NOTE(review): the value passed in is never read -- it is replaced
        below by an ``ALA3.num_blocks``-way split of the frame indices.
        The parameter is kept so existing callers keep working.

    Notes
    -----
    ``bayesian_bootstrap_run`` is not a parameter; it is read from the
    enclosing (module) scope.  NOTE(review): callers may have intended to
    pass it as the fourth argument -- confirm against the call sites.

    Raises
    ------
    ValueError
        If ``prior`` is not one of the supported names.  The check runs
        before any data is loaded or any file name is built.
    """
    model_class_names = {
        "maxent": "MaxEntBELT",
        "dirichlet": "DirichletBELT",
        "MVN": "MVNBELT",
    }
    if prior not in model_class_names:
        raise ValueError(
            "Unknown prior %r; expected one of: %s"
            % (prior, ", ".join(sorted(model_class_names))))

    name_fields = (ff, prior, regularization_strength, bayesian_bootstrap_run)
    pymc_filename = (
        ALA3.data_directory
        + "/models/model_%s_%s_reg-%.1f-BB%d.h5" % name_fields)
    populations_filename = (
        ALA3.data_directory
        + "/frame_populations/pops_%s_%s_reg-%.1f-BB%d.dat" % name_fields)

    predictions, measurements, uncertainties = experiment_loader.load(ff)
    num_frames, _ = predictions.shape
    bootstrap_index_list = np.array_split(
        np.arange(num_frames), ALA3.num_blocks)

    # Run 0 uses the model's default prior populations; every other run
    # draws them from ensemble_fitter.sample_prior_pops.
    if bayesian_bootstrap_run == 0:
        prior_pops = None
    else:
        prior_pops = ensemble_fitter.sample_prior_pops(
            num_frames, bootstrap_index_list)

    # All three model classes are constructed with the same arguments, so
    # only the class itself depends on ``prior``.
    model_class = getattr(belt, model_class_names[prior])
    model = model_class(
        predictions.values, measurements.values, uncertainties.values,
        regularization_strength, prior_pops=prior_pops)

    model.sample(
        ALA3.num_samples, thin=ALA3.thin, burn=ALA3.burn,
        filename=pymc_filename)
    p = model.accumulate_populations()
    np.savetxt(populations_filename, p)
# ---- Figure-wide settings and the dihedral grid ---------------------------
matplotlib.rcParams.update({'font.size': 20})

alpha = 0.2
num_grid = 500
phi = np.linspace(-180, 180, num_grid)
O = np.ones(num_grid)
colors = ["b", "g", "r", "y"]
simulation_data = {}

ff = "amber99"

# Load FF data for comparison, not used in actual figure
phi1, psi1, ass_raw, state_ind = experiment_loader.load_rama(ff, 1)

# J3_HN_HA evaluated on every point of the phi grid.
J = scalar_couplings.J3_HN_HA(phi)

# ---- Fit a MaxEnt BELT model to the single selected measurement ------------
coupling_key = ("JC", 2, "J3_HN_HA")
predictions, measurements, uncertainties = experiment_loader.load(
    ff, keys=[coupling_key])

# First (and, given the single key, presumably only) measurement/uncertainty.
yi = measurements.iloc[0]
oi = uncertainties.iloc[0]

factor = 1.0  # multiplier applied to the experimental uncertainties
regularization_strength = 0.2

scaled_uncertainties = factor * uncertainties.values
model = belt.MaxEntBELT(
    predictions.values,
    measurements.values,
    scaled_uncertainties,
    regularization_strength,
)
model.sample(5000)

# ---- Posterior traces and the uniform reference populations ----------------
ai = model.mcmc.trace("alpha")[:]
mu = model.trace_observable(predictions.values[:, 0])

n = len(predictions)
prior_pops = np.ones(n) / float(n)
# Plot the trace of the first "alpha" component of a saved LVBP model.
from fitensemble import lvbp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ALA3
import experiment_loader

ff = "amber99"
prior = "maxent"

# Regularization strength is looked up per prior and force field.
regularization_strength = ALA3.regularization_strength_dict[prior][ff]
#regularization_strength = 5.0

data_directory = "/%s/%s/" % (ALA3.data_dir, ff)
model_directory = "/%s/%s/models-%s/" % (ALA3.data_dir, ff, prior)
#model_directory = "/%s/%s/models-all-expt-%s/" % (ALA3.data_dir, ff, prior)

predictions, measurements, uncertainties = experiment_loader.load(data_directory)
phi, psi, ass_raw, state_ind = experiment_loader.load_rama(data_directory, 1)

# "%d" truncates the (float) regularization strength to an integer in the
# file name, e.g. 5.0 -> "reg-5-BB0.h5".
lvbp_model = lvbp.LVBP.load(model_directory + "/reg-%d-BB0.h5" % regularization_strength)
#p = np.loadtxt(model_directory + "reg-%d-frame-populations.dat" % regularization_strength)

a = lvbp_model.mcmc.trace("alpha")[:]

# Use the explicitly imported pyplot namespace: a bare ``plot`` only exists
# in a pylab-style interactive session and raises NameError in a plain script.
plt.plot(a[:, 0])
# Tabulate reduced chi^2 (all / train / test keys) for the raw ensemble of
# each force field and for the MCMC-reweighted ensemble of each
# (force field, prior) pair.
import pandas as pd
import numpy as np
import experiment_loader
import ALA3
from fitensemble import belt
import itertools

num_BB = 2  # number of Bayesian-bootstrap runs concatenated per model

grid = itertools.product(ALA3.ff_list, ALA3.prior_list)

data = {}
for k, (ff, prior) in enumerate(grid):
    print(ff, prior)
    regularization_strength = ALA3.regularization_strength_dict[prior][ff]
    predictions, measurements, uncertainties = experiment_loader.load(ff, keys=None)

    # Raw ensemble: z-scores of the column means of the predictions.
    # (Recomputed for every prior of the same force field; the value stored
    # under ``ff`` is identical each time.)
    z = (predictions.mean() - measurements) / uncertainties
    chi2_all = (z ** 2).mean()
    chi2_train = (z[ALA3.train_keys] ** 2).mean()
    chi2_test = (z[ALA3.test_keys] ** 2).mean()
    data[ff] = chi2_all, chi2_train, chi2_test

    # Reweighted ensemble: concatenate the stored traces of all bootstrap
    # runs.  pd.read_hdf opens each file read-only and closes it again,
    # whereas indexing a bare pd.HDFStore leaves the handle open and, in its
    # default append mode, creates an empty file when the path is missing.
    mcmc_filename = "mcmc_traces/mu_%s_%s_reg-%.1f-BB%d.h5"
    mu_mcmc = pd.concat([
        pd.read_hdf(
            mcmc_filename % (ff, prior, regularization_strength, bayesian_bootstrap_run),
            "data")
        for bayesian_bootstrap_run in range(num_BB)
    ])

    z = (mu_mcmc - measurements) / uncertainties
    chi2_all = (z ** 2).mean().mean()
    chi2_train = (z[ALA3.train_keys] ** 2).mean().mean()
    chi2_test = (z[ALA3.test_keys] ** 2).mean().mean()
    data["%s_%s" % (ff, prior)] = chi2_all, chi2_train, chi2_test

# One row per ensemble, one column per key subset.
columns = ["all", "train", "test"]
data = pd.DataFrame(data, index=columns).T

print("***********")
import experiment_loader import sys import ALA3 ff = "amber96" effective_counts = 1000.0 num_bins = 0 num_states = num_bins ** 2 prior = "BW%d" % num_bins directory = "%s/%s" % (ALA3.data_dir, ff) out_dir = directory + "/models-%s/" % prior pymc_filename = out_dir + "/model.h5" predictions, measurements, uncertainties = experiment_loader.load(ff, stride=ALA3.stride) phi, psi, ass_raw, state_ind = experiment_loader.load_rama(ff, ALA3.stride) num_frames, num_measurements = predictions.shape if num_bins != 0: assignments = schwalbe_couplings.assign_grid(phi, psi, num_bins)[2] else: assignments = ass_raw prior_state_pops = np.bincount(assignments).astype("float") prior_state_pops /= prior_state_pops.sum() prior_state_pops *= effective_counts ALA3.bw_num_samples = 10000 model = bayesian_weighting.BayesianWeighting( predictions.values, measurements.values, uncertainties.values, assignments, prior_state_pops=prior_state_pops
# Driver script: sample a MaxEnt LVBP model against all experiments and
# write the MCMC trace plus the accumulated frame populations to disk.
import numpy as np
from fitensemble import lvbp
import experiment_loader
import sys
import ALA3

# ---- Run configuration ------------------------------------------------------
regularization_strength = 5.0
prior = "maxent"
ff = "amber99"
bayesian_bootstrap_run = 0

num_samples = 100000000
stride = 10
thin = 1000

# ---- Output locations -------------------------------------------------------
directory = "%s/%s" % (ALA3.data_dir , ff)
out_dir = directory + "/models-all-expt-%s/" % prior
# "%d" renders the float regularization strength as an integer in file names.
pymc_filename = out_dir + "/reg-%d-BB%d.h5" % (
    regularization_strength,
    bayesian_bootstrap_run,
)

# ---- Data -------------------------------------------------------------------
loaded = experiment_loader.load(
    directory, stride=stride, select_keys=ALA3.all_keys)
(predictions, measurements, uncertainties,
 phi, psi, ass_raw, state_ind) = loaded

print(predictions)

num_frames, num_measurements = predictions.shape

# ---- Sampling ---------------------------------------------------------------
model = lvbp.MaxEnt_LVBP(
    predictions.values,
    measurements.values,
    uncertainties.values,
    regularization_strength,
)
model.sample(num_samples, thin=thin, burn=ALA3.burn, filename=pymc_filename)

# ---- Save accumulated populations --------------------------------------------
p = model.accumulate_populations()
populations_filename = out_dir + "/reg-%d-frame-populations.dat" % (regularization_strength)
np.savetxt(populations_filename, p)
# For every sampled population vector of a MaxEnt BELT model, record the
# chi^2 term, the regularization (entropy) term and a log-determinant term.
import pandas as pd
import numpy as np
from fitensemble import bayesian_weighting, belt
import experiment_loader
import ALA3

# ---- Run configuration ------------------------------------------------------
ff = "amber96"
regularization_strength = 10.0
stride = 40
thin = 500
steps = 1000000

# ---- Sample the model ---------------------------------------------------------
predictions, measurements, uncertainties = experiment_loader.load(ff, stride=stride)

model = belt.MaxEntBELT(
    predictions.values,
    measurements.values,
    uncertainties.values,
    regularization_strength,
)
model.sample(steps, thin=thin, burn=ALA3.burn)

# ---- Per-sample terms ---------------------------------------------------------
chi2 = []
prior = []
H_terms = []

for j, p in enumerate(model.iterate_populations()):
    # Population-weighted prediction of every observable.
    mu = predictions.T.dot(p)
    z = (mu - measurements) / uncertainties
    chi2.append(0.5 * (z ** 2).sum())

    # regularization_strength times -sum(p * log p).
    p_log_p = p.dot(np.log(p))
    prior.append(regularization_strength * -1.0 * p_log_p)

    # Matrix built from the reciprocal populations: minus the diagonal of
    # 1/p for all but the last entry, with 1/p[-1] subtracted everywhere.
    inverse_p = p ** -1.
    H = -np.diag(inverse_p[:-1]) - inverse_p[-1]
    log_abs_det = np.linalg.slogdet(H)[1]
    H_terms.append(0.5 * log_abs_det)

R = pd.DataFrame({"chi2":chi2, "prior":prior, "H":H_terms})
# Load a saved BayesianWeighting model and prepare state-averaged predictions
# for the test-set observables.
import numpy as np
from fitensemble import bayesian_weighting
import experiment_loader
import sys
import ALA3

# ---- Configuration ------------------------------------------------------------
prior = "BW"
ff = "amber96"
effective_counts = 1.

out_dir = ALA3.data_directory + "/BW_models/"
pymc_filename = out_dir + "/model_BW_%s.h5" % (ff)

# ---- Data -----------------------------------------------------------------------
predictions, measurements, uncertainties = experiment_loader.load(ff)
phi, psi, ass_raw, state_ind = experiment_loader.load_rama(ff, ALA3.stride)

num_frames, num_measurements = predictions.shape
num_states = ass_raw.max() + 1

# ---- Prior state populations ------------------------------------------------------
# Normalized state counts scaled by effective_counts ...
state_counts = np.bincount(ass_raw).astype('float')
prior_state_pops = state_counts / state_counts.sum()
prior_state_pops = prior_state_pops * effective_counts
# ... which are immediately replaced by a flat vector of ones.
prior_state_pops = np.ones(num_states)

# ---- Saved model and its fit to the loaded measurements --------------------------
model = bayesian_weighting.BayesianWeighting.load(pymc_filename)
mu = model.mcmc.trace("mu")[:]

z = (mu - measurements.values) / uncertainties.values
chi2 = (z ** 2).mean(0).mean()

# ---- Test-set predictions averaged within each state ------------------------------
predictions_test, measurements_test, uncertainties_test = experiment_loader.load(
    ff, keys=ALA3.test_keys)

statewise_rows = []
for i in np.arange(model.num_states):
    in_state = ass_raw == i
    statewise_rows.append(predictions_test.values[in_state].mean(0))
predictions_test_statewise = np.array(statewise_rows)
# Merge conformational state 3 into state 2 and convert frame-wise
# predictions into state-wise predictions for Bayesian weighting.
import pandas as pd
import numpy as np
from fitensemble import bayesian_weighting
import experiment_loader
import sys
import ALA3

# ---- Configuration ------------------------------------------------------------
prior = "BW"
ff = "amber99sbnmr-ildn"
out_dir = ALA3.data_directory + "/BW_models/"

# ---- Data -----------------------------------------------------------------------
predictions_framewise, measurements, uncertainties = experiment_loader.load(ff)
phi, psi, ass_raw0, state_ind0 = experiment_loader.load_rama(ff, ALA3.stride)

# ---- Collapse state 3 into state 2 (on copies of the loaded arrays) ---------------
state_ind1 = state_ind0[0:3].copy()
ass_raw = ass_raw0.copy()

# Build the mask once, before the assignments are relabelled.
was_state_3 = ass_raw == 3
state_ind1[2][was_state_3] = 2
ass_raw[was_state_3] = 2

# Alternative, currently disabled: additionally collapse state 2 into state 1.
#state_ind = state_ind1[0:2]
#state_ind[1][ass_raw == 2] = 1
#ass_raw[ass_raw == 2] = 1

# ---- State populations --------------------------------------------------------------
num_states = ass_raw.max() + 1
prior_pops = np.ones(num_states)

# Fraction of frames assigned to each (merged) state.
raw_pops = np.bincount(ass_raw).astype('float')
raw_pops /= raw_pops.sum()

# ---- State-wise predictions, keeping the original column labels ---------------------
statewise_values = bayesian_weighting.framewise_to_statewise(
    predictions_framewise, ass_raw)
predictions = pd.DataFrame(
    statewise_values, columns=predictions_framewise.columns)
# For every (prior, force field) pair, trace the test-set observables through
# the saved LVBP model and store the result as "test_set_mcmc.h5".
from fitensemble import lvbp
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ALA3
import experiment_loader

# Result tables, one row per prior and one column per force field.  They are
# allocated as zeros here and not filled in by the loop below.
table_shape = (len(ALA3.prior_list), len(ALA3.ff_list))
rms_raw = np.zeros(table_shape)
rms_lvbp = np.zeros(table_shape)

# Same visiting order as a prior-outer / force-field-inner nested loop.
pairs = itertools.product(enumerate(ALA3.prior_list), enumerate(ALA3.ff_list))
for (i, prior), (j, ff) in pairs:
    regularization_strength = ALA3.regularization_strength_dict[prior][ff]

    data_directory = "/%s/%s/" % (ALA3.data_dir, ff)
    model_directory = "/%s/%s/models-%s/" % (ALA3.data_dir, ff, prior)

    # Saved frame populations for this model (loaded, not used further here).
    populations_path = model_directory + "reg-%d-frame-populations.dat" % regularization_strength
    p = np.loadtxt(populations_path)

    predictions, measurements, uncertainties = experiment_loader.load(
        data_directory, keys=ALA3.test_keys)

    model_path = model_directory + "/reg-%d-BB0.h5" % regularization_strength
    lvbp_model = lvbp.LVBP.load(model_path)

    # One column per test observable, labelled like the predictions.
    mu_lvbp = lvbp_model.trace_observable(predictions)
    data = pd.DataFrame(mu_lvbp, columns=predictions.columns)
    print(data)

    data.to_hdf(model_directory + "test_set_mcmc.h5", 'data', mode="w")
    print(prior, ff)

rms_raw = pd.DataFrame(rms_raw, columns=ALA3.ff_list, index=ALA3.prior_list)
rms_lvbp = pd.DataFrame(rms_lvbp, columns=ALA3.ff_list, index=ALA3.prior_list)