def run(ff, prior, regularization_strength):
    """Cross-validate one BELT model and append a summary row to the results file.

    Data for ``ff`` are loaded with ``ALA3.cross_val_stride``, the frame
    indices are split into ``ALA3.kfold`` contiguous blocks, and
    ``belt.cross_validated_mcmc`` is run with a factory that builds the model
    class selected by ``prior``.  The means of the two arrays it returns are
    printed and appended as one comma-separated line to
    ``ALA3.cross_val_filename``.

    Parameters
    ----------
    ff
        Data-set identifier forwarded to ``experiment_loader.load`` and written
        to the output row.
    prior
        ``"maxent"``, ``"dirichlet"`` or ``"MVN"``; selects the BELT class.
    regularization_strength
        Forwarded to the model constructor and written to the output row.

    Raises
    ------
    ValueError
        If ``prior`` is not one of the three supported names.  Checked before
        any data is loaded, so a typo fails immediately instead of surfacing
        later as an unbound ``model_factory``.
    """
    if prior not in ("maxent", "dirichlet", "MVN"):
        raise ValueError("Unknown prior %r; expected 'maxent', 'dirichlet' or 'MVN'" % (prior,))

    predictions, measurements, uncertainties = experiment_loader.load(ff, stride=ALA3.cross_val_stride)

    if prior == "maxent":
        model_class, extra_kwargs = belt.MaxEntBELT, {}
    elif prior == "dirichlet":
        model_class, extra_kwargs = belt.DirichletBELT, {}
    else:  # "MVN"
        # NOTE(review): this is the covariance of the predictions but it is
        # passed as ``precision`` -- confirm against MVNBELT whether an
        # inverse is expected.
        precision = np.cov(predictions.values.T)
        model_class, extra_kwargs = belt.MVNBELT, {"precision": precision}

    def model_factory(predictions, measurements, uncertainties):
        # Called by cross_validated_mcmc with the data it wants a model for.
        return model_class(predictions, measurements, uncertainties, regularization_strength, **extra_kwargs)

    bootstrap_index_list = np.array_split(np.arange(len(predictions)), ALA3.kfold)
    train_chi, test_chi = belt.cross_validated_mcmc(predictions.values, measurements.values, uncertainties.values, model_factory, bootstrap_index_list, ALA3.num_samples, thin=ALA3.thin)

    train_mean = train_chi.mean()
    test_mean = test_chi.mean()

    # A single pre-formatted string prints the same text on Python 2 and 3.
    print("%s %s %s" % (regularization_strength, train_mean, test_mean))

    # Append mode: each call adds one row; the context manager closes the
    # file even if formatting or writing raises.
    with open(ALA3.cross_val_filename, 'a') as F:
        F.write("%s,%s,%f,%d,%d,%f,%f \n"% (ff, prior, regularization_strength, ALA3.cross_val_stride, ALA3.num_samples, train_mean, test_mean))
def run(ff, prior, regularization_strength, bootstrap_index_list):
    """Sample one BELT model for ``ff`` and save its trace and frame populations.

    The model class is chosen by ``prior``; sampling uses ``ALA3.num_samples``,
    ``ALA3.thin`` and ``ALA3.burn``.  The trace is written to a file under
    ``ALA3.data_directory + "/models/"`` and the accumulated populations are
    saved with ``np.savetxt`` under
    ``ALA3.data_directory + "/frame_populations/"``.

    Parameters
    ----------
    ff
        Data-set identifier forwarded to ``experiment_loader.load`` and used in
        the output filenames.
    prior
        ``"maxent"``, ``"dirichlet"`` or ``"MVN"``; selects the BELT class.
    regularization_strength
        Forwarded to the model constructor and used in the output filenames.
    bootstrap_index_list
        Never read: the frame blocks are always recomputed from the loaded
        data.  Kept so existing call sites continue to work.

    Raises
    ------
    ValueError
        If ``prior`` is not one of the three supported names.  Checked before
        any data is loaded, so a typo fails immediately instead of surfacing
        later as an unbound ``model``.

    Notes
    -----
    NOTE(review): ``bayesian_bootstrap_run`` is read from module scope, not
    from the arguments, so it must be defined before this function is called.
    The unused fourth parameter may originally have been meant to carry it --
    confirm against the callers.
    """
    if prior not in ("maxent", "dirichlet", "MVN"):
        raise ValueError("Unknown prior %r; expected 'maxent', 'dirichlet' or 'MVN'" % (prior,))

    name_fields = (ff, prior, regularization_strength, bayesian_bootstrap_run)
    pymc_filename = ALA3.data_directory + "/models/model_%s_%s_reg-%.1f-BB%d.h5" % name_fields
    populations_filename = ALA3.data_directory + "/frame_populations/pops_%s_%s_reg-%.1f-BB%d.dat" % name_fields

    predictions, measurements, uncertainties = experiment_loader.load(ff)

    num_frames, num_measurements = predictions.shape
    # Contiguous blocks of frame indices; a separate local name is used so the
    # (ignored) parameter is no longer silently rebound.
    frame_blocks = np.array_split(np.arange(num_frames), ALA3.num_blocks)

    # Run 0 uses the model's default prior populations; other runs draw them.
    if bayesian_bootstrap_run == 0:
        prior_pops = None
    else:
        prior_pops = ensemble_fitter.sample_prior_pops(num_frames, frame_blocks)

    if prior == "maxent":
        model_class = belt.MaxEntBELT
    elif prior == "dirichlet":
        model_class = belt.DirichletBELT
    else:  # "MVN"
        model_class = belt.MVNBELT

    model = model_class(predictions.values, measurements.values, uncertainties.values, regularization_strength, prior_pops=prior_pops)

    model.sample(ALA3.num_samples, thin=ALA3.thin, burn=ALA3.burn, filename=pymc_filename)
    p = model.accumulate_populations()
    np.savetxt(populations_filename, p)
# Script fragment: configure matplotlib, build a phi grid, load the amber99
# J3_HN_HA data and sample a MaxEnt BELT model.  Everything is left in
# module-level names; nothing in this fragment draws a figure.

# Set the default matplotlib font size to 20.
matplotlib.rcParams.update({'font.size': 20})

# Constants not consumed within this fragment (presumably used by plotting
# code further down -- TODO confirm).
alpha = 0.2
num_grid = 500
# num_grid evenly spaced values from -180 to 180 inclusive (presumably degrees).
phi = np.linspace(-180,180,num_grid)
# Vector of ones with the same length as phi.
O = np.ones(num_grid)
colors = ["b","g","r","y"]

# Empty container; never filled in this fragment.
simulation_data = {}

ff = "amber99"  # Load FF data for comparison, not used in actual figure
phi1, psi1, ass_raw, state_ind = experiment_loader.load_rama(ff, 1)

# Evaluate scalar_couplings.J3_HN_HA on the phi grid.
J = scalar_couplings.J3_HN_HA(phi)

# Restrict the loaded data to the single key ("JC", 2, "J3_HN_HA").
predictions, measurements, uncertainties = experiment_loader.load(ff, keys=[("JC", 2, "J3_HN_HA")])
# First (positional) measurement and its uncertainty.
yi = measurements.iloc[0]
oi = uncertainties.iloc[0]

# `factor` multiplies the uncertainties handed to the model (1.0 = unchanged).
factor = 1.0
regularization_strength = 0.2
model = belt.MaxEntBELT(predictions.values, measurements.values, factor * uncertainties.values, regularization_strength)
model.sample(5000)

#obs = belt.

# Full "alpha" trace from the sampler, and the trace of the first prediction
# column as returned by model.trace_observable.
ai = model.mcmc.trace("alpha")[:]
mu = model.trace_observable(predictions.values[:,0])

# Uniform weights: one entry of 1/n per row of `predictions`.
n = len(predictions)
prior_pops = np.ones(n) / float(n)
from fitensemble import lvbp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ALA3
import experiment_loader

# Script fragment: load a saved LVBP model for one force field / prior and
# plot the first column of its "alpha" trace.
ff = "amber99"
prior = "maxent"
regularization_strength = ALA3.regularization_strength_dict[prior][ff]
#regularization_strength = 5.0

data_directory = "/%s/%s/" % (ALA3.data_dir, ff)
model_directory = "/%s/%s/models-%s/" % (ALA3.data_dir, ff, prior)
#model_directory = "/%s/%s/models-all-expt-%s/" % (ALA3.data_dir, ff, prior)

predictions, measurements, uncertainties = experiment_loader.load(data_directory)
phi, psi, ass_raw, state_ind = experiment_loader.load_rama(data_directory, 1)
# "%d" truncates a non-integer strength, so the name must match whatever
# wrote the model file.
lvbp_model = lvbp.LVBP.load(model_directory + "/reg-%d-BB0.h5" % regularization_strength)

#p = np.loadtxt(model_directory + "reg-%d-frame-populations.dat" % regularization_strength)
a = lvbp_model.mcmc.trace("alpha")[:]
# Call plot through the imported pyplot module: a bare `plot` is only defined
# in an interactive pylab session and is a NameError when run as a script.
plt.plot(a[:,0])
import pandas as pd
import numpy as np
import experiment_loader
import ALA3
from fitensemble import belt
import itertools

# Script fragment: for every (force field, prior) pair, compute the mean
# squared z-score of the raw predictions and of the saved MCMC traces, over
# all keys, ALA3.train_keys and ALA3.test_keys, and collect them in a table.
num_BB = 2
grid = itertools.product(ALA3.ff_list, ALA3.prior_list)

# Filename template for the saved traces; fields are
# (ff, prior, regularization_strength, bayesian_bootstrap_run).
mcmc_filename = "mcmc_traces/mu_%s_%s_reg-%.1f-BB%d.h5"

data = {}
for k, (ff, prior) in enumerate(grid):
    print(ff, prior)
    regularization_strength = ALA3.regularization_strength_dict[prior][ff]
    predictions, measurements, uncertainties = experiment_loader.load(ff, keys=None)

    # Raw (unweighted) mean predictions.  This row is keyed by ff only, so it
    # is rewritten once per prior for the same ff.
    z = (predictions.mean() - measurements) / uncertainties
    chi2_all = (z ** 2).mean()
    chi2_train = (z[ALA3.train_keys] ** 2).mean()
    chi2_test = (z[ALA3.test_keys] ** 2).mean()
    data[ff] = chi2_all, chi2_train, chi2_test

    # Concatenate the traces of all num_BB runs.  pd.read_hdf closes each
    # file after reading; indexing a bare pd.HDFStore(...) left every store
    # open and, in its default append mode, created an empty file whenever
    # the path did not exist.
    mu_mcmc = pd.concat([
        pd.read_hdf(mcmc_filename % (ff, prior, regularization_strength, bayesian_bootstrap_run), "data")
        for bayesian_bootstrap_run in range(num_BB)
    ])
    z = (mu_mcmc - measurements) / uncertainties
    # First mean is over samples, second over measurements.
    chi2_all = (z ** 2).mean().mean()
    chi2_train = (z[ALA3.train_keys] ** 2).mean().mean()
    chi2_test = (z[ALA3.test_keys] ** 2).mean().mean()
    data["%s_%s" % (ff, prior)] = chi2_all, chi2_train, chi2_test

# One row per dictionary key, one column per statistic.
columns = ["all", "train", "test"]
data = pd.DataFrame(data, index=columns).T
print("***********")
import experiment_loader
import sys
import ALA3

# Script fragment: set up inputs for a BayesianWeighting run -- settings,
# output paths, data, state assignments and state prior counts.

# --- Settings ---------------------------------------------------------------
ff = "amber96"

num_bins = 0  # 0 selects the raw assignments returned by load_rama
effective_counts = 1000.0
num_states = num_bins * num_bins
prior = "BW%d" % num_bins

# --- Output locations ---------------------------------------------------------
directory = "%s/%s" % (ALA3.data_dir, ff)
out_dir = directory + "/models-%s/" % prior
pymc_filename = out_dir + "/model.h5"

# --- Input data -----------------------------------------------------------------
predictions, measurements, uncertainties = experiment_loader.load(
    ff, stride=ALA3.stride
)
phi, psi, ass_raw, state_ind = experiment_loader.load_rama(ff, ALA3.stride)
num_frames, num_measurements = predictions.shape

# --- State assignments ------------------------------------------------------------
if num_bins == 0:
    assignments = ass_raw
else:
    # Third element of assign_grid's return value.
    assignments = schwalbe_couplings.assign_grid(phi, psi, num_bins)[2]

# --- Prior state populations --------------------------------------------------------
# Per-state frame counts, normalised to sum to one, then scaled so they sum
# to effective_counts.
state_counts = np.bincount(assignments).astype("float")
prior_state_pops = state_counts / state_counts.sum() * effective_counts

# Override the sample count stored on the shared ALA3 module.
ALA3.bw_num_samples = 10000
model = bayesian_weighting.BayesianWeighting(
    predictions.values, measurements.values, uncertainties.values, assignments, prior_state_pops=prior_state_pops
import numpy as np
from fitensemble import lvbp
import experiment_loader
import sys
import ALA3

# Script fragment: sample a MaxEnt LVBP model on all experiments for one
# force field, then save the trace and the accumulated frame populations.

# --- Settings ---------------------------------------------------------------
ff = "amber99"
prior = "maxent"
regularization_strength = 5.0
bayesian_bootstrap_run = 0

num_samples = 10 ** 8
thin = 1000
stride = 10

# --- Output locations ---------------------------------------------------------
directory = "%s/%s" % (ALA3.data_dir , ff)
out_dir = directory + "/models-all-expt-%s/" % prior
pymc_filename = out_dir + "/reg-%d-BB%d.h5" % (regularization_strength, bayesian_bootstrap_run)

# --- Input data -----------------------------------------------------------------
# This loader call returns the experimental data and the Ramachandran
# quantities together, as seven values.
(predictions, measurements, uncertainties,
 phi, psi, ass_raw, state_ind) = experiment_loader.load(
    directory, stride=stride, select_keys=ALA3.all_keys
)
print(predictions)
num_frames, num_measurements = predictions.shape

# --- Sampling -----------------------------------------------------------------------
model = lvbp.MaxEnt_LVBP(
    predictions.values,
    measurements.values,
    uncertainties.values,
    regularization_strength,
)
model.sample(num_samples, thin=thin, burn=ALA3.burn, filename=pymc_filename)

# --- Save frame populations -------------------------------------------------------------
p = model.accumulate_populations()
populations_path = out_dir+"/reg-%d-frame-populations.dat" % (regularization_strength)
np.savetxt(populations_path, p)
import pandas as pd
import numpy as np
from fitensemble import bayesian_weighting, belt
import experiment_loader
import ALA3

# Script fragment: sample a MaxEnt BELT model and, for every sampled
# population vector, record three terms (chi2, prior, H) in a DataFrame.

# --- Settings ---------------------------------------------------------------
ff = "amber96"
regularization_strength = 10.0
stride = 40

steps = 1000000
thin = 500

# --- Data and sampling ----------------------------------------------------------
predictions, measurements, uncertainties = experiment_loader.load(ff, stride=stride)

model = belt.MaxEntBELT(
    predictions.values,
    measurements.values,
    uncertainties.values,
    regularization_strength,
)
model.sample(steps, thin=thin, burn=ALA3.burn)

# --- Per-sample terms ---------------------------------------------------------------
chi2, prior, H_terms = [], [], []
neg_strength = regularization_strength * -1.0
for j, p in enumerate(model.iterate_populations()):
    # Population-weighted prediction of every measurement.
    mu = predictions.T.dot(p)
    z = (mu - measurements) / uncertainties
    chi2.append(0.5 * (z ** 2).sum())

    # Entropy-like term: -strength * sum(p * log p).
    prior.append(neg_strength * p.dot(np.log(p)))

    # H = -diag(1 / p[:-1]) - 1 / p[-1]; only log|det H| is kept.
    inv_p = p ** -1.
    H = -np.diag(inv_p[:-1]) - inv_p[-1]
    H_terms.append(0.5 * np.linalg.slogdet(H)[1])

R = pd.DataFrame(dict(chi2=chi2, prior=prior, H=H_terms))
import numpy as np
from fitensemble import bayesian_weighting
import experiment_loader
import sys
import ALA3

# Script fragment: load a saved BayesianWeighting model for one force field,
# compute a mean squared z-score from its "mu" trace, and build per-state
# mean predictions for the test keys.
prior = "BW"
ff = "amber96"
effective_counts = 1.

# Location of the saved model file.
out_dir = ALA3.data_directory + "/BW_models/"
pymc_filename = out_dir + "/model_BW_%s.h5" % (ff)

# NOTE(review): load() is called without a stride while load_rama() uses
# ALA3.stride.  `ass_raw` is later used as a row mask on the loaded
# predictions, so both must cover the same frames -- confirm load()'s default.
predictions, measurements, uncertainties = experiment_loader.load(ff)
phi, psi, ass_raw, state_ind = experiment_loader.load_rama(ff, ALA3.stride)
num_frames, num_measurements = predictions.shape

# Number of states = largest assignment label + 1.
num_states = ass_raw.max() + 1
# Per-state frame counts, normalised and scaled by effective_counts ...
prior_state_pops = np.bincount(ass_raw).astype('float')
prior_state_pops /= prior_state_pops.sum()
prior_state_pops *= effective_counts

# ... then immediately replaced by a vector of ones, so the count-based value
# above is discarded.  Nothing below reads prior_state_pops.
prior_state_pops = np.ones(num_states)

# The model is loaded from disk rather than constructed here.
model = bayesian_weighting.BayesianWeighting.load(pymc_filename)

# Mean squared z-score of the "mu" trace: averaged along axis 0 first, then
# over the remaining entries.
mu = model.mcmc.trace("mu")[:]
chi2 = (((mu - measurements.values) / uncertainties.values)**2).mean(0).mean()

# For each state index below model.num_states, average the test-key
# predictions over the frames assigned to that state.
predictions_test, measurements_test, uncertainties_test = experiment_loader.load(ff, keys=ALA3.test_keys)
predictions_test_statewise = np.array([predictions_test.values[ass_raw == i].mean(0) for i in np.arange(model.num_states)])
import pandas as pd
import numpy as np
from fitensemble import bayesian_weighting
import experiment_loader
import sys
import ALA3

# Script fragment: load framewise data, relabel state 3 as state 2, and
# convert the framewise predictions to statewise predictions.

# --- Settings ---------------------------------------------------------------
ff = "amber99sbnmr-ildn"
prior = "BW"

out_dir = ALA3.data_directory + "/BW_models/"

# --- Input data -----------------------------------------------------------------
predictions_framewise, measurements, uncertainties = experiment_loader.load(ff)
phi, psi, ass_raw0, state_ind0 = experiment_loader.load_rama(ff, ALA3.stride)

# --- Merge state 3 into state 2 -----------------------------------------------------
# Work on copies so the loaded arrays stay untouched.
state_ind1 = state_ind0[:3].copy()
ass_raw = ass_raw0.copy()
was_state_3 = ass_raw == 3
state_ind1[2][was_state_3] = 2
ass_raw[was_state_3] = 2

# Disabled alternative that would additionally relabel state 2 as state 1:
#state_ind = state_ind1[0:2]
#state_ind[1][ass_raw == 2] = 1
#ass_raw[ass_raw == 2] = 1

# --- State populations ------------------------------------------------------------------
num_states = ass_raw.max() + 1

prior_pops = np.ones(num_states)
# Fraction of frames assigned to each state after the merge.
raw_counts = np.bincount(ass_raw).astype('float')
raw_pops = raw_counts / raw_counts.sum()

# --- Statewise predictions ------------------------------------------------------------------
statewise_values = bayesian_weighting.framewise_to_statewise(predictions_framewise, ass_raw)
predictions = pd.DataFrame(statewise_values, columns=predictions_framewise.columns)
from fitensemble import lvbp
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ALA3
import experiment_loader

# Script fragment: for every (prior, force field) pair, load the saved LVBP
# model, trace the test-key predictions and write them to
# "test_set_mcmc.h5" in the model directory.
#
# NOTE(review): rms_raw and rms_lvbp are allocated as zeros and never
# written inside the loop, so the DataFrames built at the end are all zeros.
grid_shape = (len(ALA3.prior_list), len(ALA3.ff_list))
rms_raw = np.zeros(grid_shape)
rms_lvbp = np.zeros(grid_shape)

for i, prior in enumerate(ALA3.prior_list):
    for j, ff in enumerate(ALA3.ff_list):
        regularization_strength = ALA3.regularization_strength_dict[prior][ff]
        data_directory = "/%s/%s/" % (ALA3.data_dir, ff)
        model_directory = "/%s/%s/models-%s/" % (ALA3.data_dir, ff, prior)

        # Frame populations are loaded but not used further in this fragment.
        populations_path = model_directory + "reg-%d-frame-populations.dat" % regularization_strength
        p = np.loadtxt(populations_path)

        predictions, measurements, uncertainties = experiment_loader.load(
            data_directory, keys=ALA3.test_keys
        )

        model_path = model_directory + "/reg-%d-BB0.h5" % regularization_strength
        lvbp_model = lvbp.LVBP.load(model_path)
        mu_lvbp = lvbp_model.trace_observable(predictions)

        # One column per test key, labelled like the predictions.
        data = pd.DataFrame(mu_lvbp, columns=predictions.columns)
        print(data)
        data.to_hdf(model_directory + "test_set_mcmc.h5", 'data', mode="w")
        print(prior, ff)

# Label rows by prior and columns by force field.
rms_raw = pd.DataFrame(rms_raw, columns=ALA3.ff_list, index=ALA3.prior_list)
rms_lvbp = pd.DataFrame(rms_lvbp, columns=ALA3.ff_list, index=ALA3.prior_list)