def test_delete():
    """Check that ``delete_existing`` controls whether old tic folders survive.

    A marker file is dropped into ``tic_0``; it must still be there after
    re-creating the simulation with ``delete_existing=False`` and must be
    gone after re-creating it with ``delete_existing=True``.
    """
    tica_mdl = load(os.path.join(base_dir, "dihedral_mdl/tica_mdl.pkl"))
    tica_data = load(os.path.join(base_dir, "dihedral_mdl/tica_features.pkl"))
    df = pd.read_pickle(
        os.path.join(base_dir, "./dihedral_mdl/feature_descriptor.pkl"))
    marker = "tic_0/rand.txt"

    with enter_temp_directory():
        cur_dir = os.path.abspath(os.path.curdir)
        # Arguments shared by all three constructions below.
        shared = dict(base_dir=cur_dir,
                      tica_mdl=tica_mdl,
                      tica_data=tica_data,
                      data_frame=df,
                      grid=False,
                      interval=False)

        # Initial layout (this is the only call that renders scripts).
        TicaMetadSim(render_scripts=True, delete_existing=True, **shared)

        with open(marker, 'w') as handle:
            handle.write("t")

        TicaMetadSim(delete_existing=False, **shared)
        assert os.path.isfile(marker)

        TicaMetadSim(delete_existing=True, **shared)
        assert not os.path.isfile(marker)
Exemple #2
0
def _test_plumed_run():
    """Run one short metadynamics iteration and check the output files.

    The leading underscore keeps test collectors from picking this up.
    After the run, every expected per-tic file must exist under ``tic_0``
    and no ``swap_log.txt`` may exist in the working directory.
    """
    tica_mdl = load(os.path.join(base_dir, "dihedral_mdl/tica_mdl.pkl"))
    df = pd.read_pickle(
        os.path.join(base_dir, "./dihedral_mdl/feature_descriptor.pkl"))
    starting_coordinates_folder = os.path.join(base_dir,
                                               "starting_coordinates")
    sim_pickle = "./metad_sim.pkl"

    with enter_temp_directory():
        cur_dir = os.path.abspath(os.path.curdir)
        TicaMetadSim(base_dir=cur_dir,
                     starting_coordinates_folder=starting_coordinates_folder,
                     tica_mdl=tica_mdl,
                     data_frame=df,
                     grid=False,
                     interval=False,
                     wall=False,
                     platform='CPU',
                     n_iterations=1,
                     swap_rate=5,
                     sim_save_rate=10,
                     pace=1,
                     stride=1)
        meta_sim = load(sim_pickle)
        run_meta_sim(sim_pickle)

        expected_files = [meta_sim.bias_file, meta_sim.hills_file,
                          "speed_report.txt", "trajectory.dcd",
                          "plumed_script.dat", "checkpt.chk"]
        for tic in range(1):
            tic_dir = os.path.join(cur_dir, "tic_%d" % tic)
            for fname in expected_files:
                print(tic, fname)
                assert os.path.isfile(os.path.join(tic_dir, fname))
        assert not os.path.isfile("swap_log.txt")
Exemple #3
0
def get_plumed_dict(metad_sim):
    """Render the PLUMED scripts for a metadynamics simulation object.

    :param metad_sim: the simulation object, or a ``str`` path from which
        ``load`` can read one.
    :return: the value returned by ``render_tica_plumed_file`` (callers in
        this file index it by tic/replica number).
    """
    if isinstance(metad_sim, str):
        metad_sim = load(metad_sim)

    # Fill in attributes the object may lack (presumably objects pickled
    # before these options existed) so the render call below can read them.
    if not hasattr(metad_sim, "nrm"):
        metad_sim.nrm = None
    if not hasattr(metad_sim, "walker_id"):
        metad_sim.walker_id = None
        metad_sim.walker_n = None
    if not hasattr(metad_sim, "multiple_tics"):
        metad_sim.multiple_tics = None
    if not hasattr(metad_sim, "vde_mdl"):
        metad_sim.vde_mdl = None

    # The model may be stored either as a path or as the object itself.
    # Previously only the path case bound ``tica_mdl``, so an in-memory
    # model raised UnboundLocalError at the call below.
    if isinstance(metad_sim.tica_mdl, str):
        tica_mdl = load(metad_sim.tica_mdl)
    else:
        tica_mdl = metad_sim.tica_mdl

    return render_tica_plumed_file(tica_mdl=tica_mdl,
                                   df=metad_sim.data_frame,
                                   n_tics=metad_sim.n_tics,
                                   grid=metad_sim.grid,
                                   interval=metad_sim.interval,
                                   wall_list=metad_sim.wall_list,
                                   grid_list=metad_sim.grid_list,
                                   interval_list=metad_sim.interval_list,
                                   pace=metad_sim.pace,
                                   height=metad_sim.height,
                                   biasfactor=metad_sim.biasfactor,
                                   temp=metad_sim.temp,
                                   sigma=metad_sim.sigma,
                                   stride=metad_sim.stride,
                                   hills_file=metad_sim.hills_file,
                                   bias_file=metad_sim.bias_file,
                                   label=metad_sim.label,
                                   nrm=metad_sim.nrm,
                                   walker_id=metad_sim.walker_id,
                                   walker_n=metad_sim.walker_n,
                                   multiple_tics=metad_sim.multiple_tics,
                                   vde_mdl=metad_sim.vde_mdl)
def process_all_replicas(file_loc, redo=True, stride=1):
    """Re-process every (replica, tic) pair of a finished simulation.

    Changes the working directory to the simulation's ``base_dir``.

    :param file_loc: path to the pickled simulation object.
    :param redo: when true, call ``concatenate_folder`` for every
        ``tic_<i>`` folder first.
    :param stride: stride forwarded to ``concatenate_folder``.
    :return: None
    """
    sim_mdl = load(file_loc)
    os.chdir(sim_mdl.base_dir)
    top_loc = glob.glob(
        os.path.join(sim_mdl.starting_coordinates_folder, "0.pdb"))[0]

    if redo:
        for i in range(sim_mdl.n_tics):
            concatenate_folder("tic_%d" % i, top_loc, stride)

    # Override the biasing parameters on the in-memory copy before
    # rendering (presumably so that re-evaluation deposits no new hills --
    # TODO confirm). The bias file name is left as a template placeholder
    # and filled in per job below.
    sim_mdl.pace = 1000000000
    sim_mdl.height = 0
    sim_mdl.stride = 1
    sim_mdl.bias_file = "{{fname}}"
    plumed_scripts_dict = get_plumed_dict(sim_mdl)

    # One job per (r1, r2) pair: script of r1 with its own bias file name.
    jobs = []
    for r1 in range(sim_mdl.n_tics):
        for r2 in range(sim_mdl.n_tics):
            script = Template(plumed_scripts_dict[r1]).render(
                fname="r%d_t%d.bias" % (r1, r2))
            jobs.append((r1, r2, script))

    p = Pool(sim_mdl.n_tics)
    try:
        p.map(process_folder, jobs)
    finally:
        # Shut the workers down even when a job raises.
        p.close()
        p.join()
    return
Exemple #5
0
def test_load_legacy():
    """Round-trip a dict through a joblib-style dump and ``load``."""
    # Used to save joblib files
    original = {"name": "Fancy_name", "arr": np.random.rand(10, 5)}
    with tempdir():
        jl_dump(original, 'filename', compress=1)
        restored = load('filename')
    eq(original, restored)
Exemple #6
0
def test_load_legacy():
    """Dump a dict with ``jl_dump`` and check ``load`` reads it back equal."""
    # Used to save joblib files
    payload = {"name": "Fancy_name", "arr": np.random.rand(10, 5)}
    with tempdir():
        jl_dump(payload, 'filename', compress=1)
        reloaded = load('filename')
    eq(payload, reloaded)
def test_setup():
    """Build a simulation layout and check the files it leaves on disk.

    The pickled simulation must carry the same tICA components, each tic
    must have its folder with a ``plumed.dat``, and ``sub.sh`` must exist.
    """
    tica_mdl = load(os.path.join(base_dir, "dihedral_mdl/tica_mdl.pkl"))
    tica_data = load(os.path.join(base_dir, "dihedral_mdl/tica_features.pkl"))
    df = pd.read_pickle(
        os.path.join(base_dir, "./dihedral_mdl/feature_descriptor.pkl"))

    with enter_temp_directory():
        cur_dir = os.path.abspath(os.path.curdir)
        TicaMetadSim(base_dir=cur_dir,
                     tica_data=tica_data,
                     tica_mdl=tica_mdl,
                     data_frame=df,
                     grid=False,
                     interval=False,
                     wall=False,
                     render_scripts=True,
                     delete_existing=True)

        metad_sim = load("./metad_sim.pkl")

        assert eq(tica_mdl.components_, metad_sim.tica_mdl.components_)
        for tic in range(metad_sim.n_tics):
            tic_dir = "tic_%d" % tic
            assert os.path.isdir(tic_dir)
            assert os.path.isfile("tic_%d/plumed.dat" % tic)
        assert os.path.isfile("sub.sh")
def test_transform_command_1():
    """Fit KCenters via the CLI, transform the same data, compare labels."""
    with tempdir():
        fit_cmd = (
            "msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
            "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
            "--metric rmsd").format(data_home=get_data_home())
        shell(fit_cmd)

        transform_cmd = (
            "msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
            "-m model.pkl -t transformed.h5 --top "
            "{data_home}/alanine_dipeptide/ala2.pdb").format(
                data_home=get_data_home())
        shell(transform_cmd)

        # Labels stored on the model must match the transformed output.
        transformed_first = dataset('transformed.h5')[0]
        fitted_first = load('model.pkl').labels_[0]
        eq(transformed_first, fitted_first)
    def __init__(self, file_loc="metad_sim.pkl"):
        """Load the pickled simulation and build this rank's simulation.

        ``rank`` and ``size`` are read from module scope (not defined in
        this method).

        :param file_loc: path to the pickled metadynamics simulation.
        """
        from tica_metadynamics.load_sim import create_simulation

        self.file_loc = file_loc
        self.metad_sim = load(self.file_loc)
        sim = self.metad_sim  # short alias for the same object
        self.beta = 1/(boltzmann_constant * sim.temp)

        # Where this process is running.
        self.rank = rank
        self.size = size
        self.host_name = socket.gethostname()
        self.gpu_index = get_gpu_index()

        # setup MSM swap stuff
        if sim.msm_swap_folder is not None:
            self.setup_msm_swap()

        print("Hello from rank %d running tic %d on "
              "host %s with gpu %d" % (self.rank, self.rank,
                                       self.host_name, self.gpu_index))

        # With multiple walkers the walker index is taken from the name of
        # the base directory (its "walker_" characters are stripped).
        multi_walker = hasattr(sim, "n_walkers") and sim.n_walkers > 1
        if multi_walker:
            folder_name = os.path.basename(sim.base_dir)
            walker_index = int(folder_name.strip("walker_"))
            print("I am walker %d running tic%d" % (walker_index, self.rank))
            sim.walker_index = walker_index

        # A user-supplied script is used as the only entry (key 0);
        # otherwise the scripts are rendered from the simulation object.
        if sim.plumed_script is None:
            self.plumed_force_dict = get_plumed_dict(sim)
        else:
            self.plumed_force_dict = {0: sim.plumed_script}

        # last replica is the neutral replica
        is_neutral = sim.neutral_replica and self.rank == self.size - 1
        if is_neutral:
            from tica_metadynamics.load_sim import create_neutral_simulation
            self.sim_obj = create_neutral_simulation(
                sim.base_dir,
                sim.starting_coordinates_folder,
                self.gpu_index,
                sim.sim_save_rate,
                sim.platform)
        else:
            self.sim_obj, self.force_group = create_simulation(
                sim.base_dir,
                sim.starting_coordinates_folder,
                self.gpu_index,
                self.rank,
                self.plumed_force_dict[self.rank],
                sim.sim_save_rate,
                sim.platform)

        # Only rank 0 keeps the swap log, and only when there is more
        # than one process.
        if self.rank == 0 and self.size > 1:
            self.log_file = open("../swap_log.txt", "a")
            columns = ["Iteration", "S_i", "S_j", "Eii", "Ejj", "Eij", "Eji",
                       "DeltaE", "Temp", "Beta", "Probability", "Accepted"]
            self.log_file.writelines("#{}\t{}\t{}\t{}\t{}\t{}"
                                     "\t{}\t{}\t{}\t{}\t{}\t{}\n".format(*columns))
def test_dump():
    """Dump and reload a fitted ContinuousTimeMSM; transmat_ must survive."""
    # gh-713
    sequence = [0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]
    model = ContinuousTimeMSM(verbose=False)
    model.fit([sequence])

    scratch = tempfile.mkdtemp()
    try:
        target = '{}/cmodel'.format(scratch)
        utils.dump(model, target)
        reloaded = utils.load(target)
        np.testing.assert_array_almost_equal(model.transmat_,
                                             reloaded.transmat_)
    finally:
        # Always remove the scratch directory, pass or fail.
        shutil.rmtree(scratch)
Exemple #11
0
def test_dump():
    """Dump and reload a fitted PESContinuousTimeMSM; transmat_ must survive."""
    # gh-713
    sequence = [0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]
    model = PESContinuousTimeMSM(verbose=False)
    model.fit([sequence])

    workdir = tempfile.mkdtemp()
    try:
        model_path = '{}/cmodel'.format(workdir)
        utils.dump(model, model_path)
        restored = utils.load(model_path)
        np.testing.assert_array_almost_equal(model.transmat_,
                                             restored.transmat_)
    finally:
        # Always remove the scratch directory, pass or fail.
        shutil.rmtree(workdir)
Exemple #12
0
    def _component_type(self, spec):
        if spec is None:
            return None
        spec_split = spec.split(':')
        if len(spec_split) > 1:
            fn = ':'.join(spec_split[:-1])
            comp_i = int(spec_split[-1])
        else:
            fn = spec_split[0]
            comp_i = 0

        obj = utils.load(fn)
        component = obj.components_[comp_i]
        component = component ** 2
        component = component / np.max(component)
        return component
Exemple #13
0
def test_transform_command_1():
    """Exercise the KCenters and TransformDataset commands via the shell.

    First pass: fit on all trajectories, transform them, and compare the
    first transformed sequence with the model's first label sequence.
    Second pass: only fit KCenters on a single trajectory.
    """
    with tempdir():
        fit_all = (
            "msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
            "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
            "--metric rmsd").format(data_home=get_data_home())
        shell(fit_all)

        transform_all = (
            "msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
            "-m model.pkl -t transformed.h5 --top "
            "{data_home}/alanine_dipeptide/ala2.pdb").format(
                data_home=get_data_home())
        shell(transform_all)

        eq(dataset('transformed.h5')[0], load('model.pkl').labels_[0])

    with tempdir():
        fit_single = (
            "msmb KCenters -i {data_home}/alanine_dipeptide/trajectory-0.dcd "
            "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
            "--metric rmsd").format(data_home=get_data_home())
        shell(fit_single)
def pull_features(yaml_file, prt, skip=1, feature_indices=None):
    """
    Simple utility to pull certain features from the feature_folder object
    :param yaml_file: project yaml description (passed to load_yaml_file)
    :param prt: Protein model to use (its ``name`` selects the data dir)
    :param skip: skip for each file(defaults to 1); applied as a stride
        along the first axis of every loaded feature array
    :param feature_indices: which indices to pull
    :return: dictionary keyed on file name with feature values as arrays
    """
    yaml_file = load_yaml_file(yaml_file)
    all_f = {}
    with enter_protein_data_dir(yaml_file, prt.name):
        feature_file_list = glob.glob("./%s/*.jl" % yaml_file["feature_dir"])
        for fname in feature_file_list:
            # ``skip`` was previously accepted but never used; with the
            # default of 1 the result is unchanged.
            # NOTE(review): feature_indices=None indexes with ``None``,
            # which for numpy arrays inserts an axis rather than selecting
            # every column -- confirm that is what callers expect.
            all_f[os.path.basename(fname)] = \
                load(fname)[::skip, feature_indices]

    return all_f
Exemple #15
0
def validate_plumed_script(sim_obj_loc="metad_sim.pkl",
                           featurizer=None,
                           traj=None):
    """Write a PLUMED script to ``./plumed.dat`` and run the plumed driver.

    :param sim_obj_loc: path to the pickled simulation object.
    :param featurizer: featurizer to validate against; may be ``None`` only
        when the simulation object has a ``featurizer`` attribute.
    :param traj: test trajectory; a warning is issued when it is ``None``.
    :raises ValueError: if no featurizer is given and the simulation
        object has none.
    :return: None
    """
    sim_obj = load(sim_obj_loc)
    # The attribute name must be the string "featurizer"; passing the
    # (None) argument itself made hasattr raise TypeError.
    if featurizer is None and not hasattr(sim_obj, "featurizer"):
        raise ValueError("Featuizer cant be none if sim_obj doesnt "
                         "have featurizer object")
    if traj is None:
        warnings.warn("No test trj found")
    # with enter_temp_directory():
    #     for i in range()

    # NOTE(review): ``tic_index`` and ``i`` are not defined anywhere in
    # this function; unless they exist at module level this raises
    # NameError. The commented-out loop above suggests an unfinished
    # iteration -- confirm the intended values.
    script = globals()["plumed_%d" % tic_index].format(
        bias="r%dt%d.bias" % (tic_index, i))
    with open("./plumed.dat", 'w') as f:
        f.writelines(script)

    cmd = [
        "plumed", "--no-mpi", "driver", "--mf_xtc",
        "../tic_%s/tic_%s.xtc" % (i, i)
    ]
    # The driver's exit status is currently not inspected.
    call(cmd)
    return
Exemple #16
0
import os
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition

# Plot a log-scaled hexbin of the first two tICA coordinates and save it as
# a PNG named after the current directory.
# NOTE(review): np, hexbin, title, xlabel, ylabel and savefig are not
# imported in the visible part of this script (presumably a pylab-style
# interactive namespace) -- confirm.

# System name is the last component of the working directory.
sysname = os.path.split(os.getcwd())[-1]
# dt only scales the lag time shown in the plot title; its unit is not
# stated here.
dt = 0.25
tica_lagtime = 400
#regularization_string = ""
regularization_string = "_012"

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d%s.h5" %
                    (tica_lagtime, regularization_string))

tica_model = utils.load("./tica/tica%d%s.pkl" %
                        (tica_lagtime, regularization_string))

# Stack all sequences into a single array for plotting.
Xf = np.concatenate(X)

hexbin(Xf[:, 0], Xf[:, 1], bins='log')

# Bare expression: only has a visible effect in an interactive session.
tica_model.timescales_

title("tICA: lagtime %d (%.3f)" % (tica_lagtime, dt * tica_lagtime))
xlabel("Slowest tIC")
ylabel("Second Slowest tIC")
savefig("./%s_tica_lag%d%s.png" %
        (sysname, tica_lagtime, regularization_string),
        bbox_inches="tight")
#Execute like python importing_plumed.py >tic{TICnumber}.dat
from tica_metadynamics.plumed_writer import render_tica_plumed_file
from msmbuilder.featurizer import DihedralFeaturizer
import pandas as pd
import mdtraj as md
from msmbuilder.utils import load
#python imports
import os, glob
import numpy as np
import pickle

# Render PLUMED input for the first five tICs of a saved tICA model, using
# chi1/chi2 dihedral features described on a sub-selection of atoms.
tica_model = load("./tica_mdl_flapchi1angle.pkl")

# Atom indices to keep when loading the topology.
#a = np.arange(1027,1357)
a = np.arange(1118, 1276)
top = md.load("prot.pdb", atom_indices=a)

# swap this for whatever you have. The code for now supports contacts, dihedral, and angles.
feat = DihedralFeaturizer(types=['chi1', 'chi2'])

# this basically maps every feature to atom indices.
df1 = pd.DataFrame(feat.describe_features(top))
#print(df1)

# ``output`` holds the rendered scripts; the commented print below shows it
# being indexed by tic number.
output = render_tica_plumed_file(tica_mdl=tica_model,
                                 df=df1,
                                 n_tics=5,
                                 multiple_tics=None,
                                 vde_mdl=None)
#Printing TIC1
#print(output[0])
import matplotlib
matplotlib.use('Agg')

from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as md

# Project a trajectory of ensembler models onto a previously fitted
# dihedral tICA model and plot its first two coordinates.
tica_lagtime = 1600

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica%d.h5" % tica_lagtime)
# Stack all sequences into one array (not used further in this snippet).
Xf = np.concatenate(X)

tica_model = utils.load("./tica%d.pkl" % tica_lagtime)

#Load trajectory with ensembler models
t_models = md.load("../ensembler-models/traj-refine_implicit_md.xtc", top = "../ensembler-models/topol-renumbered-implicit.pdb")

#Now make dihedrals of this.
# NOTE(review): the featurizer types here must match those the tICA model
# was fitted on -- not verifiable from this snippet.
dihedrals_models = featurizer.DihedralFeaturizer(types=["phi", "psi", "chi1", "chi2"]).transform([t_models])
x_models = tica_model.transform(dihedrals_models)

#Now plot on the slow MSM features found before.
plt.plot(x_models[0][:, 0], x_models[0][:, 1], 'o', markersize=5, label="ensembler models", color='white')
plt.title("Dihedral tICA Analysis - Abl")
plt.xlabel("Slowest Coordinate")
plt.ylabel("Second Slowest Coordinate")
plt.legend()
Exemple #19
0
import scipy.spatial
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

# Lump a microstate MSM into macrostates with PCCA+ and, for each
# macrostate, draw the convex hull of its frames over a tICA hexbin plot.
# NOTE(review): np, figure and hexbin are not imported in the visible part
# of this script (presumably a pylab-style namespace) -- confirm.
dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica.h5")

tica_model = utils.load("./tica.pkl")
dih_model = utils.load("./dihedrals/model.pkl")

clusterer = utils.load("./cluster.pkl")
microstate_model = utils.load("./msm.pkl")

# Map the stored cluster labels through the microstate model.
labels = microstate_model.transform(clusterer.labels_)

n_macrostates = 5

pcca = lumping.PCCAPlus.from_msm(microstate_model, n_macrostates=n_macrostates)
macrostate_model = msm.MarkovStateModel()
macrostate_model.fit(pcca.transform(labels))

# Chain every stage so tICA coordinates can be pushed straight through to
# the final model's transform.
pipeline = make_pipeline(clusterer, microstate_model, pcca, macrostate_model)
s = pipeline.transform(X)
sf = np.concatenate(s)
Xf = np.concatenate(X)

# One figure per macrostate: hexbin of all frames plus the hull of the
# frames whose pipeline output equals i.
for i in range(n_macrostates):
    figure()
    f = hexbin(Xf[:, 0], Xf[:, 1], bins='log')
    hull = scipy.spatial.ConvexHull(Xf[sf == i, 0:2])
    scipy.spatial.convex_hull_plot_2d(hull, ax=f.axes)
Exemple #20
0
def test_dump_load():
    """Round-trip a dict through ``dump`` and ``load`` in a temp dir."""
    original = {"name": "Fancy_name", "arr": np.random.rand(10, 5)}
    with tempdir():
        dump(original, 'filename')
        restored = load('filename')
    eq(original, restored)
import mdtraj as md
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

# Overlay the first 50 frames of a trajectory, numbered, on a hexbin plot
# of the stored tICA coordinates.
# NOTE(review): np, hexbin, plot, annotate and arange are not imported in
# the visible part of this script (presumably a pylab-style namespace).
trj0 = md.load("traj-refine_implicit_md.xtc", top="topol-renumbered-implicit.pdb")
trj0 = trj0[0:50]

X = dataset.dataset("./tica.h5")
Xf = np.concatenate(X)

dih_model = utils.load("./dihedrals/model.pkl")
tica_model = utils.load("./tica.pkl")

# Featurize and project in one step; [0] picks the single input trajectory.
pipeline = make_pipeline(dih_model, tica_model)
x0 = pipeline.transform([trj0])[0]

hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plot(x0[:, 0], x0[:, 1], 'kx')
# Explicit loop: on Python 3 ``map`` is lazy, so the former
# ``map(lambda ...)`` call never ran and no annotation was drawn.
for k in arange(len(x0)):
    annotate(k, xy=x0[k, 0:2], fontsize=14)
    cfg = ConfigParser(interpolation=None)
    cfg.read(os.environ.get("CONFIG"))

    # Load optional env variables files
    if os.environ.get("GEN"):
        prevgen = int(os.environ.get("GEN"))
    else:
        prevgen = cfg.getint("production", "generation") - 1

    if os.environ.get("STRIDE"):
        stride = int(os.environ.get("STRIDE"))
    else:
        stride = 1

    if os.environ.get("MSM"):
        mmsm = load(os.environ.get("MSM"))
    else:
        mmsm = load(
            os.path.join(cfg["system"]["rootdir"], "production", str(prevgen),
                         "mmsm_G%d.pkl" % prevgen))
    if os.environ.get("CLUST"):
        clust = load(os.environ.get("CLUST"))
    else:
        clust = load(
            os.path.join(cfg["system"]["rootdir"], "production", str(prevgen),
                         "testing.mcluster.pkl"))
    clust = [c[::stride] for c in clust]

    # Make directory structure
    if os.environ.get("CDIR"):
        outdir = os.environ.get("CDIR")
Exemple #23
0
import pandas as pd
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import mdtraj as md

# Inspect which dihedral features have the smallest and largest values in
# the first tICA eigenvector.
# NOTE(review): np and argsort are not imported in the visible part of
# this script (presumably a pylab-style namespace) -- confirm.
tica_lagtime = 1600

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
# First frame of the first trajectory; used only to describe features.
t0 = trajectories[0][0]

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)

tica_model = utils.load("./tica/tica%d.pkl" % tica_lagtime)
dih_model = utils.load("./dihedrals/model.pkl")


d = dih_model.describe_features(t0)
d = pd.DataFrame(d)

# Bare expressions: rows for the five lowest / five highest entries of the
# first eigenvector (only displayed in an interactive session).
# NOTE(review): DataFrame.ix no longer exists in current pandas releases;
# on a modern install these lines need .iloc/.loc -- confirm target version.
d.ix[argsort(tica_model.eigenvectors_[:, 0])[0:5]]
d.ix[argsort(tica_model.eigenvectors_[:, 0])[-5:]]
    def __init__(self,
                 base_dir="./",
                 starting_coordinates_folder="./starting_coordinates",
                 n_tics=1,
                 tica_mdl="tica_mdl.pkl",
                 tica_data="tica_data.pkl",
                 data_frame="feature_descriptor.pkl",
                 featurizer=None,
                 kmeans_mdl=None,
                 nrm=None,
                 wt_msm_mdl=None,
                 grid=False,
                 interval=False,
                 wall=False,
                 pace=2500,
                 stride=2500,
                 temp=300,
                 biasfactor=10,
                 height=1.0,
                 sigma=0.2,
                 delete_existing=False,
                 hills_file="HILLS",
                 bias_file="BIAS",
                 label="metad",
                 sim_save_rate=50000,
                 swap_rate=25000,
                 n_iterations=1000,
                 platform='CUDA',
                 grid_mlpt_factor=.3,
                 render_scripts=False,
                 msm_swap_folder=None,
                 msm_swap_scheme='random',
                 n_walkers=1,
                 neutral_replica=False,
                 multiple_tics=False,
                 plumed_script=None,
                 vde_mdl=None):
        """Store the simulation settings and write the simulation layout.

        ``featurizer``, ``tica_data``, ``data_frame``, ``kmeans_mdl``,
        ``nrm`` and ``wt_msm_mdl`` may each be given as a ``str`` path, in
        which case they are read with ``load``; any other value is stored
        as passed. ``tica_mdl`` is always stored as passed.
        ``plumed_script`` given as a ``str`` is a path whose text is read;
        otherwise ``self.plumed_script`` is ``None``.

        ``grid``, ``interval`` and ``wall`` are each either falsy
        (disabled), a pair of numbers handed to ``get_interval`` together
        with ``tica_data``, or a ready-made list that is used verbatim.

        With ``n_walkers`` > 1 the layout is written once per walker into
        ``<base_dir>/walker_<w>``; otherwise once into ``base_dir``.

        :raises ValueError: if ``grid``, ``interval`` or ``wall`` is set
            but has fewer than two entries.
        """

        def _load_if_path(obj):
            # A str is treated as a file to load; anything else is kept.
            return load(obj) if isinstance(obj, str) else obj

        self.base_dir = os.path.abspath(base_dir)
        self.starting_coordinates_folder = starting_coordinates_folder
        self.n_tics = n_tics

        self.featurizer = _load_if_path(featurizer)
        self.tica_mdl = tica_mdl
        self.tica_data = _load_if_path(tica_data)
        self.data_frame = _load_if_path(data_frame)
        self.kmeans_mdl = _load_if_path(kmeans_mdl)
        self.nrm = _load_if_path(nrm)
        self.wt_msm_mdl = _load_if_path(wt_msm_mdl)

        # load plumed file
        # TODO: write multiple scripts into dictionary separately?
        #            self.n_tics = len(list(self.plumed_dict.keys()))
        if isinstance(plumed_script, str):
            # Context manager so the handle is closed right after reading.
            with open(plumed_script, 'r') as script_file:
                self.plumed_script = script_file.read()
        else:
            self.plumed_script = None

        self.grid = grid
        self.grid_mlpt_factor = grid_mlpt_factor
        self.interval = interval
        self.wall = wall
        self.delete_existing = delete_existing
        self.n_iterations = n_iterations
        self.platform = platform
        self.grid_list = self.interval_list = self.wall_list = None
        self.render_scripts = render_scripts
        self.walker_n = n_walkers
        self.multiple_tics = multiple_tics
        self.vde_mdl = vde_mdl

        if self.grid:
            if len(self.grid) < 2:
                raise ValueError("grid must length at least 2 (like [0, 100]")
            if len(self.grid) == 2 and type(self.grid[0]) in [float, int]:
                # assume user meant us to specify
                self.grid_list = get_interval(self.tica_data, self.grid[0],
                                              self.grid[1])
                print(self.grid_list)
                # add extra multiplicative factor because these tend to fail
                factor = self.grid_mlpt_factor
                self.grid_list = [(k[0] - factor * abs(k[0]),
                                   k[1] + factor * abs(k[1]))
                                  for k in self.grid_list]
                print(self.grid_list)
            else:
                self.grid_list = self.grid

        if self.interval:
            if len(self.interval) < 2:
                raise ValueError("interval must length 2(like [0, 100] for "
                                 "calculating percentiles")
            if len(self.interval) == 2 and type(
                    self.interval[0]) in [float, int]:
                self.interval_list = get_interval(self.tica_data,
                                                  self.interval[0],
                                                  self.interval[1])
            else:
                self.interval_list = self.interval

        if self.wall:
            if len(self.wall) < 2:
                # The message used to say "interval" (copy-paste slip).
                raise ValueError("wall must length 2(like [0, 100] for "
                                 "calculating percentiles")
            if len(self.wall) == 2 and type(self.wall[0]) in [float, int]:
                self.wall_list = get_interval(self.tica_data, self.wall[0],
                                              self.wall[1])
            else:
                self.wall_list = self.wall

        self.pace = pace
        self.stride = stride
        self.temp = temp
        self.biasfactor = biasfactor
        self.height = height
        self.sigma = sigma
        self.hills_file = hills_file
        self.bias_file = bias_file
        self.label = label
        self.sim_save_rate = sim_save_rate
        self.swap_rate = swap_rate
        self.plumed_scripts_dict = None
        self.msm_swap_folder = msm_swap_folder
        self.msm_swap_scheme = msm_swap_scheme
        self.neutral_replica = neutral_replica
        # The tICA data was only needed for the ranges above; it is dropped
        # before setup (presumably to keep it out of the dumped object --
        # TODO confirm).
        self.tica_data = None

        if self.walker_n > 1:
            print("Multiple walkers found. Modifying current model")
            os.chdir(self.base_dir)
            # base_dir, has n_walker folders called walker_0 ... walkers
            c_base_dir = self.base_dir
            self._setup_walkers_folder()
            for w in range(self.walker_n):
                # Start every walker from the absolute top-level directory.
                # The former second chdir to the raw ``base_dir`` argument
                # was a no-op for "./" and absolute paths, and failed for
                # any other relative path.
                os.chdir(c_base_dir)
                self.base_dir = os.path.join(c_base_dir, "walker_%d" % w)
                self.walker_id = w
                self._setup()
                self._write_scripts_and_dump()
        else:
            self._setup()
            self._write_scripts_and_dump()
Exemple #25
0
import scipy.spatial
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

# Lump a microstate MSM into macrostates with PCCA+ and draw one tICA
# hexbin figure per macrostate.
# NOTE(review): np, figure and hexbin are not imported in the visible part
# of this script (presumably a pylab-style namespace) -- confirm.
dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica.h5")

tica_model = utils.load("./tica.pkl")
dih_model = utils.load("./dihedrals/model.pkl")

clusterer = utils.load("./cluster.pkl")
microstate_model = utils.load("./msm.pkl")

# Map the stored cluster labels through the microstate model.
labels = microstate_model.transform(clusterer.labels_)

n_macrostates = 5

pcca = lumping.PCCAPlus.from_msm(microstate_model, n_macrostates=n_macrostates)
macrostate_model = msm.MarkovStateModel()
macrostate_model.fit(pcca.transform(labels))


# Chain every stage so tICA coordinates can be pushed straight through to
# the final model's transform.
pipeline = make_pipeline(clusterer, microstate_model, pcca, macrostate_model)
s  = pipeline.transform(X)
sf = np.concatenate(s)
Xf = np.concatenate(X)


# One figure per macrostate; in this snippet the loop body only draws the
# hexbin of all frames.
for i in range(n_macrostates):
    figure()
    f = hexbin(Xf[:, 0], Xf[:, 1], bins='log')
Exemple #26
0
import os
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition

# Plot a log-scaled hexbin of the first two tICA coordinates and save it as
# a PNG named after the current directory.
# NOTE(review): np, hexbin, title, xlabel, ylabel and savefig are not
# imported in the visible part of this script (presumably a pylab-style
# interactive namespace) -- confirm.

# System name is the last component of the working directory.
sysname = os.path.split(os.getcwd())[-1]
# dt only scales the lag time shown in the plot title; its unit is not
# stated here.
dt = 0.25
tica_lagtime = 400
#regularization_string = ""
regularization_string = "_012"

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d%s.h5" % (tica_lagtime, regularization_string))

tica_model = utils.load("./tica/tica%d%s.pkl" % (tica_lagtime, regularization_string))

# Stack all sequences into a single array for plotting.
Xf = np.concatenate(X)

hexbin(Xf[:, 0], Xf[:, 1], bins='log')

# Bare expression: only has a visible effect in an interactive session.
tica_model.timescales_

title("tICA: lagtime %d (%.3f)" % (tica_lagtime, dt * tica_lagtime))
xlabel("Slowest tIC")
ylabel("Second Slowest tIC")
savefig("./%s_tica_lag%d%s.png" % (sysname, tica_lagtime, regularization_string), bbox_inches="tight")
Exemple #27
0
from msmbuilder.utils import load, dump
from msmbuilder.featurizer import DihedralFeaturizer
import os, glob
from msmbuilder.decomposition import tICA
import mdtraj as md
import pandas as pd
from msmbuilder.msm import MarkovStateModel
from msmbuilder.cluster import KMeans

# Featurize a pickled list of trajectories twice (raw angles and the
# featurizer defaults), save everything, then fit a one-component tICA.
trj_list = load("traj_list.pkl")
print("Found %d trajs" % len(trj_list))

# Raw dihedral angles (sincos disabled).
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

# Default featurizer; ``f`` and ``feat`` are rebound from here on.
f = DihedralFeaturizer()
dump(f, "featurizer.pkl")
# Feature description table built from the first trajectory.
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=1, kinetic_mapping=False)

tica_feat = t.fit_transform(feat)
Exemple #28
0
import numpy as np
from msmbuilder.utils import load
import os

# Train (or reload) a PassiveAggressiveClassifier on saved features.
plot_feat = load("./raw_features.pkl")
train_feat = load("./features.pkl")

df = load("./feature_descriptor.pkl")

from sklearn.linear_model import PassiveAggressiveClassifier

#Perception based model generation

X=np.vstack(plot_feat)
train_X=np.vstack(train_feat)

# Labels: zeros followed by ones, each block as long as the first entry of
# plot_feat.
# NOTE(review): this assumes exactly two equally long trajectories, with
# train_feat matching plot_feat in length -- confirm.
train_Y=np.concatenate([np.zeros(len(plot_feat[0])),
            np.ones(len(plot_feat[0]))])
# Reuse a previously saved classifier when one exists; otherwise train.
if not os.path.isfile("./pasag_model_bpti.pkl"):
    train =True 
else:
    clf = load("./pasag_model_bpti.pkl")
    train =False
if train:
    clf = PassiveAggressiveClassifier(max_iter=1000)
    clf.fit(train_X, train_Y)

#Dumping the Model

if train:
    from msmbuilder.utils import dump
Exemple #29
0
from msmbuilder.cluster import KMeans

# Featurize the run*.xtc trajectories in this directory and project them
# with models stored one directory up.
flist = glob.glob("./run*.xtc")

top = md.load("../top.pdb")
trj_list = [md.load(i, top=top) for i in flist]
print("Found %d trajs" % len(trj_list))

# Raw dihedral angles (sincos disabled).
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

# Reuse the parent directory's featurizer; ``f`` and ``feat`` are rebound.
f = load("../featurizer.pkl")
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

# Reuse the parent tICA model (transform only, no refit); the flag is
# changed before transforming and the modified model is saved locally.
t = load("../tica_mdl.pkl")
t.commute_mapping = False
tica_feat = t.transform(feat)
dump(t, "tica_mdl.pkl")
dump(tica_feat, "tica_features.pkl")

kmeans_mdl = load("../kmeans_mdl.pkl")
# Cluster assignments of the projected data.
ass = kmeans_mdl.predict(tica_feat)
Exemple #30
0
def run_meta_sim(file_loc="metad_sim.pkl"):
    """Run this process's replica and attempt a state swap every iteration.

    ``rank``, ``size`` and ``comm`` are read from module scope (not
    defined here; presumably the MPI rank, world size and communicator).
    Every process must take the same path through the gather/scatter/
    barrier calls below, so their order must not be changed.

    :param file_loc: path to the pickled metadynamics simulation object.
    :return: None
    """
    from tica_metadynamics.load_sim import create_simulation

    metad_sim = load(file_loc)
    if metad_sim.msm_swap_folder is not None:
        print("Found MSM state folder. Will swap all replicas with the MSM "
              "occasionally",flush=True)
    #beta is 1/kt
    beta = 1/(boltzmann_constant * metad_sim.temp)

    #get
    my_host_name = socket.gethostname()
    my_gpu_index = get_gpu_index()
    print("Hello from rank %d running tic %d on "
          "host %s with gpu %d"%(rank, rank, my_host_name, my_gpu_index))

    # Each rank uses the plumed script stored under its own rank number.
    plumed_force_dict = get_plumed_dict(metad_sim)
    sim_obj, force_group = create_simulation(metad_sim.base_dir, metad_sim.starting_coordinates_folder,
                                my_gpu_index, rank, plumed_force_dict[rank],
                                metad_sim.sim_save_rate, metad_sim.platform)
    # Only rank 0 writes the swap log, and only when there are peers.
    if rank ==0 and size>1:
        log_file = open("../swap_log.txt","a")
        header = ["Iteration","S_i","S_j","Eii","Ejj","Eij","Eji",
                  "DeltaE","Temp","Beta","Probability","Accepted"]
        log_file.writelines("#{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(*header))

    for step in range(metad_sim.n_iterations):
        #2fs *3000 = 6ps
        sim_obj.step(metad_sim.swap_rate)

        # With probability 0.5, swap with an MSM state when a folder is set.
        if metad_sim.msm_swap_folder is not None and np.random.random() < 0.5:
            sim_obj = swap_with_msm_state(sim_obj, metad_sim.msm_swap_folder, force_group,beta)
        #get old energy for just the plumed force
        old_energy = sim_obj.context.getState(getEnergy=True,groups={force_group}).\
            getPotentialEnergy().value_in_unit(kilojoule_per_mole)
        #write the chckpt
        with open("checkpt.chk",'wb') as f:
            f.write(sim_obj.context.createCheckpoint())
        # Checkpoints are exchanged by path, not by content.
        old_state = os.path.abspath("checkpt.chk")
        #send state and energy
        data = comm.gather((old_state,old_energy), root=0)
        if size >1:
            if rank==0:
                #rnd pick 2 states
                i,j =  np.random.choice(np.arange(size), 2, replace=False)
                s_i_i,e_i_i = data[i]
                s_j_j,e_j_j = data[j]
                #swap out states
                data[j], data[i] = data[i],data[j]
            else:
                data = None

            #get possible new state
            new_state = None
            new_state,energy = comm.scatter(data,root=0)
            #set state
            with open(new_state, 'rb') as f:
                sim_obj.context.loadCheckpoint(f.read())

            # return new state and new energies
            new_energy = sim_obj.context.getState(getEnergy=True,groups={force_group}).\
                getPotentialEnergy().value_in_unit(kilojoule_per_mole)
            data = comm.gather((new_state,new_energy), root=0)

            if rank==0:
                s_i_j, e_i_j = data[i]
                s_j_i, e_j_i = data[j]
                # Accept with probability min(1, exp(beta * delta_e)),
                # where delta_e is the pre-swap minus post-swap bias
                # energy of the chosen pair.
                delta_e = e_i_i+e_j_j - e_i_j - e_j_i
                probability = np.min((1,np.exp(beta*delta_e)))
                print(e_i_i,e_j_j,e_i_j,e_j_i,probability)
                if np.random.random() < probability :
                    accepted= 1
                    print("Swapping out %d with %d"%(i,j),flush=True)
                else:
                    accepted= 0
                    print("Failed Swap of %d with %d"%(i,j),flush=True)
                    #go back to original state list
                    data[i], data[j] = data[j] , data[i]
                header = [step, i, j, e_i_i,e_j_j,e_i_j,e_j_i,delta_e,metad_sim.temp,beta,probability,accepted]
                log_file.writelines("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(*header))
                log_file.flush()
            else:
                data = None

            #get final state for iteration
            new_state,energy = comm.scatter(data,root=0)
            #print(rank,new_state)
            with open(new_state, 'rb') as f:
                sim_obj.context.loadCheckpoint(f.read())
            #barrier here to prevent
            # NOTE(review): the original comment is cut off; presumably this
            # stops a rank overwriting checkpt.chk in the next iteration
            # while a peer is still reading it -- confirm.
            comm.barrier()

    if rank==0 and size >1:
        log_file.close()
    return
Exemple #31
0
# Featurize ../trajectory.xtc, fit a two-component tICA on it and create a
# 50-cluster KMeans model.
flist = glob.glob("../trajectory.xtc")

top = md.load("../top.pdb")

trj_list = [md.load(i, top=top) for i in flist]
print("Found %d trajs" % len(trj_list))

# Raw dihedral angles (sincos disabled).
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

# NOTE(review): the featurizer is loaded from and immediately dumped back
# to what resolves to the same path -- confirm this is intended.
f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)

tica_feat = t.fit_transform(feat)

dump(t, "tica_mdl.pkl")
dump(tica_feat, "tica_features.pkl")

# Model is only constructed here; no fit is visible in this snippet.
kmeans_mdl = KMeans(50)
Exemple #32
0
def test_dump_load():
    """Dump a dict with ``dump`` and check ``load`` reads it back equal."""
    payload = {"name": "Fancy_name", "arr": np.random.rand(10, 5)}
    with tempdir():
        dump(payload, 'filename')
        reloaded = load('filename')
    eq(payload, reloaded)