Esempio n. 1
0
def test_ala2():
    # creates a 4-state HMM on the ALA2 data. Nothing fancy, just makes
    # sure the code runs without erroring out
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology

    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])

    sequences = featurizer.transform(trajectories)
    hmm = GaussianHMM(n_states=4, n_init=3)
    hmm.fit(sequences)

    assert len(hmm.timescales_ == 3)
    assert np.any(hmm.timescales_ > 50)
Esempio n. 2
0
def makeHMM(Trajectories, topology):
    top = md.load_prmtop(topology)
    alpha_carbons = [a.index for a in top.atoms if a.name == 'CA']
    filenames = sorted(glob(Trajectories))
    first_frame = md.load_frame(filenames[0], 0, top=top)

    f = SuperposeFeaturizer(alpha_carbons, first_frame)
    dataset = []
    for fragment in filenames:
        for chunk in md.iterload(fragment, chunk=100, top=top):
            dataset.append(f.partial_transform(chunk))
    hmm = GaussianHMM(n_states=8)
    hmm.fit(dataset)
    print(hmm.timescales_)
    return hmm
Esempio n. 3
0
def test_ala2():
    # creates a 4-state HMM on the ALA2 data. Nothing fancy, just makes
    # sure the code runs without erroring out
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology

    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])

    sequences = featurizer.transform(trajectories)
    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)

    assert len(hmm.timescales_ == 3)
    assert np.any(hmm.timescales_ > 50)
Esempio n. 4
0
def makeHMM(Trajectories, topology):
    top = md.load_prmtop(topology)
    alpha_carbons = [a.index for a in top.atoms if a.name == 'CA']
    filenames = sorted(glob(Trajectories))
    first_frame = md.load_frame(filenames[0], 0, top=top)

    f = SuperposeFeaturizer(alpha_carbons, first_frame)
    dataset = []
    for fragment in filenames:
            for chunk in md.iterload(fragment, chunk=100, top=top):
                dataset.append(f.partial_transform(chunk))
    hmm = GaussianHMM(n_states=8)
    hmm.fit(dataset)
    print(hmm.timescales_)
    return hmm
Esempio n. 5
0
def test_that_all_featurizers_run():
    # TODO: include all featurizers, perhaps with generator tests

    trajectories = AlanineDipeptide().get_cached().trajectories
    trj0 = trajectories[0][0]
    atom_indices, pair_indices = get_atompair_indices(trj0)

    featurizer = AtomPairsFeaturizer(pair_indices)
    X_all = featurizer.transform(trajectories)

    featurizer = SuperposeFeaturizer(np.arange(15), trj0)
    X_all = featurizer.transform(trajectories)

    featurizer = DihedralFeaturizer(["phi", "psi"])
    X_all = featurizer.transform(trajectories)

    featurizer = VonMisesFeaturizer(["phi", "psi"])
    X_all = featurizer.transform(trajectories)

    # Below doesn't work on ALA dipeptide
    # featurizer = msmbuilder.featurizer.ContactFeaturizer()
    # X_all = featurizer.transform(trajectories)

    featurizer = RMSDFeaturizer(trj0)
    X_all = featurizer.transform(trajectories)
Esempio n. 6
0
def test_pickle():
    """Test pickling an HMM"""
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)
    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)
    logprob, hidden = hmm.predict(sequences)

    with tempfile.TemporaryFile() as savefile:
        pickle.dump(hmm, savefile)
        savefile.seek(0, 0)
        hmm2 = pickle.load(savefile)

    logprob2, hidden2 = hmm2.predict(sequences)
    assert (logprob == logprob2)
Esempio n. 7
0
def test_pickle():
    """Test pickling an HMM"""
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)
    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)
    logprob, hidden = hmm.predict(sequences)

    with tempfile.TemporaryFile() as savefile:
        pickle.dump(hmm, savefile)
        savefile.seek(0, 0)
        hmm2 = pickle.load(savefile)

    logprob2, hidden2 = hmm2.predict(sequences)
    assert(logprob == logprob2)
Esempio n. 8
0
def test_1():
    # creates a 4-state HMM on the ALA2 data. Nothing fancy, just makes
    # sure the code runs without erroring out
    dataset = AlanineDipeptide().get()
    trajectories = dataset.trajectories
    topology = trajectories[0].topology

    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])

    sequences = featurizer.transform(trajectories)
    hmm = GaussianFusionHMM(n_states=4,
                            n_features=sequences[0].shape[1],
                            n_init=1)
    hmm.fit(sequences)

    assert len(hmm.timescales_ == 3)
    assert np.any(hmm.timescales_ > 50)
Esempio n. 9
0
def test_pipeline():
    trajs = AlanineDipeptide().get_cached().trajectories
    topology = trajs[0].topology

    indices = topology.select('backbone')
    p = Pipeline([('diheds', SuperposeFeaturizer(indices, trajs[0][0])),
                  ('hmm', GaussianHMM(n_states=4))])

    predict = p.fit_predict(trajs)
    p.named_steps['hmm'].summarize()
Esempio n. 10
0
plt.style.use("ggplot")
from msmbuilder.featurizer import SuperposeFeaturizer
from msmbuilder.example_datasets import AlanineDipeptide
from msmbuilder.hmm import GaussianHMM
from msmbuilder.cluster import KCenters
from msmbuilder.msm import MarkovStateModel

dataset = AlanineDipeptide().get()
trajectories = dataset.trajectories
topology = trajectories[0].topology

indices = [
    atom.index for atom in topology.atoms
    if atom.element.symbol in ['C', 'O', 'N']
]
featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
sequences = featurizer.transform(trajectories)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~        HIDDEN MARKOV MODEL     ~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lag_times = [1, 10, 20, 30, 40]
hmm_ts0 = {}
hmm_ts1 = {}
n_states = [3, 5]

for n in n_states:
    hmm_ts0[n] = []
    hmm_ts1[n] = []
    for lag_time in lag_times:
        strided_data = [
Esempio n. 11
0
from msmbuilder.hmm import GaussianHMM
from msmbuilder.cluster import KCenters
from msmbuilder.msm import MarkovStateModel
from msmbuilder.dataset import dataset
import mdtraj as md
from glob import glob

filenames = sorted(glob("05_Prod_*.nc"))
topology = md.load_prmtop(glob("*nowat.prmtop")[0])

first_frame = md.load_frame(filenames[0], 0, top=topology)
indices = [
    atom.index for atom in topology.atoms
    if atom.element.symbol in ['C', 'O', 'N']
]
featurizer = SuperposeFeaturizer(indices, first_frame)
sequences = []

for fragment in filenames:
    for chunk in md.iterload(fragment, chunk=100, top=topology):
        sequences.append(featurizer.partial_transform(chunk))

lag_times = [1, 20, 50, 100, 200, 400]
hmm_ts0 = {}
hmm_ts1 = {}
n_states = [n for n in range(2, 11, 2)]

for n in n_states:
    hmm_ts0[n] = []
    hmm_ts1[n] = []
    for lag_time in lag_times:
Esempio n. 12
0
from msmbuilder.hmm import GaussianHMM
from msmbuilder.cluster import KCenters
from msmbuilder.msm import MarkovStateModel
from msmbuilder.dataset import dataset
import mdtraj as md
from glob import glob




filenames = sorted(glob("05_Prod_*.nc"))
topology = md.load_prmtop(glob("*nowat.prmtop")[0])

first_frame = md.load_frame(filenames[0], 0, top=topology)
indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']]
featurizer = SuperposeFeaturizer(indices, first_frame)
sequences = []

for fragment in filenames:
    for chunk in md.iterload(fragment, chunk = 100, top = topology):
        sequences.append(featurizer.partial_transform(chunk))

lag_times = [1, 20, 50, 100, 200, 400]
hmm_ts0 = {}
hmm_ts1 = {}
n_states = [n for n in range(2,11,2)]


for n in n_states:
    hmm_ts0[n] = []
    hmm_ts1[n] = []
from matplotlib.pyplot import *
import matplotlib.pyplot as plt
plt.style.use("ggplot")
from msmbuilder.featurizer import SuperposeFeaturizer
from msmbuilder.example_datasets import AlanineDipeptide
from msmbuilder.hmm import GaussianHMM
from msmbuilder.cluster import KCenters
from msmbuilder.msm import MarkovStateModel


dataset = AlanineDipeptide().get()
trajectories = dataset.trajectories
topology = trajectories[0].topology

indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']]
featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
sequences = featurizer.transform(trajectories)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~        HIDDEN MARKOV MODEL     ~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lag_times = [1, 10, 20, 30, 40]
hmm_ts0 = {}
hmm_ts1 = {}
n_states = [3, 5]

for n in n_states:
    hmm_ts0[n] = []
    hmm_ts1[n] = []
    for lag_time in lag_times:
        strided_data = [s[i::lag_time] for s in sequences for i in range(lag_time)]