def test_ala2():
    """Smoke-test fitting a 4-state GaussianHMM on the ALA2 data.

    Nothing fancy: this mainly makes sure the code runs without erroring
    out, then sanity-checks the timescales reported by the fitted model.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    # Featurize on the carbon, oxygen and nitrogen atoms only.
    indices = topology.select('symbol C or symbol O or symbol N')
    # The featurizer is built from the first frame of the first trajectory.
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    hmm = GaussianHMM(n_states=4, n_init=3)
    hmm.fit(sequences)

    # len() must wrap the array, not the comparison: `len(arr == 3)` is just
    # the array length and is truthy for any non-empty array, so it never
    # actually checked the number of timescales.
    assert len(hmm.timescales_) == 3
    # At least one timescale is expected to exceed 50.
    assert np.any(hmm.timescales_ > 50)
def makeHMM(Trajectories, topology):
    """Fit an 8-state GaussianHMM on alpha-carbon superposition features.

    Parameters
    ----------
    Trajectories
        Glob pattern; the matching files are processed in sorted order.
    topology
        Argument forwarded to ``md.load_prmtop`` to load the topology.

    Returns
    -------
    The fitted ``GaussianHMM``. Its ``timescales_`` are printed before
    returning.
    """
    parm = md.load_prmtop(topology)
    ca_indices = [atom.index for atom in parm.atoms if atom.name == 'CA']

    traj_files = sorted(glob(Trajectories))
    # Frame 0 of the first (sorted) file is handed to the featurizer.
    reference = md.load_frame(traj_files[0], 0, top=parm)
    featurizer = SuperposeFeaturizer(ca_indices, reference)

    # Stream every file in 100-frame chunks; each chunk becomes one
    # feature sequence in the training set.
    features = [
        featurizer.partial_transform(chunk)
        for path in traj_files
        for chunk in md.iterload(path, chunk=100, top=parm)
    ]

    model = GaussianHMM(n_states=8)
    model.fit(features)
    print(model.timescales_)
    return model
def test_ala2():
    """Smoke-test fitting a seeded 4-state GaussianHMM on the ALA2 data.

    Nothing fancy: this mainly makes sure the code runs without erroring
    out, then sanity-checks the timescales reported by the fitted model.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    # Featurize on the carbon, oxygen and nitrogen atoms only.
    indices = topology.select('symbol C or symbol O or symbol N')
    # The featurizer is built from the first frame of the first trajectory.
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    # `rs` is the random state defined at module level.
    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)

    # len() must wrap the array, not the comparison: `len(arr == 3)` is just
    # the array length and is truthy for any non-empty array, so it never
    # actually checked the number of timescales.
    assert len(hmm.timescales_) == 3
    # At least one timescale is expected to exceed 50.
    assert np.any(hmm.timescales_ > 50)
def test_that_all_featurizers_run():
    """Smoke-test: each featurizer below can transform the ALA2 trajectories.

    The transformed output is discarded; the test only checks that nothing
    raises.
    """
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    AtomPairsFeaturizer(pair_indices).transform(trajectories)
    SuperposeFeaturizer(np.arange(15), first_frame).transform(trajectories)
    DihedralFeaturizer(["phi", "psi"]).transform(trajectories)
    VonMisesFeaturizer(["phi", "psi"]).transform(trajectories)

    # msmbuilder.featurizer.ContactFeaturizer is deliberately left out:
    # it doesn't work on ALA dipeptide.

    RMSDFeaturizer(first_frame).transform(trajectories)
def test_pickle():
    """Test that an HMM predicts the same log-probability after pickling."""
    trajs = AlanineDipeptide().get_cached().trajectories
    atom_sel = trajs[0].topology.select('symbol C or symbol O or symbol N')
    sequences = SuperposeFeaturizer(atom_sel, trajs[0][0]).transform(trajs)

    fitted = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    fitted.fit(sequences)
    logprob_before, _ = fitted.predict(sequences)

    # Round-trip the fitted model through an anonymous temporary file.
    with tempfile.TemporaryFile() as handle:
        pickle.dump(fitted, handle)
        handle.seek(0, 0)  # rewind so the load starts at the first byte
        restored = pickle.load(handle)

    logprob_after, _ = restored.predict(sequences)
    assert logprob_before == logprob_after
def test_pickle():
    """Pickle a fitted HMM and check the reloaded copy predicts identically.

    Only the log-probability returned by ``predict`` is compared; the
    hidden-state sequences are not.
    """
    all_trajs = AlanineDipeptide().get_cached().trajectories
    first_traj = all_trajs[0]
    selection = first_traj.topology.select('symbol C or symbol O or symbol N')

    superposer = SuperposeFeaturizer(selection, first_traj[0])
    sequences = superposer.transform(all_trajs)

    model = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    model.fit(sequences)
    expected_logprob = model.predict(sequences)[0]

    with tempfile.TemporaryFile() as tmp:
        pickle.dump(model, tmp)
        # Seek back to the beginning before reading the pickle in again.
        tmp.seek(0, 0)
        reloaded = pickle.load(tmp)

    actual_logprob = reloaded.predict(sequences)[0]
    assert expected_logprob == actual_logprob
def test_1():
    """Smoke-test fitting a 4-state GaussianFusionHMM on the ALA2 data.

    Nothing fancy: this mainly makes sure the code runs without erroring
    out, then sanity-checks the timescales reported by the fitted model.
    """
    dataset = AlanineDipeptide().get()
    trajectories = dataset.trajectories
    topology = trajectories[0].topology
    # Featurize on the carbon, oxygen and nitrogen atoms only.
    indices = topology.select('symbol C or symbol O or symbol N')
    # The featurizer is built from the first frame of the first trajectory.
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    # n_features is taken from the column count of the first sequence.
    hmm = GaussianFusionHMM(n_states=4, n_features=sequences[0].shape[1],
                            n_init=1)
    hmm.fit(sequences)

    # len() must wrap the array, not the comparison: `len(arr == 3)` is just
    # the array length and is truthy for any non-empty array, so it never
    # actually checked the number of timescales.
    assert len(hmm.timescales_) == 3
    # At least one timescale is expected to exceed 50.
    assert np.any(hmm.timescales_ > 50)
def test_pipeline():
    """Run a featurizer -> GaussianHMM pipeline on ALA2 and summarize the HMM.

    Smoke test only: it checks that fitting, predicting and summarizing
    complete without raising.
    """
    trajs = AlanineDipeptide().get_cached().trajectories
    backbone = trajs[0].topology.select('backbone')

    # NOTE: the first step keeps its historical name 'diheds' even though
    # it is a SuperposeFeaturizer.
    steps = [
        ('diheds', SuperposeFeaturizer(backbone, trajs[0][0])),
        ('hmm', GaussianHMM(n_states=4)),
    ]
    pipeline = Pipeline(steps)

    # The prediction itself is not inspected.
    pipeline.fit_predict(trajs)
    pipeline.named_steps['hmm'].summarize()
plt.style.use("ggplot") from msmbuilder.featurizer import SuperposeFeaturizer from msmbuilder.example_datasets import AlanineDipeptide from msmbuilder.hmm import GaussianHMM from msmbuilder.cluster import KCenters from msmbuilder.msm import MarkovStateModel dataset = AlanineDipeptide().get() trajectories = dataset.trajectories topology = trajectories[0].topology indices = [ atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N'] ] featurizer = SuperposeFeaturizer(indices, trajectories[0][0]) sequences = featurizer.transform(trajectories) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~ HIDDEN MARKOV MODEL ~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lag_times = [1, 10, 20, 30, 40] hmm_ts0 = {} hmm_ts1 = {} n_states = [3, 5] for n in n_states: hmm_ts0[n] = [] hmm_ts1[n] = [] for lag_time in lag_times: strided_data = [
from msmbuilder.hmm import GaussianHMM from msmbuilder.cluster import KCenters from msmbuilder.msm import MarkovStateModel from msmbuilder.dataset import dataset import mdtraj as md from glob import glob filenames = sorted(glob("05_Prod_*.nc")) topology = md.load_prmtop(glob("*nowat.prmtop")[0]) first_frame = md.load_frame(filenames[0], 0, top=topology) indices = [ atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N'] ] featurizer = SuperposeFeaturizer(indices, first_frame) sequences = [] for fragment in filenames: for chunk in md.iterload(fragment, chunk=100, top=topology): sequences.append(featurizer.partial_transform(chunk)) lag_times = [1, 20, 50, 100, 200, 400] hmm_ts0 = {} hmm_ts1 = {} n_states = [n for n in range(2, 11, 2)] for n in n_states: hmm_ts0[n] = [] hmm_ts1[n] = [] for lag_time in lag_times:
from msmbuilder.hmm import GaussianHMM from msmbuilder.cluster import KCenters from msmbuilder.msm import MarkovStateModel from msmbuilder.dataset import dataset import mdtraj as md from glob import glob filenames = sorted(glob("05_Prod_*.nc")) topology = md.load_prmtop(glob("*nowat.prmtop")[0]) first_frame = md.load_frame(filenames[0], 0, top=topology) indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']] featurizer = SuperposeFeaturizer(indices, first_frame) sequences = [] for fragment in filenames: for chunk in md.iterload(fragment, chunk = 100, top = topology): sequences.append(featurizer.partial_transform(chunk)) lag_times = [1, 20, 50, 100, 200, 400] hmm_ts0 = {} hmm_ts1 = {} n_states = [n for n in range(2,11,2)] for n in n_states: hmm_ts0[n] = [] hmm_ts1[n] = []
from matplotlib.pyplot import * import matplotlib.pyplot as plt plt.style.use("ggplot") from msmbuilder.featurizer import SuperposeFeaturizer from msmbuilder.example_datasets import AlanineDipeptide from msmbuilder.hmm import GaussianHMM from msmbuilder.cluster import KCenters from msmbuilder.msm import MarkovStateModel dataset = AlanineDipeptide().get() trajectories = dataset.trajectories topology = trajectories[0].topology indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']] featurizer = SuperposeFeaturizer(indices, trajectories[0][0]) sequences = featurizer.transform(trajectories) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~ HIDDEN MARKOV MODEL ~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lag_times = [1, 10, 20, 30, 40] hmm_ts0 = {} hmm_ts1 = {} n_states = [3, 5] for n in n_states: hmm_ts0[n] = [] hmm_ts1[n] = [] for lag_time in lag_times: strided_data = [s[i::lag_time] for s in sequences for i in range(lag_time)]