def test_ala2():
    """Smoke-test fitting a 4-state GaussianHMM on the ALA2 data.

    Nothing fancy: loads the cached alanine dipeptide trajectories,
    featurizes the C/O/N atoms with a SuperposeFeaturizer, fits the model
    and checks that three timescales are reported, at least one of which
    exceeds 50.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    hmm = GaussianHMM(n_states=4, n_init=3)
    hmm.fit(sequences)

    # The length check must apply to the timescales themselves; the previous
    # `len(hmm.timescales_ == 3)` measured the length of an elementwise
    # comparison and therefore never verified the count.
    assert len(hmm.timescales_) == 3
    assert np.any(hmm.timescales_ > 50)
def test_ala2():
    """Smoke-test fitting a seeded 4-state GaussianHMM on the ALA2 data.

    Nothing fancy: loads the cached alanine dipeptide trajectories,
    featurizes the C/O/N atoms with a SuperposeFeaturizer, fits the model
    using the module-level random state ``rs`` and checks that three
    timescales are reported, at least one of which exceeds 50.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)

    # The length check must apply to the timescales themselves; the previous
    # `len(hmm.timescales_ == 3)` measured the length of an elementwise
    # comparison and therefore never verified the count.
    assert len(hmm.timescales_) == 3
    assert np.any(hmm.timescales_ > 50)
def test_pickle():
    """Round-trip a fitted HMM through pickle and compare predictions.

    The model is fitted on the featurized ALA2 trajectories, pickled to a
    temporary file, loaded back, and the log-probability returned by
    ``predict`` must be equal before and after the round trip.
    """
    ala2_trajs = AlanineDipeptide().get_cached().trajectories
    atom_indices = ala2_trajs[0].topology.select(
        'symbol C or symbol O or symbol N')
    sequences = SuperposeFeaturizer(
        atom_indices, ala2_trajs[0][0]).transform(ala2_trajs)

    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)
    # Only the log-probability is compared; the hidden-state paths are unused.
    logprob, _ = hmm.predict(sequences)

    with tempfile.TemporaryFile() as buf:
        pickle.dump(hmm, buf)
        buf.seek(0)  # rewind so the pickle can be read back
        restored = pickle.load(buf)

    logprob2, _ = restored.predict(sequences)
    assert logprob == logprob2
def test_pickle():
    """Check that a fitted HMM survives a pickle round trip.

    Fits the model on the featurized ALA2 trajectories, dumps it to a
    temporary file, reloads it, and requires the log-probability from
    ``predict`` to be equal for the original and the reloaded model.
    """
    ala2_trajs = AlanineDipeptide().get_cached().trajectories
    first_traj = ala2_trajs[0]
    atom_indices = first_traj.topology.select(
        'symbol C or symbol O or symbol N')
    superposer = SuperposeFeaturizer(atom_indices, first_traj[0])
    sequences = superposer.transform(ala2_trajs)

    hmm = GaussianHMM(n_states=4, n_init=3, random_state=rs)
    hmm.fit(sequences)
    # The hidden-state paths are returned but not checked by this test.
    logprob, _ = hmm.predict(sequences)

    with tempfile.TemporaryFile() as handle:
        pickle.dump(hmm, handle)
        handle.seek(0)  # back to the start before unpickling
        reloaded = pickle.load(handle)

    logprob2, _ = reloaded.predict(sequences)
    assert logprob == logprob2
def test_1():
    """Smoke-test fitting a 4-state GaussianFusionHMM on the ALA2 data.

    Nothing fancy: loads the alanine dipeptide dataset, featurizes the
    C/O/N atoms with a SuperposeFeaturizer, fits the model and checks that
    three timescales are reported, at least one of which exceeds 50.
    """
    dataset = AlanineDipeptide().get()
    trajectories = dataset.trajectories
    topology = trajectories[0].topology
    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    # n_features is read from the second dimension of the first sequence.
    hmm = GaussianFusionHMM(n_states=4, n_features=sequences[0].shape[1],
                            n_init=1)
    hmm.fit(sequences)

    # The length check must apply to the timescales themselves; the previous
    # `len(hmm.timescales_ == 3)` measured the length of an elementwise
    # comparison and therefore never verified the count.
    assert len(hmm.timescales_) == 3
    assert np.any(hmm.timescales_ > 50)
from msmbuilder.featurizer import SuperposeFeaturizer from msmbuilder.example_datasets import AlanineDipeptide from msmbuilder.hmm import GaussianHMM from msmbuilder.cluster import KCenters from msmbuilder.msm import MarkovStateModel dataset = AlanineDipeptide().get() trajectories = dataset.trajectories topology = trajectories[0].topology indices = [ atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N'] ] featurizer = SuperposeFeaturizer(indices, trajectories[0][0]) sequences = featurizer.transform(trajectories) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~ HIDDEN MARKOV MODEL ~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lag_times = [1, 10, 20, 30, 40] hmm_ts0 = {} hmm_ts1 = {} n_states = [3, 5] for n in n_states: hmm_ts0[n] = [] hmm_ts1[n] = [] for lag_time in lag_times: strided_data = [ s[i::lag_time] for s in sequences for i in range(lag_time)
import matplotlib.pyplot as plt
plt.style.use("ggplot")
from msmbuilder.featurizer import SuperposeFeaturizer
from msmbuilder.example_datasets import AlanineDipeptide
from msmbuilder.hmm import GaussianHMM
from msmbuilder.cluster import KCenters
from msmbuilder.msm import MarkovStateModel

# Load the alanine dipeptide example dataset and take its trajectories.
dataset = AlanineDipeptide().get()
trajectories = dataset.trajectories
topology = trajectories[0].topology

# Indices of all atoms whose element symbol is C, O or N.
indices = [atom.index for atom in topology.atoms if atom.element.symbol in ['C', 'O', 'N']]
# The featurizer is built from those indices and trajectories[0][0]
# (presumably the reference structure -- confirm against SuperposeFeaturizer).
featurizer = SuperposeFeaturizer(indices, trajectories[0][0])
sequences = featurizer.transform(trajectories)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~ HIDDEN MARKOV MODEL ~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lag_times = [1, 10, 20, 30, 40]
# Per-n_states result lists, keyed by the number of states.
# NOTE(review): nothing is appended to them in the visible part of the
# script; presumably filled further down -- confirm.
hmm_ts0 = {}
hmm_ts1 = {}
n_states = [3, 5]

# Fit one GaussianHMM for every (number of states, lag time) combination.
for n in n_states:
    hmm_ts0[n] = []
    hmm_ts1[n] = []
    for lag_time in lag_times:
        # Subsample each sequence with stride `lag_time`, once per starting
        # offset i in [0, lag_time), so every frame lands in exactly one
        # strided subsequence.
        strided_data = [s[i::lag_time] for s in sequences for i in range(lag_time)]
        hmm = GaussianHMM(n_states=n, n_init=1).fit(strided_data)