def test_fitghmm():
    """Exercise the ``hmsm`` GHMM command-line workflow end to end.

    Inside a temporary directory this saves a ``RawPositionsFeaturizer``,
    then invokes ``fit-ghmm``, ``inspect``, ``sample-ghmm``, ``means-ghmm``
    and ``structures`` through ``shell``.  Afterwards it checks that

    * the fitted means stored in ``hmms.jsonlines`` agree with ``HMM.means_``
      to zero decimal places, and
    * the means read back from the written ``means-*.pdb`` files agree with
      the fitted means to zero decimal places.

    Both comparisons are made after sorting the rows by their first
    component, so they do not depend on the order of the states or files.
    """
    def first_coordinate(row):
        # Sort key: order rows by their first component.
        return row[0]

    with tempdir():
        RawPositionsFeaturizer().save('featurizer.pickl')

        # Every trajectory-consuming command takes the same (dir, top) pair.
        topology_path = os.path.join(DATADIR, 'Trajectory0.h5')
        trajectory_args = (DATADIR, topology_path)

        fit_command = ('hmsm fit-ghmm --featurizer featurizer.pickl --n-init 10 '
                       ' --n-states 4 --dir %s --ext h5 --top %s')
        shell(fit_command % trajectory_args)

        shell('hmsm inspect -i hmms.jsonlines --details')

        sample_command = ('hmsm sample-ghmm --no-match-vars -i hmms.jsonlines --lag-time 1 --n-state 4 '
                          '--featurizer featurizer.pickl --dir %s --ext h5 --top %s')
        shell(sample_command % trajectory_args)

        means_command = ('hmsm means-ghmm -i hmms.jsonlines --lag-time 1 --n-state 4 '
                         '--featurizer featurizer.pickl --dir %s --ext h5 --top %s')
        shell(means_command % trajectory_args)

        shell('hmsm structures means.csv --ext pdb --prefix means --top %s'
              % topology_path)

        # Reading the CSV outputs confirms they exist and can be parsed;
        # their contents are not inspected further.
        samples_csv = pd.read_csv('samples.csv', skiprows=1)
        means_csv = pd.read_csv('means.csv', skiprows=1)

        model = next(iterobjects('hmms.jsonlines'))
        means_pdb = md.load(glob('means-*.pdb'))

        means = np.array(sorted(model['means'], key=first_coordinate))
        print('true\n', HMM.means_)
        print('learned\n', means)
        eq(HMM.means_, means, decimal=0)

        pdb_rows = means_pdb.xyz.reshape(4, 3)
        means_pdb_xyz = np.array(sorted(pdb_rows, key=first_coordinate))
        eq(means_pdb_xyz, means, decimal=0)
def test_fitghmm():
    """Exercise the ``hmsm`` GHMM command-line workflow end to end.

    Inside a temporary directory this saves a three-feature
    ``RawPositionsFeaturizer``, then invokes ``fit-ghmm``, ``inspect``,
    ``sample-ghmm``, ``means-ghmm`` and ``structures`` through ``shell``.
    Afterwards it checks that

    * the fitted means stored in ``hmms.jsonlines`` agree with ``HMM.means_``
      to zero decimal places, and
    * the means read back from the written ``means-*.pdb`` files agree with
      the fitted means to zero decimal places.
    """
    def first_coordinate(row):
        # Sort key: order rows by their first component.
        return row[0]

    with tempdir():
        RawPositionsFeaturizer(n_features=3).save('featurizer.pickl')

        # Every trajectory-consuming command takes the same (dir, top) pair.
        topology_path = os.path.join(DATADIR, 'Trajectory0.h5')
        trajectory_args = (DATADIR, topology_path)

        shell('hmsm fit-ghmm --featurizer featurizer.pickl '
              ' --n-states 4 --dir %s --ext h5 --top %s' % trajectory_args)
        shell('hmsm inspect -i hmms.jsonlines --details')
        shell('hmsm sample-ghmm --no-match-vars -i hmms.jsonlines --lag-time 1 --n-state 4 '
              '--featurizer featurizer.pickl --dir %s --ext h5 --top %s' % trajectory_args)
        shell('hmsm means-ghmm -i hmms.jsonlines --lag-time 1 --n-state 4 '
              '--featurizer featurizer.pickl --dir %s --ext h5 --top %s' % trajectory_args)
        shell('hmsm structures means.csv --ext pdb --prefix means --top %s'
              % topology_path)

        samples_csv = pd.read_csv('samples.csv', skiprows=1)
        means_csv = pd.read_csv('means.csv', skiprows=1)

        model = next(iterobjects('hmms.jsonlines'))
        means_pdb = md.load(glob('means-*.pdb'))

        means = np.array(sorted(model['means'], key=first_coordinate))
        eq(HMM.means_, means, decimal=0)

        print(samples_csv)
        print(means_csv)

        # glob() returns the PDB files in arbitrary order, so the rows read
        # back from them need not line up with the order of model['means'].
        # Sort both sides by their first component before comparing so the
        # check does not depend on directory listing order.
        means_pdb_xyz = np.array(sorted(means_pdb.xyz.reshape(4, 3),
                                        key=first_coordinate))
        eq(means_pdb_xyz, means, decimal=0)
def __init__(self, args):
    """Validate the command-line arguments and load the command's inputs.

    Reports a problem through ``self.error`` when the output path already
    exists, when no model in ``args.filename`` has ``args.n_states``
    states, or when no trajectory file matches ``args.dir``/``args.ext``.
    The first matching model is kept as ``self.model_dict``.

    Parameters
    ----------
    args : object
        Parsed arguments; the attributes read here are ``out``,
        ``filename``, ``n_states``, ``top``, ``dir``, ``ext`` and
        ``featurizer``.
    """
    if os.path.exists(args.out):
        self.error('IOError: file exists: %s' % args.out)

    # Collect every stored model with the requested number of states.
    candidates = []
    for record in iterobjects(args.filename):
        if record['n_states'] == args.n_states:
            candidates.append(record)
    if not candidates:
        self.error('No model with n_states=%d in %s.' % (args.n_states, args.filename))

    self.args = args
    self.model_dict = candidates[0]
    self.out = args.out
    self.topology = md.load(args.top)

    trajectory_pattern = os.path.join(os.path.expanduser(args.dir),
                                      '*.%s' % args.ext)
    self.filenames = glob.glob(trajectory_pattern)
    self.featurizer = mixtape.featurizer.load(args.featurizer)
    # NOTE(review): `stride` is not defined anywhere in this method; unless
    # it is a module-level name outside this view this raises NameError.
    # Possibly `args.stride` was intended -- confirm.
    self.stride = stride

    if not self.filenames:
        self.error('No files matched.')
def __init__(self, args):
    """Validate the command-line arguments and load the command's inputs.

    Reports a problem through ``self.error`` when the output path already
    exists, when no model in ``args.filename`` matches both
    ``args.n_states`` and ``args.lag_time``, or when no trajectory file
    matches ``args.dir``/``args.ext``.  The first matching model is kept
    as ``self.model``.

    Parameters
    ----------
    args : object
        Parsed arguments; the attributes read here are ``out``,
        ``filename``, ``n_states``, ``lag_time``, ``top``, ``dir``,
        ``ext``, ``featurizer`` and ``match_vars``.
    """
    if os.path.exists(args.out):
        self.error('IOError: file exists: %s' % args.out)

    def is_requested(record):
        # A model qualifies only if both its state count and its training
        # lag time equal the requested values.
        return (record['n_states'] == args.n_states
                and record['train_lag_time'] == args.lag_time)

    candidates = list(filter(is_requested, iterobjects(args.filename)))
    if not candidates:
        self.error('No model with n_states=%d, train_lag_time=%d in %s.' % (
            args.n_states, args.lag_time, args.filename))

    self.args = args
    self.model = candidates[0]
    self.out = args.out
    self.topology = md.load(args.top)

    trajectory_pattern = os.path.join(os.path.expanduser(args.dir),
                                      '*.%s' % args.ext)
    self.filenames = glob.glob(trajectory_pattern)
    self.featurizer = mixtape.featurizer.load(args.featurizer)
    self.match_vars = args.match_vars

    if not self.filenames:
        self.error('No files matched.')
import numpy as np
import pandas as pd
import mdtraj as md
from mixtape.utils import iterobjects

# Scale factor applied to the lag times and timescales stored in the models.
timestep = 1.0
# Only models with this many states are kept.
n_states = 2
input_filename = "HMM/4000pairs-lag10.jsonlines"

# Read every stored model and keep those with the requested state count.
models = [
    model
    for model in iterobjects(input_filename)
    if model["n_states"] == n_states
]

# One entry per retained model, both scaled by `timestep`.
raw_lagtimes = np.array([model["train_lag_time"] for model in models])
raw_timescales = np.array([model["timescales"] for model in models])
lagtimes = raw_lagtimes * timestep
timescales = raw_timescales * timestep

# Plot the first timescale of each model against its training lag time.
# NOTE(review): `plot` is not imported in this snippet; presumably it comes
# from an interactive pylab session -- confirm.
plot(lagtimes, timescales[:, 0], 'o')
import numpy as np
import pandas as pd
import mdtraj as md
from mixtape.utils import iterobjects, assign
import mixtape.ghmm
import mixtape.featurizer
import sklearn.hmm
import os

# Base name shared by the model file and the pickled featurizer.
name = "tica"
json_filename = "./%s.jsonlines" % name
feature_filename = "./%s.pkl" % name

featurizer = mixtape.featurizer.load(feature_filename)

# Load every stored model into a table, one row per model.
models = list(iterobjects(json_filename))
df = pd.DataFrame(models)

# Take the first model.  `.iloc[0]` replaces `.ix[0]`: the `.ix` indexer was
# removed in pandas 1.0, and for the default integer index built here the
# two select the same row.
x = df.iloc[0]

T = np.array(x["transmat"])
p = np.array(x["populations"])
n_states = len(p)

# Rebuild a model object and fill in the stored parameters.
model = mixtape.ghmm.GaussianFusionHMM(n_states, featurizer.n_features)
model.means_ = x["means"]
model.vars_ = x["vars"]
model.transmat_ = x["transmat"]
model.populations_ = x["populations"]

means = model.means_
def __init__(self, args):
    """Load all models from ``args.filename`` and tabulate them.

    Parameters
    ----------
    args : object
        Parsed arguments; ``filename`` is passed to ``iterobjects`` and
        ``details`` is stored unchanged on the instance.
    """
    self.args = args

    # Materialise the records once so they can back both the list and the
    # DataFrame view.
    loaded = list(iterobjects(args.filename))
    self.models = loaded
    self.df = pd.DataFrame(loaded)

    self.details = args.details
def start(self):
    """Tabulate the models from the input files, save them as CSV, and
    optionally plot.

    Each record yielded by ``iterobjects`` for each file in
    ``self.args.filename`` is stripped of the keys listed in ``exclude``.
    The ``timescales`` and ``populations`` keys are then passed through
    ``explode_key_in_listofdicts``, and the result is stored as a
    DataFrame on ``self.models`` and written to ``self.args.out_csv``.
    ``self.plot()`` is called only when both ``self.args.x`` and
    ``self.args.y`` are given.
    """
    # Keys dropped from every record before it enters the table.
    exclude = {
        "means",
        "vars",
        "kappas",
        "train_logprobs",
        "transmat",
        "split",
        "test_logprob",
        "n_test_observations",
        "fusion_prior",
        "test_lag_time",
        "cross_validation_fold",
        "train_time",
        "cross_validation_nfolds",
    }

    models = []
    for path in self.args.filename:
        for record in iterobjects(path):
            kept = {}
            for key, value in record.items():
                if key not in exclude:
                    kept[key] = value
            models.append(kept)

    for list_valued_key in ("timescales", "populations"):
        explode_key_in_listofdicts(list_valued_key, models)

    self.models = pd.DataFrame(models)

    column_listing = ", ".join(self.models.columns)
    print("Columns:\n %s" % "\n ".join(wrap(column_listing)))
    print('Saving models as CSV file to "%s"' % self.args.out_csv)
    self.models.to_csv(self.args.out_csv, index=False)

    # Plotting needs both axes to be specified.
    if self.args.x is None or self.args.y is None:
        return
    self.plot()
import shutil
import numpy as np
import pandas as pd
import mdtraj as md
from mixtape.utils import iterobjects
import mixtape.ghmm
import mixtape.featurizer
import os

# Base name shared by the model file and the pickled featurizer.
name = "atomindices"
json_filename = "./%s.jsonlines" % name
feature_filename = "./%s.pkl" % name

# Load every stored model into a table, one row per model.
models = list(iterobjects(json_filename))
df = pd.DataFrame(models)

# Take the first model.  `.iloc[0]` replaces `.ix[0]`: the `.ix` indexer was
# removed in pandas 1.0, and for the default integer index built here the
# two select the same row.
x = df.iloc[0]

T = np.array(x["transmat"])
p = np.array(x["populations"])

featurizer = mixtape.featurizer.load(feature_filename)

# Rebuild a model object and fill in the stored parameters.
# NOTE(review): the state count is hard-coded to 3 here; presumably it
# should equal len(p) for the loaded model -- confirm.
model = mixtape.ghmm.GaussianFusionHMM(3, featurizer.n_features)
model.means_ = x["means"]
model.vars_ = x["vars"]
model.transmat_ = x["transmat"]
model.populations_ = x["populations"]

trj0 = md.load("./system.subset.pdb")
atom_indices = np.loadtxt("./AtomIndices.dat", "int")