Beispiel #1
0
def test_fitghmm():
    """Drive the ``hmsm`` GHMM commands end to end and check the fitted means.

    Inside the ``tempdir()`` context a featurizer is saved, then the
    ``fit-ghmm``, ``inspect``, ``sample-ghmm``, ``means-ghmm`` and
    ``structures`` sub-commands are run through ``shell``.  The CSV outputs
    are parsed, the first model record is read from ``hmms.jsonlines`` and the
    generated ``means-*.pdb`` structures are loaded.

    Afterwards the model means and the PDB coordinates, each sorted by their
    first coordinate, are compared with ``eq(..., decimal=0)`` against
    ``HMM.means_`` and against each other.
    """
    def _first_coordinate(row):
        # Sort key: order rows by their first coordinate.
        return row[0]

    def _rows_sorted(rows):
        # Fresh array of the rows, ordered by first coordinate.
        return np.array(sorted(rows, key=_first_coordinate))

    with tempdir():
        RawPositionsFeaturizer().save('featurizer.pickl')

        topology = os.path.join(DATADIR, 'Trajectory0.h5')
        # Trailing arguments shared by the fit / sample / means commands.
        data_args = '--dir %s --ext h5 --top %s' % (DATADIR, topology)

        shell('hmsm fit-ghmm --featurizer featurizer.pickl  --n-init 10   --n-states 4 '
              + data_args)
        shell('hmsm inspect -i hmms.jsonlines --details')
        shell('hmsm sample-ghmm --no-match-vars -i hmms.jsonlines --lag-time 1 --n-state 4 '
              '--featurizer featurizer.pickl ' + data_args)
        shell('hmsm means-ghmm -i hmms.jsonlines --lag-time 1 --n-state 4 '
              '--featurizer featurizer.pickl ' + data_args)
        shell('hmsm structures means.csv --ext pdb --prefix means --top %s' % topology)

        # Parsing the CSVs doubles as a check that both files were written.
        samples_csv = pd.read_csv('samples.csv', skiprows=1)
        means_csv = pd.read_csv('means.csv', skiprows=1)

        model = next(iterobjects('hmms.jsonlines'))
        means_pdb = md.load(glob('means-*.pdb'))

    means = _rows_sorted(model['means'])
    print('true\n', HMM.means_)
    print('learned\n', means)

    eq(HMM.means_, means, decimal=0)

    # One row per PDB structure, ordered the same way as the model means.
    means_pdb_xyz = _rows_sorted(means_pdb.xyz.reshape(4, 3))
    eq(means_pdb_xyz, _rows_sorted(model['means']), decimal=0)
Beispiel #2
0
def test_fitghmm():
    """Drive the ``hmsm`` GHMM commands end to end and check the fitted means.

    Inside the ``tempdir()`` context a 3-feature featurizer is saved, then the
    ``fit-ghmm``, ``inspect``, ``sample-ghmm``, ``means-ghmm`` and
    ``structures`` sub-commands are run through ``shell``.  The CSV outputs
    are parsed, the first model record is read from ``hmms.jsonlines`` and the
    generated ``means-*.pdb`` structures are loaded.

    The model means, sorted by first coordinate, are then compared with
    ``eq(..., decimal=0)`` against ``HMM.means_`` and against the PDB
    coordinates sorted the same way.
    """
    with tempdir():
        RawPositionsFeaturizer(n_features=3).save('featurizer.pickl')
        shell('hmsm fit-ghmm --featurizer featurizer.pickl '
                  ' --n-states 4 --dir %s --ext h5 --top %s' % (
                      DATADIR, os.path.join(DATADIR, 'Trajectory0.h5')))
        shell('hmsm inspect -i hmms.jsonlines --details')
        shell('hmsm sample-ghmm --no-match-vars -i hmms.jsonlines --lag-time 1 --n-state 4 '
              '--featurizer featurizer.pickl --dir %s --ext h5 --top %s' % (
                  DATADIR, os.path.join(DATADIR, 'Trajectory0.h5')))
        shell('hmsm means-ghmm -i hmms.jsonlines --lag-time 1 --n-state 4 '
              '--featurizer featurizer.pickl --dir %s --ext h5 --top %s' % (
                  DATADIR, os.path.join(DATADIR, 'Trajectory0.h5')))
        shell('hmsm structures means.csv --ext pdb --prefix means --top %s' %  os.path.join(DATADIR, 'Trajectory0.h5'))

        samples_csv = pd.read_csv('samples.csv', skiprows=1)
        means_csv = pd.read_csv('means.csv', skiprows=1)

        model = next(iterobjects('hmms.jsonlines'))
        means_pdb = md.load(glob('means-*.pdb'))

    means = np.array(sorted(model['means'], key=lambda e: e[0]))
    eq(HMM.means_, means, decimal=0)

    print(samples_csv)
    print(means_csv)
    # glob() returns the PDB files in an unspecified order, so the loaded
    # frames need not line up with the model's state order.  Sort both sides
    # by their first coordinate before comparing them row by row.
    means_pdb_xyz = np.array(sorted(means_pdb.xyz.reshape(4, 3), key=lambda e: e[0]))
    eq(means_pdb_xyz, means, decimal=0)
Beispiel #3
0
    def __init__(self, args):
        """Collect the model record and input files selected by ``args``.

        Parameters
        ----------
        args : namespace-like
            Reads ``out``, ``filename``, ``n_states``, ``top``, ``dir``,
            ``ext``, ``featurizer`` and ``stride``.

        ``self.error`` is called when the output path already exists, when no
        record in ``args.filename`` has the requested ``n_states``, or when no
        trajectory file matches ``<dir>/*.<ext>``.
        """
        if os.path.exists(args.out):
            self.error('IOError: file exists: %s' % args.out)
        matches = [o for o in iterobjects(args.filename)
                   if o['n_states'] == args.n_states]
        if not matches:
            self.error('No model with n_states=%d in %s.'
               % (args.n_states, args.filename))

        self.args = args
        # When several records match, the first one in the file wins.
        self.model_dict = matches[0]
        self.out = args.out
        self.topology = md.load(args.top)
        self.filenames = glob.glob(
            os.path.join(os.path.expanduser(args.dir), '*.%s' % args.ext))
        self.featurizer = mixtape.featurizer.load(args.featurizer)
        # This used to read the bare name `stride`, which is neither a
        # parameter nor a local of this method (NameError unless a module
        # global happens to exist).  Every other setting comes from `args`.
        # NOTE(review): assumes the command declares a `stride` argument - confirm.
        self.stride = args.stride

        if not self.filenames:
            self.error('No files matched.')
Beispiel #4
0
    def __init__(self, args):
        """Pick the model record and input files described by ``args``.

        Reads ``out``, ``filename``, ``n_states``, ``lag_time``, ``top``,
        ``dir``, ``ext``, ``featurizer`` and ``match_vars`` from ``args``.

        ``self.error`` is called when the output path already exists, when no
        record in ``args.filename`` has both the requested ``n_states`` and
        ``train_lag_time``, or when no file matches ``<dir>/*.<ext>``.
        """
        if os.path.exists(args.out):
            self.error('IOError: file exists: %s' % args.out)

        # Keep every record whose state count and training lag time both match.
        candidates = []
        for record in iterobjects(args.filename):
            if (record['n_states'] == args.n_states
                    and record['train_lag_time'] == args.lag_time):
                candidates.append(record)

        if not candidates:
            self.error('No model with n_states=%d, train_lag_time=%d in %s.' % (
                args.n_states, args.lag_time, args.filename))

        self.args = args
        # The first matching record in file order is the one used.
        self.model = candidates[0]
        self.out = args.out
        self.topology = md.load(args.top)

        search_dir = os.path.expanduser(args.dir)
        pattern = os.path.join(search_dir, '*.%s' % args.ext)
        self.filenames = glob.glob(pattern)

        self.featurizer = mixtape.featurizer.load(args.featurizer)
        self.match_vars = args.match_vars

        if not self.filenames:
            self.error('No files matched.')
import numpy as np
import pandas as pd
import mdtraj as md
from mixtape.utils import iterobjects

# Scale factor applied to both the lag times and the timescales below.
timestep = 1.0
# Only records with this many states are plotted.
n_states = 2
input_filename = "HMM/4000pairs-lag10.jsonlines"

# Read every record from the file, then keep those with the wanted state count.
models = list(iterobjects(input_filename))
models = [model for model in models if model["n_states"] == n_states]

lagtimes = np.array([model["train_lag_time"] for model in models])
lagtimes = lagtimes * timestep

timescales = np.array([model["timescales"] for model in models])
timescales = timescales * timestep

# First timescale of each model against its training lag time.
# NOTE(review): `plot` is not imported here - presumably supplied by an
# interactive pylab session; confirm.
plot(lagtimes, timescales[:, 0], 'o')
Beispiel #6
0
import numpy as np
import pandas as pd
import mdtraj as md
from mixtape.utils import iterobjects, assign
import mixtape.ghmm, mixtape.featurizer
import sklearn.hmm
import os

# Base name shared by the model file and the pickled featurizer.
name = "tica"
json_filename = "./%s.jsonlines" % name
feature_filename = "./%s.pkl" % name

featurizer = mixtape.featurizer.load(feature_filename)

# One DataFrame row per model record in the jsonlines file.
models = list(iterobjects(json_filename))
df = pd.DataFrame(models)

# Take the first record.  The `.ix` indexer was removed in pandas 1.0;
# `.iloc[0]` selects the same row because a DataFrame built from a list
# gets the default 0..n-1 index.
x = df.iloc[0]
T = np.array(x["transmat"])
p = np.array(x["populations"])
n_states = len(p)


# Rebuild a model object from the stored parameters instead of refitting.
model = mixtape.ghmm.GaussianFusionHMM(n_states, featurizer.n_features)
model.means_ = x["means"]
model.vars_ = x["vars"]
model.transmat_ = x["transmat"]
model.populations_ = x["populations"]


means = model.means_
Beispiel #7
0
 def __init__(self, args):
     """Load every record from ``args.filename`` and tabulate it.

     Stores ``args``, the list of records (``self.models``), a DataFrame
     built from that list (``self.df``) and the ``args.details`` flag.
     """
     self.args = args
     records = list(iterobjects(args.filename))
     self.models = records
     self.df = pd.DataFrame(records)
     self.details = args.details
Beispiel #8
0
    def start(self):
        """Tabulate model records, write them to CSV and optionally plot.

        Every record from every file in ``self.args.filename`` is read, a
        fixed set of keys is dropped, and the result is stored as a DataFrame
        in ``self.models``.  The column names are printed, the table is
        written to ``self.args.out_csv`` without the index, and ``self.plot()``
        is called when both ``self.args.x`` and ``self.args.y`` are set.
        """
        # Keys left out of the table.
        exclude = {
            "means",
            "vars",
            "kappas",
            "train_logprobs",
            "transmat",
            "split",
            "test_logprob",
            "n_test_observations",
            "fusion_prior",
            "test_lag_time",
            "cross_validation_fold",
            "train_time",
            "cross_validation_nfolds",
        }

        models = []
        for path in self.args.filename:
            for record in iterobjects(path):
                kept = {key: value for key, value in record.items() if key not in exclude}
                models.append(kept)

        # NOTE(review): the helper's return value is unused, so it presumably
        # rewrites the dicts in `models` in place - confirm against its definition.
        for key in ("timescales", "populations"):
            explode_key_in_listofdicts(key, models)
        self.models = pd.DataFrame(models)

        column_text = ", ".join(self.models.columns)
        print("Columns:\n    %s" % "\n    ".join(wrap(column_text)))
        print('Saving models as CSV file to "%s"' % self.args.out_csv)
        self.models.to_csv(self.args.out_csv, index=False)

        # Plot only when both axes were requested.
        if self.args.x is None or self.args.y is None:
            return
        self.plot()
Beispiel #9
0
 def __init__(self, args):
     """Read all records from ``args.filename`` into a list and a DataFrame.

     Keeps ``args`` itself, the record list as ``self.models``, the
     DataFrame built from it as ``self.df`` and ``args.details``.
     """
     self.args = args
     loaded = list(iterobjects(args.filename))
     self.models = loaded
     self.df = pd.DataFrame(loaded)
     self.details = args.details
Beispiel #10
0
import shutil
import numpy as np
import pandas as pd
import mdtraj as md
from mixtape.utils import iterobjects
import mixtape.ghmm
import mixtape.featurizer
import os

# Base name shared by the model file and the pickled featurizer.
name = "atomindices"
json_filename = "./%s.jsonlines" % name
feature_filename = "./%s.pkl" % name

# One DataFrame row per model record in the jsonlines file.
models = list(iterobjects(json_filename))
df = pd.DataFrame(models)

# Take the first record.  The `.ix` indexer was removed in pandas 1.0;
# `.iloc[0]` selects the same row because a DataFrame built from a list
# gets the default 0..n-1 index.
x = df.iloc[0]

T = np.array(x["transmat"])
p = np.array(x["populations"])

featurizer = mixtape.featurizer.load(feature_filename)

# Rebuild a model object from the stored parameters instead of refitting.
# NOTE(review): the state count is hard-coded to 3 - confirm it matches the
# stored record (len(p)).
model = mixtape.ghmm.GaussianFusionHMM(3, featurizer.n_features)
model.means_ = x["means"]
model.vars_ = x["vars"]
model.transmat_ = x["transmat"]
model.populations_ = x["populations"]

trj0 = md.load("./system.subset.pdb")
atom_indices = np.loadtxt("./AtomIndices.dat", "int")