Example #1
0
    def build_model(self, user_defined_model):
        """
        Load or build the model (a scikit-learn Pipeline) that does all the
        transforming and fitting.

        The model is resolved in this order:

        1. ``user_defined_model`` is a ``Pipeline``: it is returned unchanged.
        2. ``user_defined_model`` is ``None`` and ``self.model_pkl_fname``
           exists on disk: the model is loaded from it with ``load_generic``.
        3. Otherwise a default pipeline is built: DihedralFeaturizer ->
           RobustScaler -> tICA -> MiniBatchKMeans -> MarkovStateModel.

        Side effect: when the default model is built and ``self.app.meta`` is
        ``None``, ``self.timestep`` is reset to ``None``.

        :param user_defined_model: ``None`` or a Pipeline object to use as
            model. Any other type (including a path string) is rejected.
        :return model: the Pipeline that was selected, loaded or built
        :raises ValueError: if ``user_defined_model`` is neither ``None`` nor
            a ``Pipeline`` instance
        """
        if user_defined_model is not None:
            if not isinstance(user_defined_model, Pipeline):
                raise ValueError(
                    'model is not an sklearn.pipeline.Pipeline object')
            logger.info('Using user defined model')
            return user_defined_model

        if os.path.exists(self.model_pkl_fname):
            logger.info('Loading model pkl file {}'.format(
                self.model_pkl_fname))
            return load_generic(self.model_pkl_fname)

        logger.info('Building default model based on dihedrals')

        # Aim for a lag time of 1 ns for tICA and the MSM. If the stride is
        # too big for that, fall back to a single frame and report how long
        # that frame is in ns.
        # NOTE(review): assumes self.timestep is expressed in ns per frame
        # and is set whenever self.app.meta is available -- confirm.
        if self.app.meta is not None:
            lag_time = max(1, int(1 / self.timestep))
            logger.info(
                'Using a lag time of {} ns for the tICA and MSM'.
                format(lag_time * self.timestep))
        else:
            self.timestep = None
            lag_time = 1
            logger.warning(
                'Cannot determine timestep. Defaulting to 1 frame.')

        return Pipeline([('feat', DihedralFeaturizer()),
                         ('scaler', RobustScaler()),
                         ('tICA',
                          tICA(lag_time=lag_time,
                               commute_mapping=True,
                               n_components=10)),
                         ('clusterer',
                          MiniBatchKMeans(n_clusters=200)),
                         ('msm',
                          MarkovStateModel(lag_time=lag_time,
                                           ergodic_cutoff='off',
                                           reversible_type=None))])
"""

import matplotlib

matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
kmeans = load_generic('kmeans.pickl')
msm = load_generic('msm.pickl')
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))


## Plot microstates
def plot_microstates(ax):
    """Draw a hexbin histogram of the first two columns of ``txx`` on *ax*.

    Uses the 'Greys' colormap with logarithmic binning and leaves bins with
    no points unpainted (``mincnt=1``).

    :param ax: object providing a matplotlib-style ``hexbin`` method
    :return: None
    """
    xs, ys = txx[:, 0], txx[:, 1]
    hexbin_style = dict(cmap='Greys', mincnt=1, bins='log')
    ax.hexbin(xs, ys, **hexbin_style)
import seaborn as sns
import matplotlib

matplotlib.use('Agg')
from matplotlib.pylab import plt
from matplotlib.ticker import FormatStrFormatter

sns.set_style("white")


def print_timescales(timescales):
    """Placeholder: accept *timescales*, do nothing, and return ``None``."""
    return None


# Script entry point: collect, for every model in 'rmsd_msms.pickl', its
# leading timescales and its lag time, as NumPy arrays.
if __name__ == "__main__":
    all_msms = load_generic('rmsd_msms.pickl')
    meta, ctraj_dict = load_trajs('ctraj-200')
    # Keep only trajectories whose first axis is longer than 1000 entries,
    # with length-1 axes removed by np.squeeze.
    long_ctrajs = [
        np.squeeze(traj) for traj in ctraj_dict.values()
        if traj.shape[0] > 1000
    ]

    # Divisor applied to the timescales below. The name suggests a ps -> ns
    # conversion -- assumes the models report timescales in ps; TODO confirm.
    ps_to_ns = 1000
    # Number of leading timescales kept from each model.
    n_ts = 10
    timescales = []
    lags = []
    for msm in all_msms:
        timescales.append(msm.timescales_[:n_ts])
        lags.append(msm.get_params()['lag_time'])
    lags = np.array(lags)
    # Transposed so that each row is one timescale index and each column is
    # one model.
    timescales = np.array(timescales).T / ps_to_ns
Example #4
0
from msmbuilder.io import load_generic
import numpy as np
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import sys
import seaborn as sns
colors = sns.color_palette("colorblind", 8)
import pandas as pd

# Load the results table and keep only the hyper-parameter columns
# (names matching ``param_.*``) and the columns matching ``split.*``
# (presumably the per-split scores of a grid search -- TODO confirm).
df = load_generic('grid_search.pickl')
df = df.filter(regex=("param_.*|split.*"))
id_cols = list(df.filter(regex=("param_.*")).columns)
var_cols = list(df.filter(regex=("split.*")).columns)

# Reshape to long form: one row per (parameter combination, split column).
# The melted values live in a column named 'GMRQ'; the original column name
# goes to the default 'variable' column.
df = pd.melt(df, id_vars=id_cols, value_vars=var_cols, value_name='GMRQ')
# expand=False yields a Series for the single capture group, which is what a
# single-column assignment expects.
df['Data'] = df['variable'].str.extract('(test|train)', expand=False)

# Plot the test rows only.
# - y must be 'GMRQ': melt() was given value_name='GMRQ', so no 'value'
#   column exists.
# - .loc replaces .ix, which was removed in pandas 1.0.
# NOTE(review): seaborn >= 0.9 renamed factorplot to catplot (whose default
# kind differs; kind="point" matches factorplot). Kept as-is so the script
# keeps working with the seaborn version it was written against.
g = sns.factorplot(x="param_tica__lag_time",
                   y="GMRQ",
                   hue="param_tica__n_components",
                   col='param_cluster__n_clusters',
                   data=df.loc[df['Data'] == 'test', :])
# parameter = 'n_clusters'
# fig, ax = plt.subplots()
# ax.errorbar(x=df['param_cluster__{}'.format(parameter)], y=df['mean_test_score'], yerr=df['std_test_score']*2)
# ax.set_xlabel('{} value'.format(parameter))
# ax.set_ylabel('GMRQ Score')
plt.savefig('results.pdf')
Example #5
0
"""

# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
kmeans = load_generic('kmeans.pickl')
meta, ktrajs = load_trajs('ktrajs')
meta, ttrajs = load_trajs('ttrajs', meta)
txx = np.concatenate(list(ttrajs.values()))


def plot_cluster_centers(ax):
    ax.hexbin(
        txx[:, 0],
        txx[:, 1],
        cmap=sns.cubehelix_palette(as_cmap=True),
        mincnt=1,
        bins='log',
    )
    ax.scatter(
        kmeans.cluster_centers_[:, 0],
Example #6
0

def plot_single_var(results_df, which, fname='results'):
    """
    Plot a single parameter against the mean test score, with error bars.

    The x axis is logarithmic. The figure is written to
    ``figures/<fname>-<label>.png``, where ``<label>`` is the second
    ``'__'``-separated piece of ``which``.

    :param results_df: appropriately subsetted dataframe; must provide the
        ``which``, ``mean_test_score`` and ``std_test_score`` columns
    :param which: name of the parameter column to plot (must contain ``'__'``)
    :param fname: prefix of the output file name
    :return: None
    """
    param_label = which.split('__')[1]
    figure, axis = plt.subplots()

    axis.errorbar(
        results_df[which].values,
        results_df['mean_test_score'],
        results_df['std_test_score'],
    )
    axis.set_xscale("log")
    axis.set_ylabel('Score')
    axis.set_xlabel('{}'.format(param_label))

    out_path = 'figures/{0}-{1}.png'.format(fname, param_label)
    plt.savefig(out_path)


# Load the pickled search object and tabulate its ``cv_results_`` attribute
# (presumably a fitted scikit-learn parameter search -- TODO confirm).
search_params = load_generic('models/rmsd_model.pickl')
df = pd.DataFrame(search_params.cv_results_)
# Plot score against the 'param_cluster__n_clusters' column; with the default
# fname this writes figures/results-n_clusters.png.
plot_single_var(df, which='param_cluster__n_clusters')
print(df.head())
# best_model = search_params.best_estimator_
# msm = best_model.named_steps['msm']
#
# plot_eigenvectors(model=msm, number=3)
# plot_eigenvectors(model=msm, number=3, which='right')
Example #7
0
msmbuilder autogenerated template version 2
created 2017-05-23T16:38:49.109805
please cite msmbuilder in any publications


"""

import mdtraj as md
import os

from msmbuilder.io.sampling import sample_states
from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic

## Load
meta, ttrajs = load_trajs('ttrajs')
kmeans = load_generic("kmeans.pickl")

## Sample
# Sample with k=10 around the cluster centers (presumably 10 frames per
# center -- confirm against the sample_states documentation).
inds = sample_states(ttrajs, kmeans.cluster_centers_, k=10)

# Persist the sampled indices so other scripts can reuse them.
save_generic(inds, "cluster-sample-inds.pickl")

## Make trajectories
top = preload_top(meta)
out_folder = "cluster_samples"
# backup() is called before os.mkdir, which raises if the folder already
# exists; presumably backup() moves an existing folder aside -- TODO confirm.
backup(out_folder)
os.mkdir(out_folder)

for state_i, state_inds in enumerate(inds):
    traj = md.join(
        md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
sns.set_style('ticks')
colors = sns.color_palette()
import pickle
start_time = time.time()
## Load
meta, ktrajs = load_trajs('../../ktrajs_cen_30_100_5')

dtrajs = list(ktrajs.values())
type(dtrajs[0])
## Fit
msm = bayesian_markov_model(dtrajs, lag=16, nsamples=100000)

print('done with bmm')

## Load
kmeans = load_generic('../../kcenters_30_100_5.pickl')
msm2 = load_generic('../msm_kcen_30_100_5_16.pickl')
meta, ttrajs = load_trajs('../../../ttrajs_a0_30')
txx = np.concatenate(list(ttrajs.values()))
a1 = ttrajs[14]

print('done with load')

print('active_count_fraction is ', msm.active_count_fraction)
print('active_state_fraction is ', msm.active_state_fraction)
print('shape of transition matrix is ', msm.P.shape)
print('mean of eigenvalues is ', msm.sample_mean('eigenvalues', 7))
print('std of eigenvalues is ', msm.sample_std('eigenvalues', 7))
print('mean of pi is ', msm.sample_mean('pi'))
print('std of pi is ', msm.sample_std('pi'))
print('shape of right eigenvectors is ',
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))

inds = load_generic("tica-dimension-0-inds.pickl")
# Stack the row of ``ttrajs`` selected by each (trajectory, frame) pair in
# ``inds`` into a single array, in the order the pairs are listed.
straj = np.asarray(
    [ttrajs[traj_i][frame_i, :] for traj_i, frame_i in inds]
)


## Overlay sampled trajectory on histogram
def plot_sampled_traj(ax):
    """Draw a hexbin histogram of the first two columns of ``txx`` on *ax*.

    Uses the 'magma_r' colormap at 80% opacity with logarithmic binning, and
    leaves bins with no points unpainted (``mincnt=1``).

    :param ax: object providing a matplotlib-style ``hexbin`` method
    :return: None
    """
    xs, ys = txx[:, 0], txx[:, 1]
    hexbin_style = dict(cmap='magma_r', mincnt=1, bins='log', alpha=0.8)
    ax.hexbin(xs, ys, **hexbin_style)
Example #10
0
from msmbuilder.io import load_generic
import numpy as np
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import sys
import seaborn as sns
colors = sns.color_palette("colorblind", 8)
import pandas as pd
from glob import glob

# Load every ``*.pickl`` file found in the current working directory.
files = glob("*.pickl")
all_dfs = [load_generic(pickl_path) for pickl_path in files]

# Combine the tables and order the rows by the number of clusters.
df = pd.concat(all_dfs)
df = df.sort_values(by='param_cluster__n_clusters')

# df = df.filter(regex=("param_.*|split.*"))
# id_cols = list(df.filter(regex=("param_.*")).columns)
# var_cols = list(df.filter(regex=("split.*")).columns)
#
# df = pd.melt(df,id_vars=id_cols, value_vars=var_cols, value_name='GMRQ')
# df['Data'] = df['variable'].str.extract('(test|train)', expand=True)

parameter = 'n_clusters'
fig, ax = plt.subplots()
ax.errorbar(x=df['param_cluster__{}'.format(parameter)],
            y=df['mean_test_score'],
            yerr=df['std_test_score'] * 2,
Example #11
0
import numpy as np
import matplotlib as mp

import pyemma
from matplotlib import cm
from matplotlib import pyplot as plt
from msmbuilder.dataset import dataset
from msmbuilder.io import load_generic

# Input names. The three pickle names are all derived from ``ktrajs_dir``;
# ``ttrajs_dir`` is independent of it.
ktrajs_dir = 'ktrajs-extracted-kcenters-lag1500-2-900'
ttrajs_dir = 'ttrajs-extracted-lag1500-new'
ktrajs_pkl = '%s-mle.pickl' % ktrajs_dir
micro_pkl = 'msm-%s-mle.pickl' % ktrajs_dir
macro_pkl = 'msm-%s-pcca-mle.pickl' % ktrajs_dir

# Load the three pickles from the parent directory.
clusterer = load_generic("../%s" % ktrajs_pkl)
msm_model = load_generic("../%s" % micro_pkl)
pcca = load_generic("../%s" % macro_pkl)

############################################################################
###      get macrostate mapping and state_label of each microstate       ###
############################################################################
mapping = pcca.microstate_mapping_
# NOTE(review): state_label and msm_label are bound to the same attribute.
state_label = msm_model.state_labels_
msm_label = msm_model.state_labels_
n_microstates = msm_model.n_states_
n_macrostates = pcca.n_macrostates

############################################################################
###      get first two tica and plot free energy by mle msm_model        ###
############################################################################
Example #12
0
from msmbuilder.io import load_generic, load_trajs
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from utilities import *
import numpy as np
import pandas as pd
import seaborn as sns

# LOAD DATA
# TODO change the default name of the param search
param_search = load_generic('Positions-grid-search-results.pickl')
tica = load_generic('Positions-tica.pickl')
meta, ttrajs = load_trajs('Positions-ttrajs')
txx = np.concatenate(list(ttrajs.values()))
params, n_comb = get_param_combs(param_search)

# # PARAMETER SEARCH PLOT
# fig, axes = plt.subplots(nrows=n_comb, ncols=1)
# axes = plot_param_line(param_search, axes, params)
# plt.tight_layout()
# plt.savefig('Positions-param-results.pdf')
# plt.clf()
#
# # tICA DISTRIBUTION PLOT
# plot_tica_distribution(txx, sample_size=2000, ndims=4)
# plt.savefig('Positions-tica-dist.pdf')
# plt.clf()
#
# # TIME SCALES PLOT
# fig, axes = plt.subplots()
colors = sns.color_palette("colorblind", 8)
import pandas as pd


def _plot_eigenvectors(vecs, out_fname, max_vectors=5):
    """Bar-plot the leading columns of *vecs*, one stacked subplot each.

    :param vecs: 2-D array; column ``i`` is drawn in subplot ``i``
    :param out_fname: path the figure is saved to
    :param max_vectors: upper bound on the number of columns plotted
    :return: None
    """
    n_plots = min(max_vectors, vecs.shape[1])
    # squeeze=False makes subplots() always return a 2-D array of Axes.
    # Without it a single row comes back as a bare Axes object and cannot be
    # iterated, so a model with only one eigenvector used to raise here.
    fig, axes = plt.subplots(nrows=n_plots, sharey=False, sharex=True,
                             squeeze=False)
    positions = range(vecs.shape[0])
    for idx, ax in enumerate(axes[:, 0]):
        ax.bar(positions, vecs[:, idx])
    fig.savefig(out_fname)


def plot_lv(mod):
    """Plot up to five columns of ``mod.left_eigenvectors_`` to 'msm-lvs.png'.

    :param mod: object exposing a 2-D ``left_eigenvectors_`` array
    :return: None
    """
    _plot_eigenvectors(mod.left_eigenvectors_, 'msm-lvs.png')


def plot_rv(mod):
    """Plot up to five columns of ``mod.right_eigenvectors_`` to 'msm-rvs.png'.

    :param mod: object exposing a 2-D ``right_eigenvectors_`` array
    :return: None
    """
    _plot_eigenvectors(mod.right_eigenvectors_, 'msm-rvs.png')


# Load the pickled model, plot its left and right eigenvectors, print its
# timescales and their uncertainties, and save an image of its transition
# matrix.
msm = load_generic('msm-lag-4000-nclusters-20.pickl')

plot_lv(msm)
plot_rv(msm)
print(msm.timescales_)
print(msm.uncertainty_timescales())
# matshow draws the matrix, and the savefig call that follows writes the
# current figure to disk.
plt.matshow(msm.transmat_)
plt.savefig('msm-transmat.png')
Example #14
0
"""

# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
tica = load_generic('tica.pickl')
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))


## Heatmap
def plot_heatmap(ax):
    """Draw a hexbin histogram of the first two columns of ``txx`` on *ax*.

    Uses a seaborn cubehelix colormap with logarithmic binning, leaves bins
    with no points unpainted, and labels the axes "tIC 1" and "tIC 2".

    :param ax: matplotlib-style axes to draw on
    :return: None
    """
    xs, ys = txx[:, 0], txx[:, 1]
    density_cmap = sns.cubehelix_palette(as_cmap=True)
    ax.hexbin(xs, ys, cmap=density_cmap, mincnt=1, bins='log')

    label_font = dict(fontsize=16)
    ax.set_xlabel("tIC 1", **label_font)
    ax.set_ylabel("tIC 2", **label_font)

Example #15
0
"""

# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
kmeans = load_generic('kmeans.pickl')
meta, ktrajs = load_trajs('ktrajs')
meta, ttrajs = load_trajs('ttrajs', meta)
txx = np.concatenate(list(ttrajs.values()))


def plot_cluster_centers(ax):
    """Overlay the k-means cluster centers on a hexbin histogram of ``txx``.

    The histogram uses the first two columns of ``txx`` (cubehelix colormap,
    logarithmic binning, empty bins unpainted). The centers are drawn from
    the first two columns of ``kmeans.cluster_centers_`` as size-40 markers
    in the first palette color.

    :param ax: matplotlib-style axes to draw on
    :return: None
    """
    xs, ys = txx[:, 0], txx[:, 1]
    density_cmap = sns.cubehelix_palette(as_cmap=True)
    ax.hexbin(xs, ys, cmap=density_cmap, mincnt=1, bins='log')

    centers = kmeans.cluster_centers_
    ax.scatter(centers[:, 0], centers[:, 1], s=40, c=colors[0])
Example #16
0
matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))

inds = load_generic("tica-dimension-0-inds.pickl")
# Stack the row of ``ttrajs`` selected by each (trajectory, frame) pair in
# ``inds`` into a single array, in the order the pairs are listed.
straj = np.asarray(
    [ttrajs[traj_i][frame_i, :] for traj_i, frame_i in inds]
)


## Overlay sampled trajectory on histogram
def plot_sampled_traj(ax):
    ax.hexbin(
        txx[:, 0],
        txx[:, 1],
        cmap='magma_r',
        mincnt=1,
        bins='log',
        alpha=0.8,
# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))
kmeans = load_generic('kmeans.pickl')

inds = load_generic("cluster-sample-inds.pickl")
coordinates = [
    np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds])
    for state_inds in inds
    ]


## Overlay sampled states on histogram
def plot_sampled_states(ax):
    ax.hexbin(txx[:, 0], txx[:, 1],
              cmap='magma_r',
              mincnt=1,
              bins='log',
              alpha=0.8,
import pyemma
from msmbuilder.lumping import PCCAPlus,PCCA
#from msmbuilder.lumping import PCCAPlus
from msmbuilder.io import load_trajs, load_generic

tlag = 400
ntrajs = 130
ktrajs_dir = 'ktrajs-extracted-kcenters-lag1500-2-1000'
ktrajs_pkl = '%s-mle.pickl'%ktrajs_dir
microtraj_dir = 'microktrajs-%s-mle8'%ktrajs_dir
microtraj_pkl = 'msm-%s-mle8.pickl'%ktrajs_dir
ttrajs_dir = 'ttrajs-atpair2-lag1500'
#unwrapbp_dir = '../msm-remove10ns-cluster-unwrapbpnew2/unwrapbpnew2'

msm = load_generic(microtraj_pkl)

#meta, k_trajs = load_trajs(ktrajs_dir)
ktrajs = dataset('%s-mle'%ktrajs_dir,mode='r',fmt='dir-npy',verbose=True)
ttrajs = dataset(ttrajs_dir,mode='r',fmt='dir-npy',verbose=True)
#utrajs = dataset(unwrapbp_dir,mode='r',fmt='dir-npy',verbose=True)
ktrajs = [ktrajs[s].tolist() for s in range(len(ktrajs))]
#tvaluesall = [ttrajs[s].tolist() for s in range(len(ttrajs)) if s!=9]
#uvaluesall = [utrajs[s].tolist() for s in range(len(utrajs)) if s!=9]
tvaluesall = [ttrajs[s].tolist() for s in range(len(ttrajs))]
#uvaluesall = [utrajs[s].tolist() for s in range(len(utrajs))]
#tvalues = [ttrajs[s] for s in range(len(ttrajs))]
#txx = np.concatenate(tvaluesnew)

msm_label = msm.state_labels_
print(len(msm_label))
Example #19
0
# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))
kmeans = load_generic('kmeans.pickl')

inds = load_generic("cluster-sample-inds.pickl")
coordinates = [
    np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds])
    for state_inds in inds
]


## Overlay sampled states on histogram
def plot_sampled_states(ax):
    ax.hexbin(
        txx[:, 0],
        txx[:, 1],
        cmap='magma_r',
        mincnt=1,
Example #20
0
Meta
----
depends:
  - top.pdb
  - trajs
"""

import mdtraj as md

from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic
from msmbuilder.io.sampling import sample_msm

## Load
meta, ttrajs = load_trajs('ttrajs')
msm = load_generic('msm.pickl')
kmeans = load_generic('kmeans.pickl')

## Sample
# Warning: make sure ttrajs and kmeans centers have
# the same number of dimensions
# The result is consumed below as (trajectory index, frame index) pairs.
inds = sample_msm(ttrajs, kmeans.cluster_centers_, msm, n_steps=200, stride=1)
save_generic(inds, "msm-traj-inds.pickl")

## Make trajectory
top = preload_top(meta)
# Load each sampled frame from the file named in that trajectory's
# 'traj_fn' entry of ``meta`` and join the frames, in order, into one
# trajectory.
traj = md.join(
    md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
    for traj_i, frame_i in inds
)
Example #21
0
please cite msmbuilder in any publications
"""

import matplotlib
matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

sns.set_style('ticks')
colors = sns.color_palette()

## Load
tica = load_generic('tica.pickl')
meta, ttrajs = load_trajs('ttrajs')
txx = np.concatenate(list(ttrajs.values()))


## Heatmap
def plot_heatmap(ax):
    """Draw a hexbin histogram of the first two columns of ``txx`` on *ax*.

    Uses a seaborn cubehelix colormap with logarithmic binning, leaves bins
    with no points unpainted, and labels the axes "tIC 1" and "tIC 2".

    :param ax: matplotlib-style axes to draw on
    :return: None
    """
    xs, ys = txx[:, 0], txx[:, 1]
    density_cmap = sns.cubehelix_palette(as_cmap=True)
    ax.hexbin(xs, ys, cmap=density_cmap, mincnt=1, bins='log')

    label_font = dict(fontsize=16)
    ax.set_xlabel("tIC 1", **label_font)
    ax.set_ylabel("tIC 2", **label_font)

Example #22
0
Meta
----
depends:
  - ../../top.pdb
  - ../../trajs
"""

import mdtraj as md
import os

from msmbuilder.io.sampling import sample_states
from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic

## Load
meta, ttrajs = load_trajs('ttrajs')
kmeans = load_generic("kmeans.pickl")

## Sample
# Sample with k=10 around the cluster centers (presumably 10 frames per
# center -- confirm against the sample_states documentation).
inds = sample_states(ttrajs,
                     kmeans.cluster_centers_,
                     k=10)

# Persist the sampled indices so other scripts can reuse them.
save_generic(inds, "cluster-sample-inds.pickl")

## Make trajectories
top = preload_top(meta)
out_folder = "cluster_samples"
# backup() is called before os.mkdir, which raises if the folder already
# exists; presumably backup() moves an existing folder aside -- TODO confirm.
backup(out_folder)
os.mkdir(out_folder)

for state_i, state_inds in enumerate(inds):