Ejemplo n.º 1
0
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic
from msmbuilder.io.sampling import sample_states
from sklearn.neighbors import KDTree
import msmexplorer as msme
from msmbuilder.tpt import net_fluxes, fluxes
from msmbuilder.tpt import paths

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
# NOTE(review): `sns` and `np` are used here but no import for either is
# visible in this fragment -- confirm they are imported elsewhere.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
# Pickled models (paths are relative to the working directory).
kmeans = load_generic('../kcenters_30_100_5.pickl')
msm = load_generic('msm_kcen_30_100_5_16.pickl')
meta, ttrajs = load_trajs('../../ttrajs_a0_30')
# All trajectories stacked along the first axis into a single array.
txx = np.concatenate(list(ttrajs.values()))
# The single trajectory stored under key 14.
a1 = ttrajs[14]

## Plot microstates
def plot_microstates(ax):
    ax.hexbin(txx[:, 0], txx[:, 1],
              cmap='Greys',
              mincnt=1,
              bins='log',
              )

    scale = 100 / np.max(msm.populations_)
    add_a_bit = 25
    ax.scatter(a1[0,0],a1[0,1], marker="x", s=200, c='g')
    ax.scatter(middles2[two, 0],
Ejemplo n.º 2
0
#!/bin/env python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsOneClassifier
from msmbuilder.io import load_meta, load_trajs
import sys
from sklearn.externals import joblib

# Maximum depth of every tree in the random forest.
depth = 9

# Load the per-trajectory feature arrays and the per-trajectory labels.
meta, all_data = load_trajs('alpha_carbon/')
meta, all_label = load_trajs('macro-mapping/')

# Concatenate features and labels in the SAME key order so that every frame
# stays paired with its own label; relying on two independent dict iteration
# orders would silently misalign them if the dicts were built differently.
traj_keys = list(all_data)
all_data_one = np.concatenate([all_data[key] for key in traj_keys])
all_label_one = np.concatenate([all_label[key] for key in traj_keys])

clf = OneVsOneClassifier(
    RandomForestClassifier(n_estimators=100, max_depth=depth, random_state=0))
clf.fit(all_data_one, all_label_one)

# The mean of the boolean "prediction == label" array is the training
# accuracy as a float; the sum / len form truncates under Python 2 integer
# division.
train_accuracy = np.mean(clf.predict(all_data_one) == all_label_one)
print(' Depth %d Train Accu: %.3f' % (depth, train_accuracy))

## save model
joblib.dump(clf, 'ovo-randomforest/final_es100_' + str(depth) + ".pkl")
Ejemplo n.º 3
0
"""Reduce dimensionality with tICA

msmbuilder autogenerated template version 2
created 2017-05-23T16:38:49.125259
please cite msmbuilder in any publications

"""

from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA

## Load
# Build the (unfitted) tICA model, then read the featurized trajectories.
tica = tICA(
    n_components=5,
    lag_time=10,
    kinetic_mapping=True,
)
meta, ftrajs = load_trajs("ftrajs")

## Fit
# The model is fitted on all trajectories together.
tica.fit(ftrajs.values())

## Transform
# Project every trajectory individually, keeping its original key.
ttrajs = {
    traj_key: tica.partial_transform(features)
    for traj_key, features in ftrajs.items()
}

## Save
save_trajs(ttrajs, 'ttrajs', meta)
save_generic(tica, 'tica.pickl')
Ejemplo n.º 4
0
"""

# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
# `rmsds` is indexed by the entries of `meta.index` in the code below.
meta, rmsds = load_trajs('rmsds')


## Plot box plot
def plot_boxplot(ax):
    """Draw a box plot of all pooled values from ``rmsds`` on ``ax``.

    The per-trajectory arrays are concatenated in ``meta.index`` order and
    multiplied by 10 before plotting (presumably nm -> Angstrom, matching the
    axis label -- confirm the stored units). The y ticks are removed.
    """
    pooled = np.concatenate([rmsds[key] for key in meta.index])
    scaled = pooled * 10
    sns.boxplot(scaled, ax=ax)
    ax.set_yticks([])
    ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18)
    # ax.set_xticks(fontsize=16) #TODO: fontsize


## Report bad trajectories
def bad_trajs(cutoff=0.7):
    bad = {}
    for k in meta.index:
Ejemplo n.º 5
0
import matplotlib

matplotlib.use('Agg')
from matplotlib.pylab import plt
from matplotlib.ticker import FormatStrFormatter

sns.set_style("white")


def print_timescales(timescales):
    """Placeholder hook: accepts ``timescales`` and currently does nothing."""
    return None


if __name__ == "__main__":
    # Load the pickled collection of fitted models and the cluster-assigned
    # trajectories.
    all_msms = load_generic('rmsd_msms.pickl')
    meta, ctraj_dict = load_trajs('ctraj-200')
    # Keep only trajectories with more than 1000 frames, dropping
    # singleton dimensions.
    long_ctrajs = [
        np.squeeze(traj) for traj in ctraj_dict.values()
        if traj.shape[0] > 1000
    ]

    # Divisor applied to the timescales below (name suggests ps -> ns).
    ps_to_ns = 1000
    # Number of leading timescales kept per model.
    n_ts = 10
    timescales = []
    lags = []
    for msm in all_msms:
        timescales.append(msm.timescales_[:n_ts])
        lags.append(msm.get_params()['lag_time'])
    lags = np.array(lags)
    # After the transpose the first axis indexes the timescale and the
    # second axis indexes the model.
    timescales = np.array(timescales).T / ps_to_ns
    # Select the first model whose lag_time equals 2000; raises IndexError
    # when no model has that lag time.
    msm = all_msms[np.extract(lags == 2000, np.arange(len(lags)))[0]]
from msmbuilder.io import load_trajs, save_trajs, save_generic, load_generic
from pyemma.msm import bayesian_markov_model
import time
import matplotlib
matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
sns.set_style('ticks')
colors = sns.color_palette()
import pickle
# Wall-clock start; NOTE(review): not read again in the visible code.
start_time = time.time()
## Load
meta, ktrajs = load_trajs('../../ktrajs_cen_30_100_5')

# One entry per trajectory, passed as-is to the estimator below.
dtrajs = list(ktrajs.values())
# NOTE(review): no-op expression statement -- the result is discarded
# (looks like a leftover from interactive use).
type(dtrajs[0])
## Fit
msm = bayesian_markov_model(dtrajs, lag=16, nsamples=100000)

print('done with bmm')

## Load
# Previously saved models and the projected trajectories.
kmeans = load_generic('../../kcenters_30_100_5.pickl')
msm2 = load_generic('../msm_kcen_30_100_5_16.pickl')
meta, ttrajs = load_trajs('../../../ttrajs_a0_30')
# All trajectories stacked along the first axis into a single array.
txx = np.concatenate(list(ttrajs.values()))
# The single trajectory stored under key 14.
a1 = ttrajs[14]

print('done with load')
Ejemplo n.º 7
0
"""Reduce dimensionality with tICA

{{header}}
Meta
----
depends:
  - ftrajs
  - meta.pandas.pickl
"""

from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA

## Load
# Build the (unfitted) tICA model, then read the featurized trajectories.
tica = tICA(
    n_components=5,
    lag_time=10,
    kinetic_mapping=True,
)
meta, ftrajs = load_trajs("ftrajs")

## Fit
# The model is fitted on all trajectories together.
tica.fit(ftrajs.values())

## Transform
# Project every trajectory individually, keeping its original key.
ttrajs = {
    traj_key: tica.partial_transform(features)
    for traj_key, features in ftrajs.items()
}

## Save
save_trajs(ttrajs, "ttrajs", meta)
save_generic(tica, "tica.pickl")
Ejemplo n.º 8
0
"""

import matplotlib

matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
# `rmsds` is indexed by the entries of `meta.index` in the code below.
meta, rmsds = load_trajs('rmsds')


## Plot box plot
def plot_boxplot(ax):
    """Draw a box plot of all pooled values from ``rmsds`` on ``ax``.

    The per-trajectory arrays are concatenated in ``meta.index`` order and
    multiplied by 10 before plotting (presumably nm -> Angstrom, matching the
    axis label -- confirm the stored units). The y ticks are removed.
    """
    pooled = np.concatenate([rmsds[key] for key in meta.index])
    scaled = pooled * 10
    sns.boxplot(scaled, ax=ax)
    ax.set_yticks([])
    ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18)
    # ax.set_xticks(fontsize=16) #TODO: fontsize


## Report bad trajectories
def bad_trajs(cutoff=0.7):
    bad = {}
    for k in meta.index:
Ejemplo n.º 9
0
    width = (2 * np.pi) / N

    ax1.bar(np.deg2rad(theta[1:]), radii, width=width, color=color, alpha=.5)

    if title is not None:
        plt.suptitle(title)

    plt.tight_layout()

    f = plt.gcf()
    return f, (ax1, ax2)


if __name__ == '__main__':
    # Load
    meta, dtrajs = load_trajs('dtrajs')

    # NOTE(review): not used in the visible part of this block.
    dihed_names = ['cTnI dihedral', 'cTnT dihedral']
    data_dict = {}  # This will have keys, one for each sim type
    # Pool the trajectories of every simulation type into one array per type.
    for t in meta['type'].unique():
        indexes = meta[meta['type'] == t].index  # The simulation indexes
        dihed_arr = np.concatenate([dtrajs.get(key) for key in indexes])
        data_dict[t] = dihed_arr

    # Number of columns, read from the first pooled array.
    n_diheds = list(data_dict.values())[0].shape[1]

    # One pass per column; column i of each type is converted from
    # radians to degrees.
    for i in range(n_diheds):
        n_types = 0
        patch_list = []
        for k, v in data_dict.items():
            data = np.rad2deg(v[:, i])
def clust(args):
    """Transform one trajectory with a fitted clustering model.

    ``args`` is a single ``(key, trajectory, model)`` tuple. The key is
    printed, and ``(key, model.transform(trajectory))`` is returned.
    """
    key, trajectory, model = args
    print(key)
    return key, model.transform(trajectory)

if __name__ == "__main__":

    # Load data
    meta = load_meta()
    tops = preload_tops(meta)
    # Total number of frames over all rows of the metadata table.
    totframes = meta['nframes'].sum()

    # Reuse previously saved cluster trajectories when the directory exists;
    # otherwise load the raw trajectories and cluster them.
    ctraj_path = 'ctraj-200'
    if isdir(ctraj_path):
        meta, all_ctrajs_dict = load_trajs(ctraj_path)
    else:

        def traj_load(irow):
            """Load the trajectory for one ``(index, row)`` pair of ``meta``.

            Returns ``(index, trajectory)``; the topology is looked up in
            ``tops`` by the row's ``top_fn`` entry.
            """
            i, row = irow
            traj = md.load(row['traj_fn'], top=tops[row['top_fn']])
            return i, traj


        traj_dict = dict(map(traj_load, meta.iterrows()))
        all_trajs = [traj for traj in traj_dict.values()]

        # The number of landmarks is the total frame count divided by 200,
        # truncated to an integer.
        cluster = LandmarkAgglomerative(n_clusters=200, n_landmarks=int(totframes /200), linkage='ward', metric='rmsd')
        cluster.fit(all_trajs)
        # TODO will this work?
        # One (key, trajectory, fitted model) tuple per trajectory.
        args = [(k,v,cluster) for k, v in traj_dict.items()]
Ejemplo n.º 11
0
# NB: Can't do this in parallel (easily) as the algorithm requires all trajectory data to do transform
#

from msmbuilder.preprocessing import RobustScaler
import numpy as np
from msmbuilder.io import load_trajs, save_trajs, save_generic
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
from utilities import plot_box

if __name__ == '__main__':

    # Load
    feature_name = 'Positions'
    meta, feature_trajs = load_trajs('Unscaled-{}-ftraj'.format(feature_name))

    # Select scaler
    featurizer = RobustScaler()

    # Fit the scaler on all trajectories at once. Only the fitted state is
    # needed here (each trajectory is transformed individually below), so
    # call fit() rather than fit_transform(), whose result was discarded.
    # A concrete list is passed because a dict view is not indexable.
    featurizer.fit(list(feature_trajs.values()))
    scaled_trajs = {
        k: featurizer.partial_transform(v) for k, v in feature_trajs.items()
    }

    # Plot scaled features (every 100th frame of each trajectory)
    ftrajs = np.concatenate([fx[::100] for fx in scaled_trajs.values()])
    fig, ax = plt.subplots(figsize=(15, 5))
    plot_box(ax, fxx=ftrajs, feature_name='Scaled {}'.format(feature_name))
    fig.tight_layout()
Ejemplo n.º 12
0
def plot(data, fname):
    """Plot the distribution of first passage times and save it to a file.

    Parameters
    ----------
    data : sequence of numbers
        First passage times; converted to a NumPy array. Must be non-empty
        because its maximum determines the histogram bins.
    fname : str
        Path of the image file to write.

    The current figure is cleared first. The histogram uses bins of width 10
    starting at 0, and the annotation reports the mean and twice the standard
    deviation of ``data``.
    """
    plt.clf()
    width = 10
    data = np.array(data)
    bins = np.arange(0, data.max(), step=width)
    sns.distplot(data, norm_hist=True, kde=True, bins=bins, label='No ones')
    plt.ylim((0, 0.001))
    plt.xlabel('First passage time (ps)')
    plt.text(
        1000, 0.0005,
        'MFPT = {0:4.2f} +/- {1:4.2f} ps'.format(data.mean(), 2 * data.std()))
    # The keyword was misspelled "transparanet", so the transparent
    # background was never actually requested.
    plt.savefig(fname, transparent=True)


if __name__ == "__main__":
    meta, ctraj = load_trajs('pcca-2-traj')
    # Waiting times collected per (from_state, to_state) transition.
    fpt = {(0, 1): [], (1, 0): []}
    for k, v in ctraj.items():
        count = 0
        # Walk over consecutive frame pairs of this trajectory.
        for v1, v2 in zip(v[:-1], v[1:]):
            if math.isnan(v1) or math.isnan(v2):
                # A missing assignment interrupts the current waiting time.
                count = 0
                continue
            count += 1
            if v1 != v2:
                # State change: record the waiting time and start over.
                fpt[(v1, v2)].append(count)
                count = 0
Ejemplo n.º 13
0
# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
kmeans = load_generic('kmeans.pickl')
meta, ktrajs = load_trajs('ktrajs')
# The metadata loaded above is passed along to the second load.
meta, ttrajs = load_trajs('ttrajs', meta)
# All trajectories stacked along the first axis into a single array.
txx = np.concatenate(list(ttrajs.values()))


def plot_cluster_centers(ax):
    ax.hexbin(txx[:, 0], txx[:, 1],
              cmap=sns.cubehelix_palette(as_cmap=True),
              mincnt=1,
              bins='log',
              )
    ax.scatter(kmeans.cluster_centers_[:, 0],
               kmeans.cluster_centers_[:, 1],
               s=40, c=colors[0],
               )
    ax.set_xlabel("tIC 1", fontsize=16)
Ejemplo n.º 14
0
{{header}}

Meta
----
depends:
  - top.pdb
  - trajs
"""

import mdtraj as md

from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic
from msmbuilder.io.sampling import sample_msm

## Load
meta, ttrajs = load_trajs('ttrajs')
msm = load_generic('msm.pickl')
kmeans = load_generic('kmeans.pickl')

## Sample
# Warning: make sure ttrajs and kmeans centers have
# the same number of dimensions
centers = kmeans.cluster_centers_
inds = sample_msm(ttrajs, centers, msm, n_steps=200, stride=1)
save_generic(inds, "msm-traj-inds.pickl")

## Make trajectory
# Each sampled (trajectory index, frame index) pair is loaded as a single
# frame; the frames are joined lazily into one trajectory.
top = preload_top(meta)
frames = (
    md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
    for traj_i, frame_i in inds
)
traj = md.join(frames)
#
# Does grid search for optimum parameters
#
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA, sparsetica

from sklearn.model_selection import ShuffleSplit, GridSearchCV
import pandas as pd

if __name__ == '__main__':

    # Load data
    feature_name = 'Positions'
    meta, ftrajs = load_trajs("Scaled-{}-ftraj".format(feature_name))
    # One entry per trajectory; the list is what gets split by the CV below.
    X = list(ftrajs.values())

    # Specify CV strategy and parameters
    # 10 random splits, half of the trajectories held out in each;
    # random_state=0 makes the splits reproducible.
    cv_iter = ShuffleSplit(n_splits=10, test_size=0.5, random_state=0)
    # 3 x 3 grid over the number of components and the lag time.
    param_grid = [{'n_components': [10, 20, 40], 'lag_time': [1, 10, 100]}]

    # CV object
    model = tICA(kinetic_mapping=True)

    # Do grid search
    clf = GridSearchCV(estimator=model,
                       param_grid=param_grid,
                       cv=cv_iter,
                       n_jobs=2)
    clf.fit(X)

    # Save results
Ejemplo n.º 16
0
# Plot features
from msmbuilder.io import load_trajs
import numpy as np
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import sys
import seaborn as sns
colors = sns.color_palette("colorblind", 8)

for feature in ['angles', 'dihedrals', 'bonds', 'contacts']:
    meta, ftraj = load_trajs(
        'featurized_trajectories/{}-ftraj'.format(feature))
    ftraj = np.concatenate([traj for traj in ftraj.values()])

    # Draw 10000 random frames (with replacement). Indexing only the first
    # axis works for both 1-D and 2-D feature arrays.
    sample = ftraj[np.random.choice(ftraj.shape[0], size=10000)]
    if feature in ['angles', 'dihedrals']:
        # Keep every second feature column for these feature types.
        sample = sample[:, np.arange(0, ftraj.shape[1], 2)]
    print(feature, sample.shape)

    # Number of feature columns to plot; a 1-D sample counts as a single
    # feature. (The fallback used to assign to the misspelled name
    # `n_feat_plot`, leaving `n_feats_plot` unset or stale.)
    n_feats_plot = sample.shape[1] if sample.ndim > 1 else 1
Ejemplo n.º 17
0
from msmbuilder.io import load_generic, load_trajs
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from utilities import *
import numpy as np
import pandas as pd
import seaborn as sns

# LOAD DATA
# TODO change the default name of the param search
# Pickled grid-search results and tICA model, plus the projected trajectories.
param_search = load_generic('Positions-grid-search-results.pickl')
tica = load_generic('Positions-tica.pickl')
meta, ttrajs = load_trajs('Positions-ttrajs')
# All trajectories stacked along the first axis into a single array.
txx = np.concatenate(list(ttrajs.values()))
# `get_param_combs` is not imported by name here; presumably it comes from
# the `from utilities import *` wildcard import -- confirm.
params, n_comb = get_param_combs(param_search)

# # PARAMETER SEARCH PLOT
# fig, axes = plt.subplots(nrows=n_comb, ncols=1)
# axes = plot_param_line(param_search, axes, params)
# plt.tight_layout()
# plt.savefig('Positions-param-results.pdf')
# plt.clf()
#
# # tICA DISTRIBUTION PLOT
# plot_tica_distribution(txx, sample_size=2000, ndims=4)
# plt.savefig('Positions-tica-dist.pdf')
# plt.clf()
#
# # TIME SCALES PLOT
# fig, axes = plt.subplots()
Ejemplo n.º 18
0
# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
kmeans = load_generic('kmeans.pickl')
meta, ktrajs = load_trajs('ktrajs')
# The metadata loaded above is passed along to the second load.
meta, ttrajs = load_trajs('ttrajs', meta)
# All trajectories stacked along the first axis into a single array.
txx = np.concatenate(list(ttrajs.values()))


def plot_cluster_centers(ax):
    ax.hexbin(
        txx[:, 0],
        txx[:, 1],
        cmap=sns.cubehelix_palette(as_cmap=True),
        mincnt=1,
        bins='log',
    )
    ax.scatter(
        kmeans.cluster_centers_[:, 0],
        kmeans.cluster_centers_[:, 1],
Ejemplo n.º 19
0
from msmbuilder.io import load_meta, preload_tops, save_generic, itertrajs, backup, load_trajs
import mdtraj as md
from msmbuilder.cluster import LandmarkAgglomerative
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import numpy as np
import seaborn as sns
from utilities import to_dataframe


# load trajectories
feature = 'dihedrals'
meta, traj_dict = load_trajs('pruned_trajectories/{}-ftraj'.format(feature))
trajs = [traj for traj in traj_dict.values()]
# Common target length for padding: the longest trajectory in the metadata.
# (The previous expression took np.max of the FIRST unique value only, so a
# longer trajectory later in the table produced a negative pad length.)
nframes = int(meta['nframes'].max())

# cluster
num_clusters = 10
cluster = LandmarkAgglomerative(n_clusters=num_clusters, n_landmarks=200, linkage='ward', metric='euclidean')
cluster.fit(trajs)

ctraj = {}
for k, v in traj_dict.items():
    # A C-contiguous copy is made before the trajectory is handed to the
    # clustering model.
    v = v.copy(order='C')
    v = cluster.partial_predict(v)
    # Pad the label sequence with -1 up to the common length.
    diff = nframes - v.shape[0]
    v = np.append(v, np.full(diff, -1.0))
    ctraj[k] = v

# Convert to DF for plotting and sampling.
Ejemplo n.º 20
0
"""

# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
meta, ftrajs = load_trajs('ftrajs')
# (stride by 100 for memory concerns)
# Every 100th frame of each trajectory, stacked into a single array.
fxx = np.concatenate([fx[::100] for fx in ftrajs.values()])


## Box and whisker plot
def plot_box(ax):
    n_feats_plot = min(fxx.shape[1], 100)
    ax.boxplot(
        fxx[:, :100],
        boxprops={'color': colors[0]},
        whiskerprops={'color': colors[0]},
        capprops={'color': colors[0]},
        medianprops={'color': colors[2]},
    )
Ejemplo n.º 21
0
Meta
----
depends:
 - meta.pandas.pickl
 - ktrajs
"""
from multiprocessing import Pool

import pandas as pd

from msmbuilder.io import load_trajs
from msmbuilder.msm import MarkovStateModel

## Load
meta, ktrajs = load_trajs('ktrajs')

## Parameters
# Lag times to scan: the powers of two 1, 2, 4, ..., 128.
lagtimes = [2 ** i for i in range(8)]


## Define what to do for parallel execution
def at_lagtime(lt):
    msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
    msm.fit(list(ktrajs.values()))
    ret = {
        'lag_time': lt,
        'percent_retained': msm.percent_retained_,
    }
    for i in range(msm.n_timescales):
        ret['timescale_{}'.format(i)] = msm.timescales_[i]
Ejemplo n.º 22
0
Meta
----
depends:
 - meta.pandas.pickl
 - ktrajs
"""
from multiprocessing import Pool

import pandas as pd

from msmbuilder.io import load_trajs
from msmbuilder.msm import MarkovStateModel

## Load
meta, ktrajs = load_trajs('ktrajs')

## Parameters
# Lag times to scan: the powers of two 1, 2, 4, ..., 128.
lagtimes = [2**i for i in range(8)]


## Define what to do for parallel execution
def at_lagtime(lt):
    msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
    msm.fit(list(ktrajs.values()))
    ret = {
        'lag_time': lt,
        'percent_retained': msm.percent_retained_,
    }
    for i in range(msm.n_timescales):
        ret['timescale_{}'.format(i)] = msm.timescales_[i]
Ejemplo n.º 23
0
{{header}}
"""

# ? include "plot_header.template"
# ? from "plot_macros.template" import xdg_open with context

import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from msmbuilder.io import load_trajs

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
meta, ftrajs = load_trajs('ftrajs')
# (stride by 100 for memory concerns)
# Every 100th frame of each trajectory, stacked into a single array.
fxx = np.concatenate([fx[::100] for fx in ftrajs.values()])


## Box and whisker plot
def plot_box(ax):
    n_feats_plot = min(fxx.shape[1], 100)
    ax.boxplot(fxx[:, :100],
               boxprops={'color': colors[0]},
               whiskerprops={'color': colors[0]},
               capprops={'color': colors[0]},
               medianprops={'color': colors[2]},
               )

    if fxx.shape[1] > 100:
Ejemplo n.º 24
0
"""

import matplotlib
matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from msmbuilder.io import load_trajs, load_generic

# Plot styling: seaborn 'ticks' theme and the current seaborn colour palette.
sns.set_style('ticks')
colors = sns.color_palette()

## Load
tica = load_generic('tica.pickl')
meta, ttrajs = load_trajs('ttrajs')
# All trajectories stacked along the first axis into a single array.
txx = np.concatenate(list(ttrajs.values()))


## Heatmap
def plot_heatmap(ax):
    """Draw a hexbin density of the first two columns of ``txx`` on ``ax``.

    Bin counts are shown on a log scale, empty bins are left blank, and the
    axes are labelled "tIC 1" and "tIC 2".
    """
    first, second = txx[:, 0], txx[:, 1]
    palette = sns.cubehelix_palette(as_cmap=True)
    ax.hexbin(first, second, cmap=palette, mincnt=1, bins='log')
    for set_label, text in ((ax.set_xlabel, "tIC 1"), (ax.set_ylabel, "tIC 2")):
        set_label(text, fontsize=16)


## Timescales
Ejemplo n.º 25
0
#
# NB: Can't do this in parallel (easily) as the algorithm requires all trajectory data to do transform
#

from msmbuilder.preprocessing import RobustScaler
import numpy as np
from msmbuilder.io import load_trajs, save_trajs, save_generic
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
from utilities import plot_box

if __name__ == '__main__':

    # Load
    meta, feature_trajs = load_trajs('ftraj')

    # Select scaler
    featurizer = RobustScaler()

    # Fit the scaler on all trajectories at once. Only the fitted state is
    # needed here (each trajectory is transformed individually below), so
    # call fit() rather than fit_transform(), whose result was discarded.
    # A concrete list is passed because a dict view is not indexable.
    featurizer.fit(list(feature_trajs.values()))
    scaled_trajs = {
        k: featurizer.partial_transform(v) for k, v in feature_trajs.items()
    }

    # Rank the features by their variance over a random subsample of frames.
    sample = np.concatenate(list(scaled_trajs.values()))
    # Sampling without replacement cannot draw more frames than exist, so cap
    # the subsample size at the number of available frames.
    n_sample = min(1000, sample.shape[0])
    sample = sample[np.random.choice(sample.shape[0], n_sample, replace=False), :]
    # Per-feature variance, computed in one vectorised call.
    variance = np.var(sample, axis=0)
    # Feature indices ordered from lowest to highest variance.
    order = np.argsort(variance)