Python MiniBatchKMeans Examples, msmbuilder.cluster.MiniBatchKMeans Python Examples

Example #1

0

Show file

File: fit_transform_kinase_series.py Project: HWaymentSteele/kinase_msm

def fit_protein_kmeans(yaml_file,mini=True,pca=False):
    """Fit a single k-means model on the pooled data of every protein.

    Entries of ``yaml_file["mdl_params"]`` whose key starts with
    ``cluster__`` are passed to the clusterer constructor with that prefix
    removed. The fitted model is dumped to ``kmeans_mdl.pkl`` inside
    ``yaml_file["mdl_dir"]``.

    :param yaml_file: mapping providing ``mdl_dir``, ``mdl_params`` and
        ``protein_list``.
    :param mini: when true use ``MiniBatchKMeans`` with a batch size of
        ``100 * n_clusters``; otherwise use ``KMeans``.
    :param pca: when true load ``pca_data.pkl`` for each protein instead of
        ``tica_data.pkl``.
    :return: None
    """
    mdl_dir = yaml_file["mdl_dir"]
    mdl_params = yaml_file["mdl_params"]

    # Keep only the clusterer settings, stripped of their pipeline prefix.
    cluster_kwargs = {
        name.split("cluster__")[1]: value
        for name, value in mdl_params.items()
        if name.startswith("cluster__")
    }

    if mini:
        cluster_kwargs["batch_size"] = 100 * cluster_kwargs["n_clusters"]
        kmeans_mdl = MiniBatchKMeans(**cluster_kwargs)
    else:
        kmeans_mdl = KMeans(**cluster_kwargs)

    data_fname = "pca_data.pkl" if pca else "tica_data.pkl"
    data = []
    for protein in yaml_file["protein_list"]:
        with enter_protein_mdl_dir(yaml_file, protein):
            projected = verboseload(data_fname)
            # Natural-sorted keys give a reproducible trajectory order.
            ordered_keys = sorted(projected.keys(), key=keynat)
            data.extend([projected[traj_key] for traj_key in ordered_keys])

    kmeans_mdl.fit(data)
    verbosedump(kmeans_mdl, os.path.join(mdl_dir, "kmeans_mdl.pkl"))
    return

Example #2

0

Show file

File: test_plot_utils.py Project: jeiros/msmadapter

class TestPlotUtils:
    """Smoke tests: each plotting helper must hand back an ``Axes``."""

    def setUp(self):
        """Build two seeded random (20, 3) arrays and fit a 2-cluster model."""
        numpy.random.seed(12)
        # Keys are drawn in order 0, 1 so the seeded values match per key.
        self.ttrajs = {
            traj_id: numpy.random.rand(20, 3) for traj_id in (0, 1)
        }
        self.clusterer = MiniBatchKMeans(n_clusters=2)
        fit_input = list(self.ttrajs.values())
        self.clusterer.fit(fit_input)

    def test_plot_spawns(self):
        """plot_spawns with no axis supplied returns an Axes."""
        result = plot_spawns(inds=spawns, tica_trajs=self.ttrajs, ax=None)
        assert isinstance(result, Axes)

    def test_plot_tica_landscape(self):
        """plot_tica_landscape returns a pair whose second item is an Axes."""
        _, axes = plot_tica_landscape(self.ttrajs)
        assert isinstance(axes, Axes)

    def test_plot_clusters(self):
        """plot_clusters on the fitted clusterer returns an Axes."""
        result = plot_clusters(self.clusterer)
        assert isinstance(result, Axes)

Example #3

0

Show file

File: custom_clusterer.py Project: DorisMai/conformation

def cluster_minikmeans(tica_dir,
                       data_dir,
                       traj_dir,
                       n_clusters,
                       clusterer_dir=None,
                       tICs=None):
    """Fit, or refresh the labels of, a MiniBatchKMeans clusterer.

    If ``clusterer_dir`` names an existing file, the saved clusterer is
    loaded, its ``labels_`` are recomputed from the data at ``data_dir`` and
    it is written back. Otherwise a new clusterer is fitted and, when
    ``clusterer_dir`` is given, saved there.

    :param tica_dir: unused here; kept for interface compatibility.
    :param data_dir: path of the reduced data to cluster.
    :param traj_dir: unused here; kept for interface compatibility.
    :param n_clusters: number of clusters for a newly fitted model.
    :param clusterer_dir: path of the pickled clusterer, or None to skip
        both the cache lookup and the save.
    :param tICs: optional column selection applied to every trajectory
        before fitting a new model.
    :return: the loaded or newly fitted clusterer.
    """
    # os.path.exists(None) raises TypeError, so test for None explicitly.
    if clusterer_dir is not None and os.path.exists(clusterer_dir):
        reduced_data = load_file(data_dir)
        clusterer = verboseload(clusterer_dir)
        # NOTE(review): tICs is not applied on this path, unlike the fit
        # path below -- confirm that is intended.
        clusterer.labels_ = clusterer.transform(reduced_data)
        verbosedump(clusterer, clusterer_dir)
        return clusterer

    print("Clustering by KMeans")
    try:
        reduced_data = verboseload(data_dir)
    except Exception:
        # Fall back to the dataset loader, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        reduced_data = load_dataset(data_dir)

    if tICs is not None:
        X = [traj[:, tICs] for traj in reduced_data]
    else:
        X = reduced_data

    clusterer = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    clusterer.fit_transform(X)
    if clusterer_dir is not None:
        verbosedump(clusterer, clusterer_dir)
    return clusterer

Example #4

0

Show file

File: fit_transform_kinase_series.py Project: sonyahanson/kinase_msm

def fit_protein_kmeans(yaml_file,mini=True):
    """Fit one k-means model on the tICA data pooled over all proteins.

    Keys of ``yaml_file["mdl_params"]`` starting with ``cluster__`` are
    handed to the clusterer constructor with the prefix removed. The fitted
    model is dumped to ``kmeans_mdl.pkl`` in ``yaml_file["mdl_dir"]``.

    :param yaml_file: mapping providing ``mdl_dir``, ``mdl_params`` and
        ``protein_list``.
    :param mini: when true use ``MiniBatchKMeans`` with a batch size of
        ``100 * n_clusters``; otherwise use ``KMeans``.
    :return: None
    """
    mdl_dir = yaml_file["mdl_dir"]
    mdl_params = yaml_file["mdl_params"]

    prefix = "cluster__"
    clusterer_kwargs = {}
    for param_name, param_value in mdl_params.items():
        if not param_name.startswith(prefix):
            continue
        clusterer_kwargs[param_name.split(prefix)[1]] = param_value

    if not mini:
        kmeans_mdl = KMeans(**clusterer_kwargs)
    else:
        clusterer_kwargs["batch_size"] = 100*clusterer_kwargs["n_clusters"]
        kmeans_mdl = MiniBatchKMeans(**clusterer_kwargs)

    data = []
    for protein in yaml_file["protein_list"]:
        with enter_protein_mdl_dir(yaml_file, protein):
            tica_data = verboseload("tica_data.pkl")
            # Append trajectories in natural key order.
            for traj_key in sorted(tica_data.keys(), key=keynat):
                data.append(tica_data[traj_key])

    kmeans_mdl.fit(data)
    out_path = os.path.join(mdl_dir, "kmeans_mdl.pkl")
    verbosedump(kmeans_mdl, out_path)
    return

Example #5

0

Show file

File: test_plot_utils.py Project: jeiros/msmadapter

 def setUp(self):
     """Create two seeded random (20, 3) arrays and fit a 2-cluster model."""
     numpy.random.seed(12)
     # Draw in key order so each key gets the same seeded values as before.
     first = numpy.random.rand(20, 3)
     second = numpy.random.rand(20, 3)
     self.ttrajs = {0: first, 1: second}
     self.clusterer = MiniBatchKMeans(n_clusters=2)
     fit_input = list(self.ttrajs.values())
     self.clusterer.fit(fit_input)

Example #6

0

Show file

    def generate_clusters(self, ticad):
        """
        Fit a new MiniBatchKMeans on ``ticad`` and return the clustered data.

        A fresh clusterer is built on every call, because clusters found for
        earlier trajectories may change as more data arrives. The cluster
        count comes from the ``num_clusters`` option of the ``model`` config
        section. When ``self.save_extras`` is truthy the fitted clusterer is
        dumped to ``microstater.pkl``.

        Returns: clustered dataset
        """
        n_clusters = self.config.getint("model", "num_clusters")
        clustr = MiniBatchKMeans(n_clusters=n_clusters)
        clustered = clustr.fit_transform(ticad)
        if not self.save_extras:
            return clustered
        utils.dump(clustr, "microstater.pkl")
        return clustered

Example #7

0

Show file

File: custom_clusterer.py Project: msultan/conformation

def cluster_minikmeans(tica_dir, data_dir, traj_dir, n_clusters, lag_time):
    """Fit a MiniBatchKMeans clusterer unless one is already saved.

    The clusterer is stored at
    ``<tica_dir>/clusterer_<n_clusters>clusters.h5``; if that file exists
    nothing is recomputed.

    :param tica_dir: directory in which the clusterer file is stored.
    :param data_dir: path of the reduced data to cluster.
    :param traj_dir: unused here; kept for interface compatibility.
    :param n_clusters: number of clusters to fit.
    :param lag_time: unused here; kept for interface compatibility.
    :return: None
    """
    clusterer_dir = "%s/clusterer_%dclusters.h5" %(tica_dir, n_clusters)
    if os.path.exists(clusterer_dir):
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Already clustered")
        return

    print("Clustering by KMeans")
    try:
        reduced_data = verboseload(data_dir)
    except Exception:
        # Fall back to the dataset loader without swallowing
        # KeyboardInterrupt / SystemExit.
        reduced_data = load_dataset(data_dir)
    clusterer = MiniBatchKMeans(n_clusters = n_clusters)
    clusterer.fit_transform(reduced_data)
    verbosedump(clusterer, clusterer_dir)

Example #8

0

Show file

File: FAST.py Project: chemlove/Protein_struct_traj_analysis

def cluster_features(features, clusterer, n_clusters=8):
    '''
    Fit an msmbuilder clusterer, chosen by name, on a set of trajectories.

    Input
    features : list of arrays, length n_trajs, each of shape (n_samples, n_features)
    clusterer : str, one of 'KMeans', 'KCenters', 'KMedoids',
        'MiniBatchKMeans', 'MiniBatchKMedoids'
    n_clusters : int, number of clusters passed to the clusterer

    Output
    clst : msmbuilder.cluster object, with attributes
        cluster_centers_ : (n_clusters, n_features)
        labels_          : list of arrays, each of shape (n_samples, )

    Raises
    ValueError : if clusterer is not one of the supported names
    '''
    supported = ('KMeans', 'KCenters', 'KMedoids',
                 'MiniBatchKMeans', 'MiniBatchKMedoids')
    # Reject unknown names up front; previously this fell through every
    # branch and failed later with an UnboundLocalError on `clst`.
    if clusterer not in supported:
        raise ValueError("Unknown clusterer %r; expected one of: %s"
                         % (clusterer, ', '.join(supported)))

    # Imported lazily so msmbuilder is only needed when clustering runs.
    import msmbuilder.cluster as msm_cluster
    clst = getattr(msm_cluster, clusterer)(n_clusters=n_clusters)
    # Called for its fitting side effect; only the fitted model is returned.
    clst.fit_transform(features)
    return clst

Example #9

0

Show file

def get_pipeline(parameters):
    """
    Build a brand-new pipeline so every fold works on its own instance.
    :param parameters: mapping of parameter names to values, applied with
        ``Pipeline.set_params``
    :return: sklearn.pipeline.Pipeline object
    """
    steps = [
        ('variance_cut', VarianceThreshold()),
        ('tica', tICA(kinetic_mapping=True)),
        ('cluster', MiniBatchKMeans()),
        ('msm', MarkovStateModel(use_gap='timescales', lag_time=50,
                                 verbose=True)),
    ]
    pipe = Pipeline(steps)
    pipe.set_params(**parameters)
    return pipe

Example #10

0

Show file

File: adaptive.py Project: jeiros/msmadapter

    def build_model(self, user_defined_model):
        """
        Return the model to use: the caller's Pipeline, a pickled one, or a default.

        With ``user_defined_model`` set to None, the model is loaded from
        ``self.model_pkl_fname`` if that file exists; otherwise a default
        dihedral-based pipeline is built. Any other value must be a Pipeline
        and is returned unchanged.

        :param user_defined_model: None, or a Pipeline object to use as model
        :return model: the selected model
        :raises ValueError: if ``user_defined_model`` is neither None nor a Pipeline
        """
        if user_defined_model is not None:
            if not isinstance(user_defined_model, Pipeline):
                raise ValueError(
                    'model is not an sklearn.pipeline.Pipeline object')
            logger.info('Using user defined model')
            return user_defined_model

        if os.path.exists(self.model_pkl_fname):
            logger.info('Loading model pkl file {}'.format(
                self.model_pkl_fname))
            return load_generic(self.model_pkl_fname)

        logger.info('Building default model based on dihedrals')

        # Aim for a 1 ns lag time for tICA and the MSM. If the stride is too
        # coarse for that, use a single frame and report its length in ns.
        if self.app.meta is None:
            self.timestep = None
            lag_time = 1
            logger.warning(
                'Cannot determine timestep. Defaulting to 1 frame.')
        else:
            lag_time = max(1, int(1 / self.timestep))
            lag_ns = lag_time * self.timestep
            logger.info(
                'Using a lag time of {} ns for the tICA and MSM'.
                format(lag_ns))

        steps = [
            ('feat', DihedralFeaturizer()),
            ('scaler', RobustScaler()),
            ('tICA', tICA(lag_time=lag_time,
                          commute_mapping=True,
                          n_components=10)),
            ('clusterer', MiniBatchKMeans(n_clusters=200)),
            ('msm', MarkovStateModel(lag_time=lag_time,
                                     ergodic_cutoff='off',
                                     reversible_type=None)),
        ]
        return Pipeline(steps)

Example #11

0

Show file

        # tICA is similar to principal component analysis.
        # Lag time and component count come from the command-line arguments.
        tica_model = tICA(lag_time=int(args.lag),
                          n_components=int(args.components))
        # fit and transform can be done in separate steps:
        tica_model = scaled_diheds.fit_with(tica_model)
        tica_trajs = scaled_diheds.transform_with(tica_model,
                                                  'ticas/',
                                                  fmt='dir-npy')

        # Conformations need to be clustered into states (sometimes written as microstates).
        # We cluster based on the tICA projections to group conformations that interconvert
        # rapidly. Note that we transform our trajectories from the n_components-dimensional
        # tICA space into a 1-dimensional cluster index
        # txx stacks all projected trajectories into one array; it is reused
        # below to compute per-frame assignments.
        txx = np.concatenate(tica_trajs)
        #_ = msme.plot_histogram(txx)
        clusterer = MiniBatchKMeans(n_clusters=int(args.clusters),
                                    random_state=42)
        clustered_trajs = tica_trajs.fit_transform_with(clusterer,
                                                        'kmeans/',
                                                        fmt='dir-npy')
        #plt.figure()
        #plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1, cmap='viridis')
        #plt.scatter(clusterer.cluster_centers_[:,0], clusterer.cluster_centers_[:,1], s=100, c='w')
        #plt.savefig('microstate_clusters.png')

        # We can construct an MSM from the labeled trajectories
        msm = MarkovStateModel(lag_time=int(args.lag), n_timescales=20)
        msm.fit(clustered_trajs)
        # Label the stacked frames with the fitted clusterer, then pass those
        # labels through the MSM's own partial_transform.
        assignments = clusterer.partial_transform(txx)
        assignments = msm.partial_transform(assignments)
        #msme.plot_free_energy(txx, obs=(0, 1), n_samples=10000,
        #                  pi=msm.populations_[assignments],

Example #12

0

Show file

def calculate_fitness(population_dihedral, diheds, score_global, i, lock):
    """Score one feature subset with a cross-validated MSM and record it.

    The selected feature columns go through tICA, MiniBatchKMeans and a
    5-fold cross-validated MarkovStateModel. The best median test score is
    stored in ``score_global`` under key ``i`` while ``lock`` is held.

    :param population_dihedral: column selection applied to every array in
        ``diheds``.
    :param diheds: sequence of 2-D feature arrays, one per trajectory.
    :param score_global: shared mapping updated with ``{i: score}``.
    :param i: key identifying this individual in ``score_global``.
    :param lock: lock guarding ``score_global``; must support ``with``.
    :return: None
    """
    import pandas as pd
    import numpy as np
    from msmbuilder.preprocessing import RobustScaler
    from msmbuilder.decomposition import tICA
    from msmbuilder.cluster import MiniBatchKMeans
    from msmbuilder.msm import MarkovStateModel
    # NOTE(review): sklearn.cross_validation is the legacy location of KFold
    # and this call signature belongs to it -- confirm the pinned sklearn.
    from sklearn.cross_validation import KFold

    pop_index = i
    new_diheds = [X[:, population_dihedral] for X in diheds]

    scaler = RobustScaler()
    scaled_diheds = scaler.fit_transform(new_diheds)
    # NOTE(review): the scaled result is immediately replaced by the
    # unscaled features, so tICA runs on unscaled data. Kept as-is to
    # preserve results -- confirm whether scaling was meant to be disabled.
    scaled_diheds = new_diheds

    tica_model = tICA(lag_time=2, n_components=5)
    tica_model.fit(scaled_diheds)
    tica_trajs = tica_model.transform(scaled_diheds)

    clusterer = MiniBatchKMeans(n_clusters=200, random_state=42)
    clustered_trajs = clusterer.fit_transform(tica_trajs)

    msm = MarkovStateModel(lag_time=50, n_timescales=5)
    n_states = [4]
    cv = KFold(len(clustered_trajs), n_folds=5)
    results = []
    for n in n_states:
        msm.n_states_ = n
        for fold, (train_index, test_index) in enumerate(cv):
            # `j` rather than `i`, so the parameter `i` is never shadowed.
            train_data = [clustered_trajs[j] for j in train_index]
            test_data = [clustered_trajs[j] for j in test_index]
            msm.fit(train_data)
            train_score = msm.score(train_data)
            test_score = msm.score(test_data)
            time_score = msm.timescales_[0]
            time_test_score = time_score + test_score
            print(time_score)
            print(test_score)
            av_score = time_test_score / 2
            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'time_score': time_score,
                'av_score': av_score,
                'n_states': n,
                'fold': fold
            })
            print(msm.timescales_)

    results = pd.DataFrame(results)
    # Median over folds for each n_states value.
    avgs = (results.groupby('n_states').aggregate(np.median).drop('fold',
                                                                  axis=1))
    best_nt = avgs['test_score'].idxmax()
    best_scorent = avgs.loc[best_nt, 'test_score']
    print(best_scorent)

    # The context manager releases the lock even if the update raises.
    with lock:
        score_global.update({pop_index: best_scorent})

Example #13

0

Show file

from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
from sklearn.pipeline import Pipeline
import os
from ..adaptive import create_folder

# Silence every logging call of severity CRITICAL and below for this module.
logging.disable(logging.CRITICAL)

# Collect metadata for the run-<run>.nc trajectories under data_app/runs/.
parser = NumberedRunsParser(traj_fmt='run-{run}.nc',
                            top_fn='data_app/runs/structure.prmtop',
                            step_ps=200)
meta = gather_metadata('/'.join(['data_app/runs/', '*nc']), parser)

# Small pipeline shared by the tests: dihedral features -> min-max scaling
# -> 4-component tICA -> 5 clusters -> MSM.
model = Pipeline([('feat', DihedralFeaturizer()), ('scaler', MinMaxScaler()),
                  ('tICA', tICA(lag_time=1, n_components=4)),
                  ('clusterer', MiniBatchKMeans(n_clusters=5)),
                  ('msm', MarkovStateModel(lag_time=1, n_timescales=4))])

# Module-level fixture values; their consumers are outside the visible
# lines -- presumably (trajectory, frame) pairs and an epoch counter, verify.
spawns = [
    (0, 1),
]
epoch = 1


class TestAppBase:
    def __init__(self):
        self.app = App(generator_folder='data_app/generators',
                       data_folder='data_app/runs',
                       input_folder='data_app/inputs',
                       filtered_folder='data_app/filtered_trajs',
                       model_folder='data_app/model',

Example #14

0

Show file

File: model_without_tica.py Project: RobertArbon/four_well_analysis

#
# TIMESCALES
#
# The data will be loaded with a stride of 10 frames.  Each frame is 50ps, so the time per frame will be
# 500ps/frame or 0.5ns/frame.
# Each trajectory is 1000 frames long
# Lag time will be 40 frames (20 ns)  based on a visual inspection of /Misc/MSM_lag_time.ipynb
# NOTE(review): int(40 / to_ns) evaluates to 80, not 40. Either the lag is
# meant to be 40 ns (80 frames) or the expression should be int(20 / to_ns);
# confirm against the notebook before relying on the comment above.
to_ns = 0.5
msm_lag = int(40 / to_ns)

#
# FEATURE INDICES
#
all_idx = np.load('indices_all.npy')

#
# OTHER PARAMETERS
#
ref_traj = md.load('../Data/data/trajectory-1.xtc',
                   top='../Data/data/fs-peptide.pdb')

# `feats` is not defined in the lines visible here.
featurizer = FeatureSelector(features=feats)

# Pipeline without a tICA step: features -> variance filter -> robust
# scaling -> clustering -> MSM at the lag time chosen above.
pipe = Pipeline([('features', featurizer),
                 ('variance_cut', VarianceThreshold()),
                 ('scaling', RobustScaler()), ('cluster', MiniBatchKMeans()),
                 ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False))])

# Persist the unfitted pipeline.
save_generic(pipe, 'model.pickl')

Example #15

0

Show file

import pandas as pd
import pickle
import glob
from msmbuilder.featurizer import ContactFeaturizer
from msmbuilder.dataset import dataset
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import verbosedump,verboseload

# Input: pickled list of per-trajectory feature arrays.
file='dataset_nark.best_nonredu.pkl'
# Pickle data is binary, so open in 'rb' (text mode fails on Python 3), and
# use a context manager so the handle is closed deterministically.
with open(file, 'rb') as fh:
    alpha=pickle.load(fh)
print ('#_trajs:'+ str(np.shape(alpha)[0])+'\n' '#_CA_contacts:'+str(np.shape(alpha[0])[1]))

# Dimensionality reduction -> clustering -> MSM.
tica_model=tICA(n_components=10,lag_time=1)
tica_trajs=tica_model.fit_transform(alpha)
clusterer =MiniBatchKMeans(n_clusters=400)
clustered_trajs = clusterer.fit_transform(tica_trajs)
msm =MarkovStateModel(lag_time=150, n_timescales=5)
assignments = msm.fit_transform(clustered_trajs)

# Stack all frames, and look up the MSM population of each frame's state.
data = np.concatenate(tica_trajs, axis=0)
pi_0 = msm.populations_[np.concatenate(assignments, axis=0)]


# Output names reuse the input name without its 4-character '.pkl' suffix.
name=file[:-4]
verbosedump(tica_model, name+"-GA-tica_model.pkl")
verbosedump(tica_trajs, name+"-GA-tica_trajs.pkl")
verbosedump(clusterer, name+"-GA-mbkm_mdl.pkl")
verbosedump(clustered_trajs, name+"-GA-clustered_trajs.pkl")
verbosedump(msm,name+"-GA-msm.pkl")
verbosedump(assignments,name+"-GA-assignments.pkl")
verbosedump(data,name+"-GA-weighted-msme-tica-data.pkl")

Example #16

0

Show file

# Concatenate features
# ftraj = num trajectories x np.array(n_frames, n_features)
ftraj = []
for traj in all_trajs:
    tmp = []
    for feat in traj:
        # Promote 1-D features to a single column so they can be joined
        # along axis 1 with the 2-D ones.
        if feat.ndim == 1:
            feat = feat.reshape(-1, 1)
        tmp.append(feat)
    ftraj.append(np.concatenate(tmp, axis=1))

# Make Pipeline
# Five random splits, half of the data held out in each.
cv_iter = ShuffleSplit(n_splits=5, test_size=0.5)

estimators = [('scale', StandardScaler()), ('tica', tICA()),
              ('cluster', MiniBatchKMeans(random_state=0)),
              ('msm', MarkovStateModel())]

# Grid of explicit values: two evenly spaced integers per parameter.
param_grid = {
    'cluster__n_clusters': list(np.linspace(200, 500, num=2).astype(int)),
    'tica__n_components': list(np.linspace(10, 30, num=2).astype(int)),
    'tica__lag_time': list(np.linspace(200, 500, num=2).astype(int))
}

# Distributions for randomized search.
# NOTE(review): randint(low=200, high=200) has low == high. If the upper
# bound is exclusive (as in scipy.stats.randint) this range is empty; use
# high=201 if a fixed 200 clusters was intended -- confirm.
params = {
    'cluster__n_clusters': scipy.stats.randint(low=200, high=200),
    'tica__n_components': scipy.stats.randint(low=2, high=40),
    'tica__lag_time': scipy.stats.randint(low=100, high=999)
}

pipe = Pipeline(estimators)

Example #17

0

Show file

File: plot_stackdist.py Project: tijaunet/msmexplorer

# Fixed random state so the clustering below is reproducible.
rs = np.random.RandomState(42)

# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories

# Extract chi1 dihedrals
featurizer = DihedralFeaturizer(types=['chi1'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=2)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=12, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2)
assignments = msm.fit_transform(clustered_trajs)

# Flatten per-trajectory assignments and features into single arrays
a = np.concatenate(assignments, axis=0)
d = np.concatenate(diheds, axis=0)

# Plot Stacked Distributions of the sine of each Chi1 angle
# within an arbitrary set of states {2, 5, 0}
# [:, ::2] keeps every other feature column -- presumably the sine terms;
# verify against the featurizer's column order.
path_data = [d[a == i][:, ::2] for i in [2, 5, 0]]
msme.plot_stackdist(path_data)

Example #18

0

Show file

import matplotlib
import matplotlib.patches as mpatches
from seaborn.distributions import (_scipy_univariate_kde, _scipy_bivariate_kde)

###READ TRAJECTORY FILES AND PREPROCESSING the data
# Featurize every *.nc trajectory into phi/psi dihedrals, using "dihed/"
# as the output location.
ds=dataset("*.nc", topology="s.pdb")
feat = DihedralFeaturizer(types=['phi', 'psi'])
ds_alpha=ds.fit_transform_with(feat, "dihed/",fmt='dir-npy')
# Re-open the dihedral features from disk.
ds_alpha = dataset("./dihed/")
print(len(ds_alpha),len(ds))
print(ds[0].xyz.shape)
# NOTE(review): repeats the dataset("./dihed/") load from a few lines above.
ds_alpha = dataset("./dihed/")
# Project onto 2 tICA components, then re-open the result from "./tica/".
tica_mdl = tICA(lag_time=10,n_components=2)
tica_features = ds_alpha.fit_transform_with(tica_mdl, out_ds = 'tica')
tica_features = dataset("./tica/")
# Cluster the tICA projections; 10 is the first positional argument,
# presumably n_clusters -- confirm.
kmeans_mdl = MiniBatchKMeans(10)
assignments = tica_features.fit_transform_with(kmeans_mdl, out_ds='assignments/')
assignments = dataset("assignments/")
import msmexplorer as msme
from matplotlib import pyplot as plt
tica_trajs=dataset("./tica/")

###PREPROCESSING THE TICA DATA
# For each of 6 passes, gather frames j..j+800 of every tICA trajectory into
# one array. `j` is not changed in the lines visible here.
j=0
tica=[]
for k in range(6):
    f1=[]
    for i in range(len(tica_trajs)):
        f=list(tica_trajs[i][j:j+800])
        f1.append(f)
    f1=np.array(f1)

Example #19

0

Show file

File: msmbuilder_bootstrap.py Project: mincheoly/protein-dynamics-inference

# Featurize the trajectories in `xyz` into phi/psi dihedrals.
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

#tICA
from msmbuilder.decomposition import tICA
tica_model = tICA(lag_time=2, n_components=4)
# fit and transform can be done in separate steps:
tica_model = diheds.fit_with(tica_model)
tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')

# Stack all projected trajectories into one array for plotting below.
txx = np.concatenate(tica_trajs)

# clustering: can change hyperparameters
from msmbuilder.cluster import MiniBatchKMeans
#clusterer = MiniBatchKMeans(n_clusters=num_clusters)
# Batch size scales with the cluster count (10 samples per cluster).
clusterer = MiniBatchKMeans(n_clusters=num_clusters, max_no_improvement=1000, batch_size=num_clusters*10)
clustered_trajs = tica_trajs.fit_transform_with(
    clusterer, 'kmeans/', fmt='dir-npy'
)

# msm builder
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import dump
msm = MarkovStateModel(lag_time=2, n_timescales=20, ergodic_cutoff='off')
msm.fit(clustered_trajs)

# save tIC plot
import matplotlib
matplotlib.use('Agg') # Must be placed before matplotlib.pyplot
import matplotlib.pyplot as plt
# Log-scaled density of the first two tICA components.
plt.hexbin(txx[:, 0], txx[:, 1], bins='log', mincnt=1, cmap="bone_r")

Example #20

0

Show file

File: run_more_cvs.py Project: RobertArbon/four_well_analysis

import os
from glob import glob
import numpy as np
from multiprocessing import Pool
import pandas as pd
from msmbuilder.featurizer import DihedralFeaturizer, KappaAngleFeaturizer
from sklearn.model_selection import cross_val_score, cross_val_predict

# Globals
num_procs = 5
traj_dir = '/mnt/storage/home/ra15808/scratch/train'
# traj_dir = '/Users/robert_arbon/Datasets/DHFR/train'

# Two pipelines that differ only in the MSM step: this one requests a fixed
# n_timescales=2 ...
pipe_fixed = Pipeline([('variance_cut', VarianceThreshold()),
                       ('tica', tICA(kinetic_mapping=True)),
                       ('cluster', MiniBatchKMeans()),
                       ('msm',
                        MarkovStateModel(n_timescales=2,
                                         lag_time=50,
                                         verbose=True))])

# ... while this one passes use_gap='timescales' instead.
pipe_csp = Pipeline([('variance_cut', VarianceThreshold()),
                     ('tica', tICA(kinetic_mapping=True)),
                     ('cluster', MiniBatchKMeans()),
                     ('msm',
                      MarkovStateModel(use_gap='timescales',
                                       lag_time=50,
                                       verbose=True))])

# Previously saved table of best trials.
best = pd.read_pickle('best_trials.pickl')

Example #21

0

Show file

File: cluster.py Project: xy21hb/msmbuilder

"""Cluster tICA results

{{header}}

Meta
----
depends:
 - ttrajs
 - meta.pandas.pickl
"""
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.cluster import MiniBatchKMeans

## Load
meta, ttrajs = load_trajs('ttrajs')

## Fit
# Only the first `dim` tICA components are used for clustering.
dim = 5
kmeans = MiniBatchKMeans(n_clusters=500)
fit_data = [traj[:, :dim] for traj in ttrajs.values()]
kmeans.fit(fit_data)

## Transform
# Label each trajectory with the fitted model, keyed like the input.
ktrajs = {
    key: kmeans.partial_transform(traj[:, :dim])
    for key, traj in ttrajs.items()
}

## Save
print(kmeans.summarize())
save_trajs(ktrajs, 'ktrajs', meta)
save_generic(kmeans, 'kmeans.pickl')

Example #22

0

Show file

    tica_model = tICA(lag_time=40, n_components=20)

# fit and transform can be done in separate steps:
tica_model = diheds.fit_with(tica_model)
tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')

# Stack all projected trajectories into one array for export.
txx = np.concatenate(tica_trajs)

# save tICA
# NOTE(review): the file name literal ends in '_+', leaving a stray '+' in
# the saved name -- confirm whether that is intended.
np.savetxt(folder + 'tICA_coord_+' + which_dataset + '.csv',
           txx,
           delimiter=',')

# clustering
from msmbuilder.cluster import MiniBatchKMeans
clusterer = MiniBatchKMeans(n_clusters=num_clusters)  #100 for calmodulin
clustered_trajs = tica_trajs.fit_transform_with(clusterer,
                                                'kmeans/',
                                                fmt='dir-npy')

# msm builder
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import dump

# The lag time depends on the dataset.
# NOTE(review): if which_dataset matches neither name, `msm` is never bound
# and msm.fit below raises NameError.
if which_dataset == 'fspeptide':
    msm = MarkovStateModel(lag_time=2, n_timescales=20, ergodic_cutoff='on')
if which_dataset == 'apo_calmodulin':
    msm = MarkovStateModel(lag_time=20, n_timescales=20, ergodic_cutoff='on')

msm.fit(clustered_trajs)

Example #23

0

Show file

# Featurize the trajectories in `xyz` into phi/psi dihedrals.
from msmbuilder.featurizer import DihedralFeaturizer
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

#tICA
from msmbuilder.decomposition import tICA
tica_model = tICA(lag_time=2, n_components=4)
# fit and transform can be done in separate steps:
tica_model = diheds.fit_with(tica_model)
tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')

# Stack all projected trajectories into one array.
txx = np.concatenate(tica_trajs)

# clustering
from msmbuilder.cluster import MiniBatchKMeans
clusterer = MiniBatchKMeans(n_clusters=num_clusters)
clustered_trajs = tica_trajs.fit_transform_with(
    clusterer, 'kmeans/', fmt='dir-npy'
)

# msm builder
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import dump
msm = MarkovStateModel(lag_time=20, n_timescales=20, ergodic_cutoff='on')
msm.fit(clustered_trajs)

# Get MFPT
# Mean first passage times computed from the fitted MSM.
from msmbuilder.tpt import mfpts 
mfpt_matrix = mfpts(msm)

# Get flux matrix