from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic from msmbuilder.io.sampling import sample_states from sklearn.neighbors import KDTree import msmexplorer as msme from msmbuilder.tpt import net_fluxes, fluxes from msmbuilder.tpt import paths sns.set_style('ticks') colors = sns.color_palette() ## Load kmeans = load_generic('../kcenters_30_100_5.pickl') msm = load_generic('msm_kcen_30_100_5_16.pickl') meta, ttrajs = load_trajs('../../ttrajs_a0_30') txx = np.concatenate(list(ttrajs.values())) a1 = ttrajs[14] ## Plot microstates def plot_microstates(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap='Greys', mincnt=1, bins='log', ) scale = 100 / np.max(msm.populations_) add_a_bit = 25 ax.scatter(a1[0,0],a1[0,1], marker="x", s=200, c='g') ax.scatter(middles2[two, 0],
#!/usr/bin/env python
"""Fit a one-vs-one random-forest classifier on every loaded frame and save it.

Data is read with ``load_trajs`` from ``alpha_carbon/`` and labels from
``macro-mapping/``.  The training accuracy is printed and the fitted model is
dumped to ``ovo-randomforest/final_es100_<depth>.pkl``.
"""
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsOneClassifier
from msmbuilder.io import load_meta, load_trajs
import sys

try:
    # ``sklearn.externals.joblib`` no longer exists in recent scikit-learn
    # releases; prefer the standalone package and fall back for old installs.
    import joblib
except ImportError:
    from sklearn.externals import joblib

depth = 9

meta, all_data = load_trajs('alpha_carbon/')
meta, all_label = load_trajs('macro-mapping/')

# Concatenate features and labels in the SAME key order.  Relying on the
# iteration order of two independently loaded dicts could silently pair a
# trajectory's frames with another trajectory's labels; a missing label
# trajectory now fails loudly with a KeyError instead.
traj_keys = list(all_data)
all_data_one = np.concatenate([all_data[key] for key in traj_keys])
all_label_one = np.concatenate([all_label[key] for key in traj_keys])

clf = OneVsOneClassifier(
    RandomForestClassifier(n_estimators=100, max_depth=depth, random_state=0))
clf.fit(all_data_one, all_label_one)

# np.mean of the boolean match array is the fraction of correct predictions
# and is always a float (no integer-division surprises on Python 2).
train_accuracy = np.mean(clf.predict(all_data_one) == all_label_one)
print(' Depth %d Train Accu: %.3f' % (depth, train_accuracy))

## save model
joblib.dump(clf, 'ovo-randomforest/final_es100_' + str(depth) + ".pkl")
"""Reduce dimensionality with tICA

msmbuilder autogenerated template version 2
created 2017-05-23T16:38:49.125259
please cite msmbuilder in any publications
"""
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA

## Load
meta, ftrajs = load_trajs("ftrajs")
tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True)

## Fit
# The model is fitted on all loaded trajectories at once.
tica.fit(ftrajs.values())

## Transform
# Project each trajectory separately, keeping the original keys.
ttrajs = {key: tica.partial_transform(ftraj) for key, ftraj in ftrajs.items()}

## Save
save_trajs(ttrajs, 'ttrajs', meta)
save_generic(tica, 'tica.pickl')
""" # ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs sns.set_style('ticks') colors = sns.color_palette() ## Load meta, rmsds = load_trajs('rmsds') ## Plot box plot def plot_boxplot(ax): catted = np.concatenate([rmsds[k] for k in meta.index]) sns.boxplot(catted * 10, ax=ax) ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18) ax.set_yticks([]) # ax.set_xticks(fontsize=16) #TODO: fontsize ## Report bad trajectories def bad_trajs(cutoff=0.7): bad = {} for k in meta.index:
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
from matplotlib.ticker import FormatStrFormatter
sns.set_style("white")


def print_timescales(timescales):
    """Placeholder that accepts ``timescales`` and currently does nothing."""


if __name__ == "__main__":
    all_msms = load_generic('rmsd_msms.pickl')
    meta, ctraj_dict = load_trajs('ctraj-200')

    # Keep only trajectories with more than 1000 entries along axis 0,
    # squeezed to drop singleton dimensions.
    long_ctrajs = []
    for traj in ctraj_dict.values():
        if traj.shape[0] > 1000:
            long_ctrajs.append(np.squeeze(traj))

    ps_to_ns = 1000
    n_ts = 10

    # One lag time and the first n_ts timescales per fitted model.
    lags = np.array([model.get_params()['lag_time'] for model in all_msms])
    timescales = np.array(
        [model.timescales_[:n_ts] for model in all_msms]).T / ps_to_ns

    # Pick the first model whose lag time is exactly 2000.
    msm = all_msms[np.flatnonzero(lags == 2000)[0]]
from msmbuilder.io import load_trajs, save_trajs, save_generic, load_generic
from pyemma.msm import bayesian_markov_model
import time
import matplotlib
matplotlib.use('Agg')
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
sns.set_style('ticks')
colors = sns.color_palette()
import pickle

# Wall-clock reference taken before any loading or fitting starts.
start_time = time.time()

## Load
meta, ktrajs = load_trajs('../../ktrajs_cen_30_100_5')
dtrajs = list(ktrajs.values())
# (A stray ``type(dtrajs[0])`` expression used to sit here; it had no effect
# in a script and was removed.)

## Fit
msm = bayesian_markov_model(dtrajs, lag=16, nsamples=100000)
print('done with bmm')

## Load
kmeans = load_generic('../../kcenters_30_100_5.pickl')
msm2 = load_generic('../msm_kcen_30_100_5_16.pickl')
meta, ttrajs = load_trajs('../../../ttrajs_a0_30')
# All loaded ttrajs stacked into one array.
txx = np.concatenate(list(ttrajs.values()))
# The trajectory stored under key 14.
a1 = ttrajs[14]
print('done with load')
"""Reduce dimensionality with tICA

{{header}}

Meta
----
depends:
 - ftrajs
 - meta.pandas.pickl
"""
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA

## Load
meta, ftrajs = load_trajs("ftrajs")
tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True)

## Fit
tica.fit(ftrajs.values())

## Transform
# One projected trajectory per input trajectory, stored under the same key.
ttrajs = dict(
    (key, tica.partial_transform(ftraj)) for key, ftraj in ftrajs.items()
)

## Save
save_trajs(ttrajs, "ttrajs", meta)
save_generic(tica, "tica.pickl")
""" import matplotlib matplotlib.use('Agg') import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs sns.set_style('ticks') colors = sns.color_palette() ## Load meta, rmsds = load_trajs('rmsds') ## Plot box plot def plot_boxplot(ax): catted = np.concatenate([rmsds[k] for k in meta.index]) sns.boxplot(catted * 10, ax=ax) ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18) ax.set_yticks([]) # ax.set_xticks(fontsize=16) #TODO: fontsize ## Report bad trajectories def bad_trajs(cutoff=0.7): bad = {} for k in meta.index:
width = (2 * np.pi) / N ax1.bar(np.deg2rad(theta[1:]), radii, width=width, color=color, alpha=.5) if title is not None: plt.suptitle(title) plt.tight_layout() f = plt.gcf() return f, (ax1, ax2) if __name__ == '__main__': # Load meta, dtrajs = load_trajs('dtrajs') dihed_names = ['cTnI dihedral', 'cTnT dihedral'] data_dict = {} # This will have keys, one for each sim type for t in meta['type'].unique(): indexes = meta[meta['type'] == t].index # The simulation indexes dihed_arr = np.concatenate([dtrajs.get(key) for key in indexes]) data_dict[t] = dihed_arr n_diheds = list(data_dict.values())[0].shape[1] for i in range(n_diheds): n_types = 0 patch_list = [] for k, v in data_dict.items(): data = np.rad2deg(v[:, i])
def clust(args):
    """Transform one trajectory with a fitted cluster model.

    ``args`` is a ``(key, trajectory, cluster)`` triple packed into a single
    argument.  The key is printed, then ``(key, cluster.transform(trajectory))``
    is returned.
    """
    key, traj, model = args
    print(key)
    return key, model.transform(traj)


if __name__ == "__main__":
    # Load data
    meta = load_meta()
    tops = preload_tops(meta)
    totframes = meta['nframes'].sum()

    ctraj_path = 'ctraj-200'
    if isdir(ctraj_path):
        # Clustered trajectories already exist on disk: just load them.
        meta, all_ctrajs_dict = load_trajs(ctraj_path)
    else:
        def traj_load(irow):
            """Load the trajectory of one ``meta`` row; return ``(index, traj)``."""
            idx, row = irow
            return idx, md.load(row['traj_fn'], top=tops[row['top_fn']])

        traj_dict = dict(traj_load(irow) for irow in meta.iterrows())
        all_trajs = list(traj_dict.values())

        # One landmark per 200 frames of the whole data set.
        cluster = LandmarkAgglomerative(
            n_clusters=200,
            n_landmarks=int(totframes / 200),
            linkage='ward',
            metric='rmsd',
        )
        cluster.fit(all_trajs)

        # TODO will this work?
        args = [(key, traj, cluster) for key, traj in traj_dict.items()]
# NB: Can't do this in parallel (easily) as the algorithm requires all trajectory data to do transform
#
from msmbuilder.preprocessing import RobustScaler
import numpy as np
from msmbuilder.io import load_trajs, save_trajs, save_generic
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
from utilities import plot_box

if __name__ == '__main__':

    # Load
    feature_name = 'Positions'
    meta, feature_trajs = load_trajs('Unscaled-{}-ftraj'.format(feature_name))

    # Select scaler
    featurizer = RobustScaler()

    # Fit on all trajectories at once.  Only the fit is needed here: the
    # transformed output of ``fit_transform`` used to be thrown away and every
    # trajectory is transformed individually below, so transforming here as
    # well just doubled the work.  The dict view is materialised as a list.
    featurizer.fit(list(feature_trajs.values()))

    # Transform values, one trajectory at a time, keeping the original keys.
    scaled_trajs = {
        key: featurizer.partial_transform(ftraj)
        for key, ftraj in feature_trajs.items()
    }

    # Plot scaled features (every 100th row of each trajectory).
    ftrajs = np.concatenate([fx[::100] for fx in scaled_trajs.values()])
    fig, ax = plt.subplots(figsize=(15, 5))
    plot_box(ax, fxx=ftrajs, feature_name='Scaled {}'.format(feature_name))
    fig.tight_layout()
def plot(data, fname):
    """Plot the distribution of ``data`` and save the figure to ``fname``.

    Clears the current figure, draws a normalised histogram (bin width 10,
    bins starting at 0) with a KDE overlay via ``sns.distplot``, annotates it
    with the mean and twice the standard deviation of ``data``, and saves the
    current figure with a transparent background.
    """
    plt.clf()
    width = 10
    data = np.array(data)
    bins = np.arange(0, max(data), step=width)
    sns.distplot(data, norm_hist=True, kde=True, bins=bins, label='No ones')
    plt.ylim((0, 0.001))
    plt.xlabel('First passage time (ps)')
    plt.text(
        1000, 0.0005,
        'MFPT = {0:4.2f} +/- {1:4.2f} ps'.format(data.mean(), 2 * data.std()))
    # The keyword was misspelled ``transparanet``: depending on the matplotlib
    # version that is either silently ignored or rejected as an unknown
    # argument, so the transparent background was never applied.
    plt.savefig(fname, transparent=True)


if __name__ == "__main__":
    meta, ctraj = load_trajs('pcca-2-traj')

    # First-passage counts, keyed by (from_state, to_state).
    fpt = {(0, 1): [], (1, 0): []}

    for k, v in ctraj.items():
        count = 0
        # Walk over consecutive pairs of entries.
        for v1, v2 in zip(v[:-1], v[1:]):
            if math.isnan(v1) or math.isnan(v2):
                # A NaN entry restarts the count.
                count = 0
                continue
            count += 1
            if v1 != v2:
                # State changed: record the steps counted so far and restart.
                fpt[(v1, v2)].append(count)
                count = 0
# ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load kmeans = load_generic('kmeans.pickl') meta, ktrajs = load_trajs('ktrajs') meta, ttrajs = load_trajs('ttrajs', meta) txx = np.concatenate(list(ttrajs.values())) def plot_cluster_centers(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log', ) ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=40, c=colors[0], ) ax.set_xlabel("tIC 1", fontsize=16)
{{header}} Meta ---- depends: - top.pdb - trajs """ import mdtraj as md from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic from msmbuilder.io.sampling import sample_msm ## Load meta, ttrajs = load_trajs('ttrajs') msm = load_generic('msm.pickl') kmeans = load_generic('kmeans.pickl') ## Sample # Warning: make sure ttrajs and kmeans centers have # the same number of dimensions inds = sample_msm(ttrajs, kmeans.cluster_centers_, msm, n_steps=200, stride=1) save_generic(inds, "msm-traj-inds.pickl") ## Make trajectory top = preload_top(meta) traj = md.join( md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) for traj_i, frame_i in inds )
#
# Does grid search for optimum parameters
#
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA, sparsetica
from sklearn.model_selection import ShuffleSplit, GridSearchCV
import pandas as pd

if __name__ == '__main__':

    # Load data
    feature_name = 'Positions'
    meta, ftrajs = load_trajs("Scaled-{}-ftraj".format(feature_name))
    X = [ftraj for ftraj in ftrajs.values()]

    # Specify CV strategy and parameters:
    # 10 random 50/50 splits with a fixed seed.
    cv_iter = ShuffleSplit(n_splits=10, test_size=0.5, random_state=0)
    param_grid = [
        {
            'n_components': [10, 20, 40],
            'lag_time': [1, 10, 100],
        },
    ]

    # CV object
    model = tICA(kinetic_mapping=True)

    # Do grid search
    clf = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv_iter,
        n_jobs=2,
    )
    clf.fit(X)

    # Save results
# Plot features
from msmbuilder.io import load_trajs
import numpy as np
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import sys
import seaborn as sns
colors = sns.color_palette("colorblind", 8)

for feature in ['angles', 'dihedrals', 'bonds', 'contacts']:
    meta, ftraj = load_trajs(
        'featurized_trajectories/{}-ftraj'.format(feature))
    ftraj = np.concatenate([traj for traj in ftraj.values()])

    # Draw 10000 rows at random (with replacement, the np.random.choice
    # default).  Indexing rows only works for both 1-D and 2-D arrays.
    sample = ftraj[np.random.choice(ftraj.shape[0], size=10000)]
    if feature in ['angles', 'dihedrals']:
        # Keep every other column, starting with the first.
        sample = sample[:, ::2]
    print(feature, sample.shape)

    # Number of feature columns; a 1-D sample counts as a single feature.
    # (The fallback used to assign the misspelled name ``n_feat_plot``,
    # leaving ``n_feats_plot`` unset or stale from the previous iteration.)
    n_feats_plot = sample.shape[1] if sample.ndim > 1 else 1
from msmbuilder.io import load_generic, load_trajs
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from utilities import *
import numpy as np
import pandas as pd
import seaborn as sns

# LOAD DATA
# TODO change the default name of the param search
meta, ttrajs = load_trajs('Positions-ttrajs')
tica = load_generic('Positions-tica.pickl')
param_search = load_generic('Positions-grid-search-results.pickl')

# All loaded ttrajs stacked into a single array.
txx = np.concatenate([ttraj for ttraj in ttrajs.values()])

params, n_comb = get_param_combs(param_search)

# # PARAMETER SEARCH PLOT
# fig, axes = plt.subplots(nrows=n_comb, ncols=1)
# axes = plot_param_line(param_search, axes, params)
# plt.tight_layout()
# plt.savefig('Positions-param-results.pdf')
# plt.clf()
#
# # tICA DISTRIBUTION PLOT
# plot_tica_distribution(txx, sample_size=2000, ndims=4)
# plt.savefig('Positions-tica-dist.pdf')
# plt.clf()
#
# # TIME SCALES PLOT
# fig, axes = plt.subplots()
# ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load kmeans = load_generic('kmeans.pickl') meta, ktrajs = load_trajs('ktrajs') meta, ttrajs = load_trajs('ttrajs', meta) txx = np.concatenate(list(ttrajs.values())) def plot_cluster_centers(ax): ax.hexbin( txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log', ) ax.scatter( kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
from msmbuilder.io import load_meta, preload_tops, save_generic, itertrajs, backup, load_trajs
import mdtraj as md
from msmbuilder.cluster import LandmarkAgglomerative
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
import numpy as np
import seaborn as sns
from utilities import to_dataframe

# load trajectories
feature = 'dihedrals'
meta, traj_dict = load_trajs('pruned_trajectories/{}-ftraj'.format(feature))
trajs = list(traj_dict.values())

# Common length every cluster trajectory is padded to: the largest 'nframes'
# value in meta.  (This used to read the FIRST unique value, which is not the
# maximum in general and produced a negative pad length for longer
# trajectories.)
nframes = int(meta['nframes'].max())

# cluster
num_clusters = 10
cluster = LandmarkAgglomerative(n_clusters=num_clusters, n_landmarks=200,
                                linkage='ward', metric='euclidean')
cluster.fit(trajs)

ctraj = {}
for k, v in traj_dict.items():
    # partial_predict is given a C-ordered copy of the features.
    labels = cluster.partial_predict(v.copy(order='C'))
    # Pad with -1 (as floats) up to the common length.
    pad = np.full(nframes - labels.shape[0], -1.0)
    ctraj[k] = np.append(labels, pad)

# Convert to DF for plotting and sampling.
""" # ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs sns.set_style('ticks') colors = sns.color_palette() ## Load meta, ftrajs = load_trajs('ftrajs') # (stride by 100 for memory concerns) fxx = np.concatenate([fx[::100] for fx in ftrajs.values()]) ## Box and whisker plot def plot_box(ax): n_feats_plot = min(fxx.shape[1], 100) ax.boxplot( fxx[:, :100], boxprops={'color': colors[0]}, whiskerprops={'color': colors[0]}, capprops={'color': colors[0]}, medianprops={'color': colors[2]}, )
Meta ---- depends: - meta.pandas.pickl - ktrajs """ from multiprocessing import Pool import pandas as pd from msmbuilder.io import load_trajs from msmbuilder.msm import MarkovStateModel ## Load meta, ktrajs = load_trajs('ktrajs') ## Parameters lagtimes = [2 ** i for i in range(8)] ## Define what to do for parallel execution def at_lagtime(lt): msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False) msm.fit(list(ktrajs.values())) ret = { 'lag_time': lt, 'percent_retained': msm.percent_retained_, } for i in range(msm.n_timescales): ret['timescale_{}'.format(i)] = msm.timescales_[i]
Meta ---- depends: - meta.pandas.pickl - ktrajs """ from multiprocessing import Pool import pandas as pd from msmbuilder.io import load_trajs from msmbuilder.msm import MarkovStateModel ## Load meta, ktrajs = load_trajs('ktrajs') ## Parameters lagtimes = [2**i for i in range(8)] ## Define what to do for parallel execution def at_lagtime(lt): msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False) msm.fit(list(ktrajs.values())) ret = { 'lag_time': lt, 'percent_retained': msm.percent_retained_, } for i in range(msm.n_timescales): ret['timescale_{}'.format(i)] = msm.timescales_[i]
{{header}} """ # ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np from matplotlib import pyplot as plt import seaborn as sns from msmbuilder.io import load_trajs sns.set_style('ticks') colors = sns.color_palette() ## Load meta, ftrajs = load_trajs('ftrajs') # (stride by 100 for memory concerns) fxx = np.concatenate([fx[::100] for fx in ftrajs.values()]) ## Box and whisker plot def plot_box(ax): n_feats_plot = min(fxx.shape[1], 100) ax.boxplot(fxx[:, :100], boxprops={'color': colors[0]}, whiskerprops={'color': colors[0]}, capprops={'color': colors[0]}, medianprops={'color': colors[2]}, ) if fxx.shape[1] > 100:
""" import matplotlib matplotlib.use('Agg') import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load tica = load_generic('tica.pickl') meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) ## Heatmap def plot_heatmap(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log') ax.set_xlabel("tIC 1", fontsize=16) ax.set_ylabel("tIC 2", fontsize=16) ## Timescales
#
# NB: Can't do this in parallel (easily) as the algorithm requires all trajectory data to do transform
#
from msmbuilder.preprocessing import RobustScaler
import numpy as np
from msmbuilder.io import load_trajs, save_trajs, save_generic
import matplotlib
matplotlib.use('Agg')
from matplotlib.pylab import plt
from utilities import plot_box

if __name__ == '__main__':

    # Load
    meta, feature_trajs = load_trajs('ftraj')

    # Select scaler
    featurizer = RobustScaler()

    # Fit on all trajectories at once.  Only the fit is needed here: the
    # transformed output of ``fit_transform`` used to be thrown away and every
    # trajectory is transformed individually below, so transforming here as
    # well just doubled the work.  The dict view is materialised as a list.
    featurizer.fit(list(feature_trajs.values()))

    # Transform values, one trajectory at a time, keeping the original keys.
    scaled_trajs = {
        key: featurizer.partial_transform(ftraj)
        for key, ftraj in feature_trajs.items()
    }

    # Draw up to 1000 distinct rows from the stacked scaled data; capping at
    # the number of available rows avoids a ValueError on small data sets.
    sample = np.concatenate(list(scaled_trajs.values()))
    n_rows = min(1000, sample.shape[0])
    sample = sample[np.random.choice(sample.shape[0], n_rows, replace=False), :]

    # Per-column variance of the sample, and the column order by increasing
    # variance.
    variance = np.var(sample, axis=0)
    order = np.argsort(variance)