def build_model(self, user_defined_model):
    """
    Return the scikit-learn Pipeline that does all the transforming and fitting.

    When no model is supplied, a pickled model is loaded from
    ``self.model_pkl_fname`` if that path exists; otherwise a default
    dihedral-based pipeline is built (featurizer, scaler, tICA, clusterer, MSM).

    :param user_defined_model: ``None`` to load/build the default model, or a
        Pipeline object to use as the model
    :return model: the loaded, built or user supplied model
    :raises ValueError: if ``user_defined_model`` is neither ``None`` nor a
        Pipeline
    """
    if user_defined_model is not None:
        # A caller-supplied model is only accepted if it is a Pipeline.
        if isinstance(user_defined_model, Pipeline):
            logger.info('Using user defined model')
            return user_defined_model
        raise ValueError(
            'model is not an sklearn.pipeline.Pipeline object')

    if os.path.exists(self.model_pkl_fname):
        logger.info('Loading model pkl file {}'.format(self.model_pkl_fname))
        return load_generic(self.model_pkl_fname)

    logger.info('Building default model based on dihedrals')
    # Aim for a lag time of 1 ns for both tICA and the MSM. If the stride is
    # too big for that, fall back to a single frame and report how much that
    # is in ns.
    if self.app.meta is None:
        # No metadata available: clear the timestep and use one frame.
        self.timestep = None
        n_lag_frames = 1
        logger.warning('Cannot determine timestep. Defaulting to 1 frame.')
    else:
        n_lag_frames = max(1, int(1 / self.timestep))
        logger.info(
            'Using a lag time of {} ns for the tICA and MSM'.format(
                n_lag_frames * self.timestep))

    steps = [
        ('feat', DihedralFeaturizer()),
        ('scaler', RobustScaler()),
        ('tICA', tICA(lag_time=n_lag_frames, commute_mapping=True,
                      n_components=10)),
        ('clusterer', MiniBatchKMeans(n_clusters=200)),
        ('msm', MarkovStateModel(lag_time=n_lag_frames,
                                 ergodic_cutoff='off',
                                 reversible_type=None)),
    ]
    return Pipeline(steps)
""" import matplotlib matplotlib.use('Agg') import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load kmeans = load_generic('kmeans.pickl') msm = load_generic('msm.pickl') meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) ## Plot microstates def plot_microstates(ax): ax.hexbin( txx[:, 0], txx[:, 1], cmap='Greys', mincnt=1, bins='log', )
import seaborn as sns
import matplotlib
# The backend is selected before the plotting interface is imported below.
matplotlib.use('Agg')
from matplotlib.pylab import plt
from matplotlib.ticker import FormatStrFormatter
sns.set_style("white")


def print_timescales(timescales):
    """Placeholder: accepts ``timescales`` and currently does nothing."""
    pass


if __name__ == "__main__":
    # NOTE(review): load_generic, load_trajs and np are not imported in the
    # visible part of this script -- confirm they are imported above.
    all_msms = load_generic('rmsd_msms.pickl')
    meta, ctraj_dict = load_trajs('ctraj-200')
    # Keep only trajectories with more than 1000 entries along the first
    # axis, with singleton dimensions squeezed out.
    long_ctrajs = [
        np.squeeze(traj)
        for traj in ctraj_dict.values()
        if traj.shape[0] > 1000
    ]
    # Divisor applied to the timescales below; the name suggests a ps -> ns
    # conversion -- presumably the MSM timescales are in ps, TODO confirm.
    ps_to_ns = 1000
    # Number of leading timescales kept from each model.
    n_ts = 10
    timescales = []
    lags = []
    # Collect, per model, its first n_ts timescales and its lag_time parameter.
    for msm in all_msms:
        timescales.append(msm.timescales_[:n_ts])
        lags.append(msm.get_params()['lag_time'])
    lags = np.array(lags)
    # Transposed so that each row follows one timescale index across models.
    timescales = np.array(timescales).T / ps_to_ns
from msmbuilder.io import load_generic
import numpy as np
import matplotlib
# The backend is selected before the plotting interface is imported below.
matplotlib.use('Agg')
from matplotlib.pylab import plt
import sys
import seaborn as sns
colors = sns.color_palette("colorblind", 8)
import pandas as pd

# Plot per-split GMRQ test scores from a grid search, one panel per number of
# clusters, against the tICA lag time and coloured by number of components.

# Keep only the parameter columns and the per-split score columns.
df = load_generic('grid_search.pickl')
df = df.filter(regex=("param_.*|split.*"))
id_cols = list(df.filter(regex=("param_.*")).columns)
var_cols = list(df.filter(regex=("split.*")).columns)

# Long format: one row per (parameter combination, split column); the score
# ends up in the 'GMRQ' column and the source column name in 'variable'.
df = pd.melt(df, id_vars=id_cols, value_vars=var_cols, value_name='GMRQ')
# expand=False yields a Series for the single capture group, which is what a
# column assignment expects.
df['Data'] = df['variable'].str.extract('(test|train)', expand=False)

# y must name the melted value column ('GMRQ'); the previous y="value" referred
# to a column that does not exist after melt(value_name='GMRQ').
# .loc replaces the removed DataFrame.ix indexer.
# NOTE(review): sns.factorplot was renamed to catplot in newer seaborn
# releases; kept here to match the seaborn version this script targets.
g = sns.factorplot(x="param_tica__lag_time", y="GMRQ",
                   hue="param_tica__n_components",
                   col='param_cluster__n_clusters',
                   data=df.loc[df['Data'] == 'test', :])

# parameter = 'n_clusters'
# fig, ax = plt.subplots()
# ax.errorbar(x=df['param_cluster__{}'.format(parameter)], y=df['mean_test_score'], yerr=df['std_test_score']*2)
# ax.set_xlabel('{} value'.format(parameter))
# ax.set_ylabel('GMRQ Score')
plt.savefig('results.pdf')
""" # ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load kmeans = load_generic('kmeans.pickl') meta, ktrajs = load_trajs('ktrajs') meta, ttrajs = load_trajs('ttrajs', meta) txx = np.concatenate(list(ttrajs.values())) def plot_cluster_centers(ax): ax.hexbin( txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log', ) ax.scatter( kmeans.cluster_centers_[:, 0],
def plot_single_var(results_df, which, fname='results'):
    """
    Plot a single search parameter against the mean test score, with error bars.

    The figure uses a log-scaled x axis and is saved to
    ``figures/<fname>-<label>.png``, where ``<label>`` is the second
    ``'__'``-separated segment of ``which``.

    :param results_df: appropriately subsetted dataframe; it is read for the
        ``which``, 'mean_test_score' and 'std_test_score' columns
    :param which: which parameter (column name) to plot
    :param fname: prefix of the output file name
    :return: None
    """
    short_name = which.split('__')[1]
    figure, axis = plt.subplots()

    param_values = results_df[which].values
    mean_scores = results_df['mean_test_score']
    score_std = results_df['std_test_score']
    axis.errorbar(param_values, mean_scores, score_std)

    axis.set_xscale("log")
    axis.set_ylabel('Score')
    axis.set_xlabel('{}'.format(short_name))

    out_path = 'figures/{0}-{1}.png'.format(fname, short_name)
    plt.savefig(out_path)


# Load the fitted parameter search and plot the score against n_clusters.
search_params = load_generic('models/rmsd_model.pickl')
df = pd.DataFrame(search_params.cv_results_)
plot_single_var(df, which='param_cluster__n_clusters')
print(df.head())
# best_model = search_params.best_estimator_
# msm = best_model.named_steps['msm']
#
# plot_eigenvectors(model=msm, number=3)
# plot_eigenvectors(model=msm, number=3, which='right')
msmbuilder autogenerated template version 2 created 2017-05-23T16:38:49.109805 please cite msmbuilder in any publications """ import mdtraj as md import os from msmbuilder.io.sampling import sample_states from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic ## Load meta, ttrajs = load_trajs('ttrajs') kmeans = load_generic("kmeans.pickl") ## Sample inds = sample_states(ttrajs, kmeans.cluster_centers_, k=10) save_generic(inds, "cluster-sample-inds.pickl") ## Make trajectories top = preload_top(meta) out_folder = "cluster_samples" backup(out_folder) os.mkdir(out_folder) for state_i, state_inds in enumerate(inds): traj = md.join( md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
sns.set_style('ticks') colors = sns.color_palette() import pickle start_time = time.time() ## Load meta, ktrajs = load_trajs('../../ktrajs_cen_30_100_5') dtrajs = list(ktrajs.values()) type(dtrajs[0]) ## Fit msm = bayesian_markov_model(dtrajs, lag=16, nsamples=100000) print('done with bmm') ## Load kmeans = load_generic('../../kcenters_30_100_5.pickl') msm2 = load_generic('../msm_kcen_30_100_5_16.pickl') meta, ttrajs = load_trajs('../../../ttrajs_a0_30') txx = np.concatenate(list(ttrajs.values())) a1 = ttrajs[14] print('done with load') print('active_count_fraction is ', msm.active_count_fraction) print('active_state_fraction is ', msm.active_state_fraction) print('shape of transition matrix is ', msm.P.shape) print('mean of eigenvalues is ', msm.sample_mean('eigenvalues', 7)) print('std of eigenvalues is ', msm.sample_std('eigenvalues', 7)) print('mean of pi is ', msm.sample_mean('pi')) print('std of pi is ', msm.sample_std('pi')) print('shape of right eigenvectors is ',
# ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) inds = load_generic("tica-dimension-0-inds.pickl") straj = [] for traj_i, frame_i in inds: straj += [ttrajs[traj_i][frame_i, :]] straj = np.asarray(straj) ## Overlay sampled trajectory on histogram def plot_sampled_traj(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap='magma_r', mincnt=1, bins='log', alpha=0.8, )
from msmbuilder.io import load_generic import numpy as np import matplotlib matplotlib.use('Agg') from matplotlib.pylab import plt import sys import seaborn as sns colors = sns.color_palette("colorblind", 8) import pandas as pd from glob import glob files = glob("*.pickl") all_dfs = [] for file in files: all_dfs.append(load_generic(file)) df = pd.concat(all_dfs) df.sort_values(by='param_cluster__n_clusters', inplace=True) # df = df.filter(regex=("param_.*|split.*")) # id_cols = list(df.filter(regex=("param_.*")).columns) # var_cols = list(df.filter(regex=("split.*")).columns) # # df = pd.melt(df,id_vars=id_cols, value_vars=var_cols, value_name='GMRQ') # df['Data'] = df['variable'].str.extract('(test|train)', expand=True) parameter = 'n_clusters' fig, ax = plt.subplots() ax.errorbar(x=df['param_cluster__{}'.format(parameter)], y=df['mean_test_score'], yerr=df['std_test_score'] * 2,
import numpy as np
import matplotlib as mp
import pyemma
from matplotlib import cm
from matplotlib import pyplot as plt
from msmbuilder.dataset import dataset
from msmbuilder.io import load_generic

# Directory stems; the pickle file names below are derived from ktrajs_dir.
ktrajs_dir = 'ktrajs-extracted-kcenters-lag1500-2-900'
ttrajs_dir = 'ttrajs-extracted-lag1500-new'
ktrajs_pkl = '%s-mle.pickl' % ktrajs_dir
micro_pkl = 'msm-%s-mle.pickl' % ktrajs_dir
macro_pkl = 'msm-%s-pcca-mle.pickl' % ktrajs_dir

# All three pickles are read from the parent directory.
clusterer = load_generic("../%s" % ktrajs_pkl)
msm_model = load_generic("../%s" % micro_pkl)
pcca = load_generic("../%s" % macro_pkl)

############################################################################
### get macrostate mapping and state_label of each microstate            ###
############################################################################
mapping = pcca.microstate_mapping_
# NOTE(review): state_label and msm_label are both bound to the same
# attribute (msm_model.state_labels_) -- confirm whether both names are needed.
state_label = msm_model.state_labels_
msm_label = msm_model.state_labels_
n_microstates = msm_model.n_states_
n_macrostates = pcca.n_macrostates

############################################################################
### get first two tica and plot free energy by mle msm_model             ###
############################################################################
from msmbuilder.io import load_generic, load_trajs
import matplotlib
# The backend is selected before pyplot is imported below.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from utilities import *
import numpy as np
import pandas as pd
import seaborn as sns

# LOAD DATA
# TODO change the default name of the param search
param_search = load_generic('Positions-grid-search-results.pickl')
tica = load_generic('Positions-tica.pickl')
meta, ttrajs = load_trajs('Positions-ttrajs')
# Stack the values of ttrajs into a single array.
txx = np.concatenate(list(ttrajs.values()))

# NOTE(review): get_param_combs is not defined in the visible part of this
# script; presumably it comes from the `utilities` star import -- confirm.
params, n_comb = get_param_combs(param_search)

# # PARAMETER SEARCH PLOT
# fig, axes = plt.subplots(nrows=n_comb, ncols=1)
# axes = plot_param_line(param_search, axes, params)
# plt.tight_layout()
# plt.savefig('Positions-param-results.pdf')
# plt.clf()
#
# # tICA DISTRIBUTION PLOT
# plot_tica_distribution(txx, sample_size=2000, ndims=4)
# plt.savefig('Positions-tica-dist.pdf')
# plt.clf()
#
# # TIME SCALES PLOT
# fig, axes = plt.subplots()
colors = sns.color_palette("colorblind", 8)
import pandas as pd


def _plot_eigenvector_bars(vectors, out_fname, max_vectors=5):
    """
    Draw the leading columns of ``vectors`` as stacked bar charts and save them.

    :param vectors: 2-D array; column ``i`` is drawn in panel ``i`` with one
        bar per row
    :param out_fname: path the figure is saved to
    :param max_vectors: upper bound on the number of panels
    :return: None
    """
    n_panels = min(max_vectors, vectors.shape[1])
    # squeeze=False always returns a 2-D array of axes; with the default,
    # a single panel comes back as a bare Axes object that cannot be iterated.
    fig, axes = plt.subplots(nrows=n_panels, sharey=False, sharex=True,
                             squeeze=False)
    bar_positions = range(vectors.shape[0])
    for idx, ax in enumerate(axes[:, 0]):
        ax.bar(bar_positions, vectors[:, idx])
    fig.savefig(out_fname)


def plot_lv(mod):
    """
    Save bar charts of up to five left eigenvectors of ``mod`` to 'msm-lvs.png'.

    :param mod: model exposing a 2-D ``left_eigenvectors_`` array
    :return: None
    """
    _plot_eigenvector_bars(mod.left_eigenvectors_, 'msm-lvs.png')


def plot_rv(mod):
    """
    Save bar charts of up to five right eigenvectors of ``mod`` to 'msm-rvs.png'.

    :param mod: model exposing a 2-D ``right_eigenvectors_`` array
    :return: None
    """
    _plot_eigenvector_bars(mod.right_eigenvectors_, 'msm-rvs.png')


# Load the fitted model, plot its eigenvectors, print its timescales and
# save an image of its transition matrix.
msm = load_generic('msm-lag-4000-nclusters-20.pickl')
plot_lv(msm)
plot_rv(msm)
print(msm.timescales_)
print(msm.uncertainty_timescales())
plt.matshow(msm.transmat_)
plt.savefig('msm-transmat.png')
""" # ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load tica = load_generic('tica.pickl') meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) ## Heatmap def plot_heatmap(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log' ) ax.set_xlabel("tIC 1", fontsize=16) ax.set_ylabel("tIC 2", fontsize=16)
""" # ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load kmeans = load_generic('kmeans.pickl') meta, ktrajs = load_trajs('ktrajs') meta, ttrajs = load_trajs('ttrajs', meta) txx = np.concatenate(list(ttrajs.values())) def plot_cluster_centers(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log', ) ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=40, c=colors[0], )
matplotlib.use('Agg') import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) inds = load_generic("tica-dimension-0-inds.pickl") straj = [] for traj_i, frame_i in inds: straj += [ttrajs[traj_i][frame_i, :]] straj = np.asarray(straj) ## Overlay sampled trajectory on histogram def plot_sampled_traj(ax): ax.hexbin( txx[:, 0], txx[:, 1], cmap='magma_r', mincnt=1, bins='log', alpha=0.8,
# ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) kmeans = load_generic('kmeans.pickl') inds = load_generic("cluster-sample-inds.pickl") coordinates = [ np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds]) for state_inds in inds ] ## Overlay sampled states on histogram def plot_sampled_states(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap='magma_r', mincnt=1, bins='log', alpha=0.8,
import pyemma
from msmbuilder.lumping import PCCAPlus,PCCA
#from msmbuilder.lumping import PCCAPlus
from msmbuilder.io import load_trajs, load_generic

tlag = 400
ntrajs = 130
# Directory stems; the pickle and directory names below are derived from
# ktrajs_dir.
ktrajs_dir = 'ktrajs-extracted-kcenters-lag1500-2-1000'
ktrajs_pkl = '%s-mle.pickl'%ktrajs_dir
microtraj_dir = 'microktrajs-%s-mle8'%ktrajs_dir
microtraj_pkl = 'msm-%s-mle8.pickl'%ktrajs_dir
ttrajs_dir = 'ttrajs-atpair2-lag1500'
#unwrapbp_dir = '../msm-remove10ns-cluster-unwrapbpnew2/unwrapbpnew2'
msm = load_generic(microtraj_pkl)
#meta, k_trajs = load_trajs(ktrajs_dir)
# NOTE(review): `dataset` is not imported in the visible part of this script
# -- confirm it is imported above (e.g. from msmbuilder.dataset).
ktrajs = dataset('%s-mle'%ktrajs_dir,mode='r',fmt='dir-npy',verbose=True)
ttrajs = dataset(ttrajs_dir,mode='r',fmt='dir-npy',verbose=True)
#utrajs = dataset(unwrapbp_dir,mode='r',fmt='dir-npy',verbose=True)
# Convert every entry of each dataset to a plain Python list; ktrajs is
# rebound from the dataset object to the resulting list of lists.
ktrajs = [ktrajs[s].tolist() for s in range(len(ktrajs))]
#tvaluesall = [ttrajs[s].tolist() for s in range(len(ttrajs)) if s!=9]
#uvaluesall = [utrajs[s].tolist() for s in range(len(utrajs)) if s!=9]
tvaluesall = [ttrajs[s].tolist() for s in range(len(ttrajs))]
#uvaluesall = [utrajs[s].tolist() for s in range(len(utrajs))]
#tvalues = [ttrajs[s] for s in range(len(ttrajs))]
#txx = np.concatenate(tvaluesnew)
msm_label = msm.state_labels_
# Print how many state labels the loaded model carries.
print(len(msm_label))
# ? include "plot_header.template" # ? from "plot_macros.template" import xdg_open with context import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) kmeans = load_generic('kmeans.pickl') inds = load_generic("cluster-sample-inds.pickl") coordinates = [ np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds]) for state_inds in inds ] ## Overlay sampled states on histogram def plot_sampled_states(ax): ax.hexbin( txx[:, 0], txx[:, 1], cmap='magma_r', mincnt=1,
Meta ---- depends: - top.pdb - trajs """ import mdtraj as md from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic from msmbuilder.io.sampling import sample_msm ## Load meta, ttrajs = load_trajs('ttrajs') msm = load_generic('msm.pickl') kmeans = load_generic('kmeans.pickl') ## Sample # Warning: make sure ttrajs and kmeans centers have # the same number of dimensions inds = sample_msm(ttrajs, kmeans.cluster_centers_, msm, n_steps=200, stride=1) save_generic(inds, "msm-traj-inds.pickl") ## Make trajectory top = preload_top(meta) traj = md.join( md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) for traj_i, frame_i in inds )
please cite msmbuilder in any publications """ import matplotlib matplotlib.use('Agg') import numpy as np import seaborn as sns from matplotlib import pyplot as plt from msmbuilder.io import load_trajs, load_generic sns.set_style('ticks') colors = sns.color_palette() ## Load tica = load_generic('tica.pickl') meta, ttrajs = load_trajs('ttrajs') txx = np.concatenate(list(ttrajs.values())) ## Heatmap def plot_heatmap(ax): ax.hexbin(txx[:, 0], txx[:, 1], cmap=sns.cubehelix_palette(as_cmap=True), mincnt=1, bins='log') ax.set_xlabel("tIC 1", fontsize=16) ax.set_ylabel("tIC 2", fontsize=16)
Meta ---- depends: - ../../top.pdb - ../../trajs """ import mdtraj as md import os from msmbuilder.io.sampling import sample_states from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic ## Load meta, ttrajs = load_trajs('ttrajs') kmeans = load_generic("kmeans.pickl") ## Sample inds = sample_states(ttrajs, kmeans.cluster_centers_, k=10) save_generic(inds, "cluster-sample-inds.pickl") ## Make trajectories top = preload_top(meta) out_folder = "cluster_samples" backup(out_folder) os.mkdir(out_folder) for state_i, state_inds in enumerate(inds):