def test_that_all_featurizers_run():
    """Smoke test: run ``transform`` for several featurizers on ALA dipeptide.

    Nothing is asserted about the output; the test only fails if building a
    featurizer or transforming the trajectories raises.
    """
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_frame = trajectories[0][0]
    atom_indices, pair_indices = get_atompair_indices(first_frame)

    # Factories are invoked lazily so that each featurizer is built right
    # before it is used, one at a time.
    # ContactFeaturizer is intentionally left out: it doesn't work on
    # ALA dipeptide.
    factories = (
        lambda: AtomPairsFeaturizer(pair_indices),
        lambda: SuperposeFeaturizer(np.arange(15), first_frame),
        lambda: DihedralFeaturizer(["phi", "psi"]),
        lambda: VonMisesFeaturizer(["phi", "psi"]),
        lambda: RMSDFeaturizer(first_frame),
    )
    for make_featurizer in factories:
        X_all = make_featurizer().transform(trajectories)
def read_and_featurize(filename, dihedrals=['chi2'], stride=10):
    """Featurize dihedrals for a residue-93 atom subset and dump them to disk.

    Parameters
    ----------
    filename : str
        "/"-separated path to the trajectory. The second-to-last path
        component is used as the condition name and the last one as the
        trajectory file name.
    dihedrals : list of str
        Dihedral types handed to ``DihedralFeaturizer``.
    stride : int
        Only used to build the output file name.

    Returns
    -------
    The features returned by ``DihedralFeaturizer.transform``.
    """
    top = md.load_frame(filename, 0).topology
    # Compare the residue *name* with "POPC"; comparing the residue object
    # itself with a string can never exclude anything.
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.resSeq == 93
        and a.residue.name != "POPC"
        and str(a.residue)[0] == "H"
    ]
    print(len(atom_indices))

    # NOTE(review): frames are loaded with a hard-coded stride of 1000 while
    # the output file name advertises ``stride`` -- confirm which is intended.
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    path_parts = filename.split("/")
    # Index arithmetic (rather than [-2]) keeps the historical behaviour for
    # a bare file name with no "/" in it.
    condition = path_parts[len(path_parts) - 2]
    dcd_file = path_parts[len(path_parts) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    verbosedump(features, new_file_full)
    return features
def test_function_featurizer():
    """FunctionFeaturizer must reproduce the phi angle from DihedralFeaturizer."""
    first_traj = AlanineDipeptide().get_cached().trajectories[0]

    # use the dihedral to compute phi for ala
    phi_atoms = [[4, 6, 8, 14]]

    # variant 1: plain function plus keyword arguments
    with_args = FunctionFeaturizer(compute_dihedrals,
                                   func_args={"indices": phi_atoms})
    res1 = with_args.transform([first_traj])

    # variant 2: function wrapped in a function, no extra arguments
    def funcception(trj):
        return compute_phi(trj)[1]

    wrapped = FunctionFeaturizer(funcception)
    res2 = wrapped.transform([first_traj])

    # reference values from the dedicated featurizer
    reference = DihedralFeaturizer(['phi'], sincos=False)
    res3 = reference.transform([first_traj])

    # compare all
    for other in (res2, res3):
        np.testing.assert_array_almost_equal(res1, other)
def test_pipeline():
    """Fit a dihedral featurizer + von Mises HMM pipeline and summarize it."""
    trajs = AlanineDipeptide().get_cached().trajectories
    steps = [
        ('diheds', DihedralFeaturizer(['phi', 'psi'], sincos=False)),
        ('hmm', VonMisesHMM(n_states=4)),
    ]
    pipeline = Pipeline(steps)
    predict = pipeline.fit_predict(trajs)
    hmm_step = pipeline.named_steps['hmm']
    hmm_step.summarize()
def _test_tic_sampling(yaml_file, protein_name, tic_list, n_frames, scheme):
    """Check that the sampled tIC trajectories are ordered and within range.

    Runs ``sample_for_all_proteins`` for ``protein_name`` and then, for tIC 0
    and tIC 1, loads ``tic<index>.xtc``, projects it with the protein's tICA
    model and asserts that

    * the first frame's (rounded) tIC value is <= the last frame's, and
    * every frame lies between the smallest and largest (rounded) value
      found in ``prt.tica_data``.

    Returns True when all assertions hold.
    """
    # test to make sure we are sampling right
    sample_for_all_proteins(yaml_file, [protein_name], tic_list, n_frames,
                            scheme=scheme)
    series = ProteinSeries(yaml_file)
    prt = Protein(series, protein_name)

    for tic_index in range(2):
        xtc_path = os.path.join(base_dir, yaml_file["mdl_dir"],
                                protein_name, "tic%d.xtc" % tic_index)
        pdb_path = os.path.join(base_dir, yaml_file["mdl_dir"],
                                protein_name, "prot.pdb")
        tica_traj = mdt.load(xtc_path, top=pdb_path)
        print(tica_traj.n_frames)

        feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])
        raw_features = feat.partial_transform(tica_traj)
        t_f = np.round(prt.tica_mdl.transform([raw_features]))
        projected = t_f[0]

        # check that the tic goes from min to max
        print("Look here", projected)
        assert projected[0][tic_index] <= projected[-1][tic_index]

        all_vals = [
            value
            for traj_tica_data in prt.tica_data.values()
            for value in traj_tica_data[:, tic_index]
        ]
        # sort it because all three sampling schemes use it
        all_vals = np.round(np.sort(all_vals))
        lowest, highest = all_vals[0], all_vals[-1]

        print(tic_index)
        print(projected[:, tic_index] >= lowest)
        print(projected[:, tic_index] <= highest)

        # make sure the frames are within limits
        assert (projected[:, tic_index] >= lowest).all()
        assert (projected[:, tic_index] <= highest).all()

    return True
def read_and_featurize_divided(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Featurize a trajectory without "HI*" residues and dump the features.

    Atoms whose residue name starts with "HI" are dropped before loading.
    The output path is derived from the last two "/"-separated components of
    ``filename``; ``stride`` only appears in the output file name.

    Returns the features produced by ``DihedralFeaturizer.transform``.
    """
    topology = md.load_frame(filename, 0).topology
    kept_atoms = []
    for atom in topology.atoms:
        if atom.residue.name[0:2] != "HI":
            kept_atoms.append(atom.index)

    traj = md.load(filename, atom_indices=kept_atoms)
    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)

    path_parts = filename.split("/")
    n_parts = len(path_parts)
    condition = path_parts[n_parts - 2]
    dcd_file = path_parts[n_parts - 1]
    stem = dcd_file.rsplit(".", 1)[0]

    new_file = "%s_features_stride%d.h5" % (stem, stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)

    verbosedump(features, new_file_full)
    return features
def read_and_featurize(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Featurize a whole trajectory and dump it under ``combined_features``.

    The condition name is the part of the file name before the first "_"
    (and before the first "."). Nothing is returned; ``stride`` is unused.
    """
    print(("reading and featurizing %s" % (filename)))
    traj = md.load(filename)

    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)

    traj_file = filename.split("/")[-1]
    before_underscore = traj_file.split("_")[0]
    condition = before_underscore.split(".")[0]
    shape = np.shape(features)
    print(("Condition %s has features of shape %s" % (condition, shape)))

    new_file = "/scratch/users/enf/b2ar_analysis/combined_features/%s_features.h5" % condition
    verbosedump(features, new_file)
def featurize_project(proj_folder, top_folder, featurizer_object, stride, view):
    """Featurize every ``*.dcd`` trajectory of a project, with on-disk caching.

    Parameters
    ----------
    proj_folder : str
        Project folder; trajectories are globbed from
        ``<proj_folder>/trajectories/*.dcd``.
    top_folder : str
        Forwarded unchanged to ``featurize_traj`` inside each job tuple.
    featurizer_object : str or None
        Path of a pickled featurizer to load with ``verboseload``. ``None``
        selects a phi/psi/chi1 ``DihedralFeaturizer``.
    stride : int
        Forwarded unchanged to ``featurize_traj`` inside each job tuple.
    view : object
        Anything exposing ``map_sync(func, jobs)``.

    Returns
    -------
    dict
        Maps ``result[0]`` to ``result[1]`` for every ``featurize_traj``
        result. If ``<proj_folder>/featurized_traj.pkl`` already exists it is
        loaded and returned instead of recomputing.
    """
    cache_path = proj_folder + "/featurized_traj.pkl"

    # if already featurized dont bother (should add a warning about this)
    if os.path.exists(cache_path):
        return verboseload(cache_path)

    if featurizer_object is None:
        featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])
    else:
        try:
            featurizer = verboseload(featurizer_object)
        except Exception:
            # Catch Exception only, so Ctrl-C / SystemExit are not rewritten
            # into a misleading "cannot load" message.
            sys.exit("Cant Load Featurizer using msmbuilder verboseload")

    traj_list = glob.glob(proj_folder + "/trajectories/*.dcd")
    jobs = [(proj_folder, top_folder, featurizer, traj, stride)
            for traj in traj_list]
    results = view.map_sync(featurize_traj, jobs)

    feature_dict = {result[0]: result[1] for result in results}
    verbosedump(feature_dict, cache_path)
    return feature_dict
def featurize_file(job_tuple):
    """Featurize one trajectory file and dump the result next to its siblings.

    Parameters
    ----------
    job_tuple : tuple
        ``(yaml_file, protein, feat, traj_file, stride)``. ``yaml_file`` is
        passed through ``load_yaml_file``; ``feat`` may be ``None`` to use a
        phi/psi/chi1 ``DihedralFeaturizer``; ``stride`` is unpacked but not
        used here.

    Returns
    -------
    None

    Notes
    -----
    A trajectory that fails to load is **deleted from disk** after a warning.
    The first call that finds no ``feature_descriptor.h5`` also writes the
    feature description, provided the featurizer has ``describe_features``.
    """
    yaml_file, protein, feat, traj_file, stride = job_tuple
    yaml_file = load_yaml_file(yaml_file)

    if feat is None:
        feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])

    _check_output_folder_exists(yaml_file, protein)

    output_folder = os.path.join(yaml_file["base_dir"], protein,
                                 yaml_file["feature_dir"])
    traj_name = os.path.splitext(os.path.basename(traj_file))[0]
    output_fname = os.path.join(output_folder, traj_name + ".jl")
    feat_descriptor = os.path.join(output_folder, "feature_descriptor.h5")

    try:
        trj = mdt.load(traj_file)
    except Exception:
        # Deliberately not a bare ``except``: an interrupt (Ctrl-C) or
        # interpreter shutdown while loading must not delete a trajectory.
        warnings.warn("Removing %s because of misformed trajectory" % traj_file)
        os.remove(traj_file)
        return

    features = feat.partial_transform(trj)
    verbosedump(features, output_fname)

    if not os.path.isfile(feat_descriptor) and hasattr(feat, "describe_features"):
        dih_df = pd.DataFrame(feat.describe_features(trj[0]))
        verbosedump(dih_df, feat_descriptor)

    return
def test_dihedral_feat():
    """Compare stored project features with freshly computed dihedral features.

    For each of the two kinases the project is featurized through
    ``featurize_project_wrapper``; three randomly chosen ``*.hdf5``
    trajectories are then featurized again in-process and compared to the
    stored ``.jl`` files with ``np.allclose``. Returns True on success.
    """
    print(base_dir)
    pool = Pool(6)
    yaml_path = os.path.join(base_dir, "mdl_dir", "project.yaml")
    yaml_file = load_yaml_file(yaml_path)

    for prt in ["kinase_1", "kinase_2"]:
        print(prt)
        prj = yaml_file["project_dict"][prt][0]
        featurize_project_wrapper(yaml_file, prt, feat=None, stride=1,
                                  view=pool)

        feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])
        pattern = os.path.join(base_dir, prt, yaml_file["protein_dir"],
                               "*.hdf5")
        flist = glob.glob(pattern)

        for traj_path in np.random.choice(flist, 3):
            trj = mdt.load(traj_path)
            my_feat = feat.partial_transform(trj)

            stem = os.path.splitext(os.path.basename(traj_path))[0]
            expected_fname = os.path.join(base_dir, prt,
                                          yaml_file["feature_dir"],
                                          stem + ".jl")
            calc_feat = verboseload(expected_fname)
            assert np.allclose(my_feat, calc_feat)

    return True
def individual_traj_featurize(data_to_process):
    """Featurize one trajectory entry.

    Parameters
    ----------
    data_to_process : sequence
        ``[featurizer_type, key, data]``. ``featurizer_type`` must be
        ``'Dihedral'``; ``key`` is returned untouched; ``data`` is passed to
        ``DihedralFeaturizer.fit_transform``.

    Returns
    -------
    list
        ``[key, featurized_data]``.

    Raises
    ------
    ValueError
        If ``featurizer_type`` is not supported. Previously this surfaced as
        an unrelated ``UnboundLocalError`` further down.
    """
    featurizer_type = data_to_process[0]
    if featurizer_type != 'Dihedral':
        raise ValueError(
            "Unsupported featurizer type: {!r}".format(featurizer_type))

    featurizer_data = DihedralFeaturizer(types=['phi', 'psi'])
    featurized_data = featurizer_data.fit_transform(data_to_process[2])
    return [data_to_process[1], featurized_data]
def featurize_trajectories(coords, featurizer):
    """Featurize ``coords`` with the msmbuilder featurizer named ``featurizer``.

    Parameters
    ----------
    coords : sequence
        Passed to ``fit_transform``. For ``'RMSDFeaturizer'`` the first
        element is used as the reference trajectory.
    featurizer : str
        One of ``'RMSDFeaturizer'``, ``'DRIDFeaturizer'``,
        ``'ContactFeaturizer'`` or ``'DihedralFeaturizer'``.

    Returns
    -------
    The result of ``feat.fit_transform(coords)``.

    Raises
    ------
    ValueError
        If ``featurizer`` is not one of the supported names (this used to
        fail with an ``UnboundLocalError``).
    """
    # Imports stay local so only the requested featurizer class is imported.
    if featurizer == 'RMSDFeaturizer':
        from msmbuilder.featurizer import RMSDFeaturizer
        feat = RMSDFeaturizer(reference_traj=coords[0])
    elif featurizer == 'DRIDFeaturizer':
        from msmbuilder.featurizer import DRIDFeaturizer
        feat = DRIDFeaturizer()
    elif featurizer == 'ContactFeaturizer':
        from msmbuilder.featurizer import ContactFeaturizer
        feat = ContactFeaturizer(scheme='ca')
    elif featurizer == 'DihedralFeaturizer':
        from msmbuilder.featurizer import DihedralFeaturizer
        feat = DihedralFeaturizer(types=['phi', 'psi'])
    else:
        raise ValueError("Unknown featurizer: {!r}".format(featurizer))

    return feat.fit_transform(coords)
def load_met():
    """Return atom-pair and dihedral features for the MetEnkephalin dataset.

    Returns
    -------
    (X, Y)
        ``X``: atom-pair features over all pairs ``(j, i)`` with
        ``j < i < 75``; ``Y``: default ``DihedralFeaturizer`` features.
    """
    from msmbuilder.example_datasets import MetEnkephalin
    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    # Same ordering as the nested loops: grouped by the larger index.
    pairs = [(lo, hi) for hi in range(75) for lo in range(hi)]
    X = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    Y = DihedralFeaturizer().fit_transform(trajs)
    return X, Y
def load_fs():
    """Return atom-pair and dihedral features for the MinimalFsPeptide dataset.

    Returns
    -------
    (X, Y)
        ``X``: atom-pair features over all pairs ``(j, i)`` with
        ``j < i < 264``; ``Y``: default ``DihedralFeaturizer`` features.
    """
    from msmbuilder.example_datasets import MinimalFsPeptide
    trajs = MinimalFsPeptide().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    # Same ordering as the nested loops: grouped by the larger index.
    pairs = [(lo, hi) for hi in range(264) for lo in range(hi)]
    X = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    Y = DihedralFeaturizer().fit_transform(trajs)
    return X, Y
def build_model(self, user_defined_model):
    """
    Return the model (scikit-learn Pipeline) used for transforming and fitting.

    :param user_defined_model: ``None`` or a Pipeline object. With ``None``
        the model is loaded from ``self.model_pkl_fname`` when that file
        exists, otherwise a default dihedral-based pipeline is built.
    :return model: The Pipeline to use
    :raises ValueError: if ``user_defined_model`` is neither ``None`` nor a
        Pipeline instance
    """
    if user_defined_model is not None:
        if isinstance(user_defined_model, Pipeline):
            logger.info('Using user defined model')
            return user_defined_model
        raise ValueError(
            'model is not an sklearn.pipeline.Pipeline object')

    if os.path.exists(self.model_pkl_fname):
        logger.info('Loading model pkl file {}'.format(
            self.model_pkl_fname))
        return load_generic(self.model_pkl_fname)

    logger.info('Building default model based on dihedrals')
    # build a lag time of 1 ns for tICA and msm
    # if the stride is too big and we can't do that
    # use 1 frame and report how much that is in ns
    if self.app.meta is None:
        self.timestep = None
        lag_time = 1
        logger.warning(
            'Cannot determine timestep. Defaulting to 1 frame.')
    else:
        lag_time = max(1, int(1 / self.timestep))
        logger.info(
            'Using a lag time of {} ns for the tICA and MSM'.
            format(lag_time * self.timestep))

    steps = [
        ('feat', DihedralFeaturizer()),
        ('scaler', RobustScaler()),
        ('tICA', tICA(lag_time=lag_time, commute_mapping=True,
                      n_components=10)),
        ('clusterer', MiniBatchKMeans(n_clusters=200)),
        ('msm', MarkovStateModel(lag_time=lag_time,
                                 ergodic_cutoff='off',
                                 reversible_type=None)),
    ]
    return Pipeline(steps)
def test_code_works():
    """Fit a 4-state von Mises HMM on the ALA2 data and sanity-check it.

    Nothing fancy: makes sure the code runs without erroring out, that the
    model reports three timescales and that at least one exceeds 50.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    featurizer = DihedralFeaturizer(['phi', 'psi'], trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    hmm = VonMisesHMM(n_states=4, n_init=1)
    hmm.fit(sequences)

    # The closing parenthesis used to wrap the comparison
    # (``len(timescales_ == 3)``), which is truthy for any non-empty array
    # and therefore checked nothing.
    assert len(hmm.timescales_) == 3
    assert np.any(hmm.timescales_ > 50)
def setUp(self):
    """Prepare file paths, two loaded trajectories, models and random ftrajs."""
    # Fixed seed so the random feature trajectories below are reproducible.
    numpy.random.seed(12)

    self.top = 'data_app/runs/structure.prmtop'
    self.traj_1 = 'data_app/runs/run-000.nc'
    self.traj_2 = 'data_app/runs/run-001.nc'
    self.feat = DihedralFeaturizer()

    traj_paths = (self.traj_1, self.traj_2)
    self.traj_dict = {
        idx: load(path, top=self.top)
        for idx, path in enumerate(traj_paths)
    }

    self.scaler = RobustScaler()
    self.tica = tICA(n_components=2)

    # Two (100, 50) random arrays, keyed 0 and 1, drawn in that order.
    self.ftrajs = {idx: numpy.random.rand(100, 50) for idx in range(2)}
def Get_dihedral_features_villin():
    """Compute phi/psi dihedral features for the villin trajectories.

    Changes into the villin trajectory directory, removes any existing
    ``./diheds`` output directory, featurizes every ``*.xtc`` (stride 5) into
    ``diheds/`` and prints the residue ids of each described feature.

    Returns the dataset produced by ``fit_transform_with``.
    """
    import os
    import shutil
    import mdtraj as md

    os.chdir('/homes/anuginueni/traj_villin')
    output_dir = './diheds'
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)

    from msmbuilder.dataset import dataset
    topology_file = '/homes/anuginueni/traj_villin/filtered.pdb'
    t = md.load("/homes/anuginueni/traj_villin/trajectory-331.xtc",
                top=topology_file, stride=5)
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology=topology_file, stride=5)

    # for dihedrals
    from msmbuilder.featurizer import DihedralFeaturizer
    featurizer = DihedralFeaturizer(types=['phi', 'psi'])
    diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

    des_feat = featurizer.describe_features(t)
    res = []
    for sub in des_feat:
        res.append(sub['resids'])
    print(str(res))
    return diheds
def test_DihedralFeaturizer_describe_features_nosincos():
    """describe_features must match the raw dihedrals when sincos is off.

    For a random trajectory, 25 randomly chosen feature columns are
    recomputed from the atom indices reported by ``describe_features`` and
    compared element-wise against the featurizer output.
    """
    feat = DihedralFeaturizer(sincos=False)
    rnd_traj = np.random.randint(len(trajectories))
    traj = trajectories[rnd_traj]

    features = feat.transform([traj])
    df = pd.DataFrame(feat.describe_features(traj))

    for _ in range(25):
        f_index = np.random.choice(len(df))
        row = df.iloc[f_index]
        feature_value = md.compute_dihedrals(traj, [row.atominds])
        if feat.sincos:
            transform = getattr(np, '%s' % df.iloc[f_index].otherinfo)
            feature_value = transform(feature_value)
        column = features[0][:, f_index]
        assert (column == feature_value.flatten()).all()
def fit_and_transform(directory, stride=5):
    """Fit or load a tICA model, project the features, then project a PDB.

    If the projected-data file for ``stride`` is missing, feature files found
    by ``get_trajectory_files(directory)`` are loaded in parallel; the tICA
    model is fitted (and dumped) when its file is missing, otherwise loaded;
    the projection is then dumped. If the projected-data file exists, both
    the model and the projection are loaded from disk.

    Finally the reference PDB is reduced to a protein atom subset, featurized
    with phi/psi/chi2 dihedrals, projected with the model, and the first four
    entries of the projection are printed. Nothing is returned.
    """
    projected_data_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi_stride%d_projected.h5" % stride
    fit_model_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi2_stride%s_tica_coords.h5" % stride
    active_pdb_file = "/scratch/users/enf/b2ar_analysis/system_B.pdb"
    tica_model = tICA(n_components=4)

    if not os.path.exists(projected_data_filename):
        print("loading feature files")
        feature_files = get_trajectory_files(directory)
        pool = mp.Pool(mp.cpu_count())
        try:
            features = pool.map(load_features, feature_files)
        finally:
            # Always stop the workers, even when loading a file fails.
            pool.terminate()

        if not os.path.exists(fit_model_filename):
            print("fitting data to tICA model")
            fit_model = tica_model.fit(features)
            verbosedump(fit_model, fit_model_filename)
        else:
            print("loading tICA model")
            fit_model = verboseload(fit_model_filename)

        # Projection and dump are identical for a fresh and a loaded model.
        transformed_data = fit_model.transform(features)
        verbosedump(transformed_data, projected_data_filename)
    else:
        fit_model = verboseload(fit_model_filename)
        transformed_data = verboseload(projected_data_filename)

    active_pdb = md.load(active_pdb_file)
    top = active_pdb.topology

    excluded_resseqs = {341, 79, 296, 269, 178, 93}
    excluded_names = {"NMA", "NME", "ACE"}
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.is_protein
        and a.residue.resSeq not in excluded_resseqs
        and a.residue.name[0:2] != "HI"
        and a.residue.name not in excluded_names
    ]

    active_pdb = md.load(active_pdb_file, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi2'])
    active_pdb_features = featurizer.transform(active_pdb)
    active_pdb_projected = fit_model.transform(active_pdb_features)
    print((active_pdb_projected[0:4]))
def Get_combined_features_villin():
    """Compute combined dihedral + contact features for the villin data.

    Removes any existing ``combined`` output directory, then featurizes every
    ``*.xtc`` (stride 5) with a ``FeatureSelector`` wrapping a default
    ``DihedralFeaturizer`` and a default ``ContactFeaturizer``.

    Returns the dataset produced by ``fit_transform_with``.
    """
    from msmbuilder.featurizer import DihedralFeaturizer
    from msmbuilder.featurizer import ContactFeaturizer
    diheds = DihedralFeaturizer()
    contacts = ContactFeaturizer()
    features = [("di_villin", diheds), ("con_villin", contacts)]

    import os
    import shutil
    os.chdir('/homes/anuginueni/traj_villin')
    combined_dir = '/homes/anuginueni/traj_villin/combined'
    if os.path.isdir(combined_dir):
        shutil.rmtree(combined_dir)

    from msmbuilder.dataset import dataset
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb',
                  stride=5)

    from msmbuilder.feature_selection import FeatureSelector
    comb_features = FeatureSelector(features)
    co = xyz.fit_transform_with(comb_features,
                                '/homes/anuginueni/traj_villin/combined/',
                                fmt='dir-npy')
    return co
def test_pickle():
    """Test pickling an HMM"""
    trajectories = AlanineDipeptide().get_cached().trajectories
    first = trajectories[0]
    topology = first.topology
    indices = topology.select('symbol C or symbol O or symbol N')

    featurizer = DihedralFeaturizer(['phi', 'psi'], first[0])
    sequences = featurizer.transform(trajectories)

    hmm = VonMisesHMM(n_states=4, n_init=1)
    hmm.fit(sequences)
    logprob, hidden = hmm.predict(sequences)

    # Round-trip the fitted model through a temporary file.
    with tempfile.TemporaryFile() as savefile:
        pickle.dump(hmm, savefile)
        savefile.seek(0, 0)
        hmm2 = pickle.load(savefile)

    logprob2, hidden2 = hmm2.predict(sequences)
    assert (logprob == logprob2)
def test_feature_slicer():
    """FeatureSlicer must keep exactly the requested dihedral columns.

    Columns [0, 1] must not belong to a 'psi' feature group and columns
    [2, 3] must not belong to a 'phi' feature group.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    f = DihedralFeaturizer()

    cases = (
        ([0, 1], 'psi'),
        ([2, 3], 'phi'),
    )
    for indices, excluded_group in cases:
        fs = FeatureSlicer(f, indices=indices)
        y1 = fs.transform(trajectories)
        assert y1[0].shape[1] == 2

        df = pd.DataFrame(fs.describe_features(trajectories[0]))
        assert len(df) == 2
        assert excluded_group not in df.featuregroup[0]
        assert excluded_group not in df.featuregroup[1]
def featurize_trajectories(coords, featurizer):
    '''
    Input
    coords : list of 'MDTrajDataset' object
    featurizer : str, one of 'RMSDFeaturizer', 'DRIDFeaturizer',
        'ContactFeaturizer', 'DihedralFeaturizer'

    Output
    features : list of arrays, length n_trajs, each of shape (n_samples, n_features)

    Raises
    ValueError : if `featurizer` is not one of the supported names
        (this used to fail with an UnboundLocalError)
    '''
    # Imports stay local so only the requested featurizer class is imported.
    if featurizer == 'RMSDFeaturizer':
        from msmbuilder.featurizer import RMSDFeaturizer
        # the first entry of coords doubles as the RMSD reference
        feat = RMSDFeaturizer(reference_traj=coords[0])
    elif featurizer == 'DRIDFeaturizer':
        from msmbuilder.featurizer import DRIDFeaturizer
        feat = DRIDFeaturizer()
    elif featurizer == 'ContactFeaturizer':
        from msmbuilder.featurizer import ContactFeaturizer
        feat = ContactFeaturizer(scheme='ca')
    elif featurizer == 'DihedralFeaturizer':
        from msmbuilder.featurizer import DihedralFeaturizer
        feat = DihedralFeaturizer(types=['phi', 'psi'])
    else:
        raise ValueError("Unknown featurizer: {!r}".format(featurizer))

    return feat.fit_transform(coords)
def read_and_featurize(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Featurize the protein atoms of the first chain and dump the features.

    Parameters
    ----------
    filename : str
        "/"-separated path to the trajectory. The second-to-last component is
        the condition name, the last one the trajectory file name.
    dihedrals : list of str
        Dihedral types handed to ``DihedralFeaturizer``.
    stride : int
        Only used to build the output file name.

    Returns
    -------
    The features returned by ``DihedralFeaturizer.transform``.
    """
    print(("reading and featurizing %s" % (filename)))
    full_traj = md.load(filename)
    # Atom selection lives on the topology and yields atom indices; the
    # trajectory is then cut down with atom_slice. The previous code called
    # ``.select`` directly on the loaded trajectory.
    # NOTE(review): "chain A" is written as ``chainid 0`` here, assuming
    # chain A is the first chain in the file -- confirm for this system.
    protein_atoms = full_traj.topology.select('chainid 0 and protein')
    traj = full_traj.atom_slice(protein_atoms)

    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)
    print("finished featurizing")

    directory = filename.split("/")
    # Index arithmetic (rather than [-2]) keeps the historical behaviour for
    # a bare file name with no "/" in it.
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/home/enf/b2ar_analysis/subsampled_features/"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)
    if not os.path.exists(new_condition_dir):
        os.makedirs(new_condition_dir)

    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    print(("saving features as %s" % new_file_full))
    verbosedump(features, new_file_full)
    return features
def test_FeatureSelector_describe_features():
    """FeatureSelector descriptions must equal the concatenated parts.

    Builds contact and dihedral featurizers, combines them in a
    ``FeatureSelector`` and checks that the combined description has the
    summed length and that 40 randomly chosen rows agree, column by column,
    with the per-featurizer descriptions concatenated in ``feat_list`` order.
    """
    rnd_traj = np.random.randint(len(trajectories))
    traj = trajectories[rnd_traj]

    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    f1 = f_ca.transform([traj])
    df1 = pd.DataFrame(f_ca.describe_features(traj))

    f_dih = DihedralFeaturizer()
    f2 = f_dih.transform([traj])
    df2 = pd.DataFrame(f_dih.describe_features(traj))

    df_dict = {"ca": df1, "dih": df2}

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    f3 = f_comb.transform([traj])
    df3 = pd.DataFrame(f_comb.describe_features(traj))
    assert len(df3) == len(df1) + len(df2)

    df4 = pd.concat([df_dict[name] for name in f_comb.feat_list])

    # lets randomly compare 40 features
    for row in np.random.choice(range(len(df3)), 40):
        for column in df3.columns:
            assert eq(df3.iloc[row][column], df4.iloc[row][column])
topology=fs_peptide.data_dir + '/fs-peptide.pdb', stride=10) print("{} trjaectories".format(len(xyz))) # msmbuilder does not keep track of units! You must keep track of your # data's timestep to_ns = 0.5 print("with length {} ns".format(set(len(x) * to_ns for x in xyz))) if which_dataset == 'apo_calmodulin': print('correct') xyz = dataset('/scratch/users/mincheol/apo_trajectories' + '/*.lh5', stride=10) #featurization from msmbuilder.featurizer import DihedralFeaturizer featurizer = DihedralFeaturizer(types=['phi', 'psi'], sincos=False) print(xyz) diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy') #tICA from msmbuilder.decomposition import tICA if which_dataset == 'fspeptide': tica_model = tICA(lag_time=2, n_components=4) if which_dataset == 'apo_calmodulin': tica_model = tICA(lag_time=40, n_components=20) # fit and transform can be done in seperate steps: tica_model = diheds.fit_with(tica_model) tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')
import glob
import os

import mdtraj as md
import pandas as pd
from msmbuilder.cluster import KMeans
from msmbuilder.decomposition import tICA
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.msm import MarkovStateModel

# Collect and load the trajectories against a shared topology.
flist = glob.glob("../trajectory.xtc")
top = md.load("../top.pdb")
trj_list = [md.load(traj_path, top=top) for traj_path in flist]
print("Found %d trajs" % len(trj_list))

# Raw dihedral angles (no sin/cos expansion): keep featurizer and features.
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")
feat = f.transform(trj_list)
dump(feat, "raw_features.pkl")

# Switch to the featurizer stored on disk; it is written back under the same
# name and then described and applied to the same trajectories.
f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)
dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
import numpy as np
import msmexplorer as msme

# Fixed random state so the clustering below is reproducible.
rs = np.random.RandomState(42)

# Load Fs Peptide Data
# NOTE(review): FsPeptide is not imported in the visible part of this
# script -- presumably it is imported above; confirm.
trajs = FsPeptide().get().trajectories

# Extract chi1 Dihedrals
featurizer = DihedralFeaturizer(types=['chi1'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=2)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=12, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2)
assignments = msm.fit_transform(clustered_trajs)

# Plot Stacked Distributions
""" Histogram Plot ============== """ from msmbuilder.example_datasets import FsPeptide from msmbuilder.featurizer import DihedralFeaturizer from msmbuilder.decomposition import tICA import numpy as np import msmexplorer as msme # Load Fs Peptide Data trajs = FsPeptide().get().trajectories # Extract Backbone Dihedrals featurizer = DihedralFeaturizer(types=['phi', 'psi']) diheds = featurizer.fit_transform(trajs) # Perform Dimensionality Reduction tica_model = tICA(lag_time=2, n_components=4) tica_trajs = tica_model.fit_transform(diheds) # Plot Histogram data = np.concatenate(tica_trajs, axis=0) msme.plot_histogram(data, color='oxblood', quantiles=(0.5, ), labels=['$tIC1$', '$tIC2$', '$tIC3$', '$tIC4$'], show_titles=True)