# Select the dataset named on the command line and load its trajectories
# into `xyz`.
which_dataset = args.which_dataset
feature = args.feature

# Folder to save everything
folder = '/scratch/users/mincheol/' + which_dataset + '/sim_datasets/'

import tempfile
import os

# Switch the working directory to a freshly created temporary directory, so
# any relative paths used from here on resolve inside it.
os.chdir(tempfile.mkdtemp())

# Placeholder: stays an empty list when `which_dataset` matches neither of
# the branches below.
xyz = []
print(which_dataset)

# The two dataset names are mutually exclusive, hence if/elif.
if which_dataset == 'fspeptide':
    # Get data
    fs_peptide = FsPeptide()
    fs_peptide.cache()
    xyz = dataset(fs_peptide.data_dir + "/*.xtc",
                  topology=fs_peptide.data_dir + '/fs-peptide.pdb',
                  stride=10)
    print("{} trajectories".format(len(xyz)))
    # msmbuilder does not keep track of units! You must keep track of your
    # data's timestep
    to_ns = 0.5
    # Distinct trajectory lengths, converted from frame counts with `to_ns`.
    print("with length {} ns".format(set(len(x) * to_ns for x in xyz)))
elif which_dataset == 'apo_calmodulin':
    print('correct')
    xyz = dataset('/scratch/users/mincheol/apo_trajectories' + '/*.lh5',
                  stride=10)

# featurization
import glob
import os

import mdtraj as md
import numpy as np  # required by np.random.choice below
import pandas as pd
from scipy.stats import vonmises as vm
from msmbuilder.example_datasets import fetch_fs_peptide, FsPeptide
from msmbuilder.featurizer import (
    DihedralFeaturizer,
    AlphaAngleFeaturizer,
    KappaAngleFeaturizer,
    ContactFeaturizer,
    VonMisesFeaturizer,
)

"""
Series of tests to make sure all the describe features are putting the right
features in the right place
"""

# Shared module-level fixtures: the Fs peptide example dataset, its topology,
# and a set of loaded trajectories.
fs = FsPeptide()
fs.cache()
dirname = fs.data_dir
top = md.load(dirname + "/fs-peptide.pdb")

# Randomly choose between a subset of atoms and every atom, so the tests run
# against either selection.
if np.random.choice([True, False]):
    # Subset: atoms of protein residues whose index is in 0-14 or 20-22.
    atom_ind = [
        i.index
        for i in top.top.atoms
        if i.residue.is_protein and (
            i.residue.index in range(15)
            or i.residue.index in range(20, 23)
        )
    ]
else:
    atom_ind = [i.index for i in top.top.atoms]

# Load every "trajectory*.xtc" file in the dataset directory, keeping every
# 100th frame and only the selected atoms.
trajectories = [
    md.load(fn, stride=100, top=top, atom_indices=atom_ind)
    for fn in glob.glob(os.path.join(dirname, "trajectory*.xtc"))
]