def test_api_still_works_allframes():
    """Check that RMSDFeaturizer reproduces OldRMSDFeaturizer's output.

    Both featurizers are built from the same reference and applied to the
    same two trajectories; every pair of outputs must agree, and each new
    output must have shape (100, 7).
    """
    traj_a, traj_b, reference = _random_trajs()
    legacy = OldRMSDFeaturizer(reference)
    current = RMSDFeaturizer(reference)

    legacy_out = legacy.fit_transform([traj_a, traj_b])
    current_out = current.fit_transform([traj_a, traj_b])

    for expected, actual in zip(legacy_out, current_out):
        np.testing.assert_array_almost_equal(expected, actual)
        assert actual.shape == (100, 7)
def test_alanine_dipeptide_basic():
    # Featurize the 0th alanine dipeptide trajectory against its own 0th
    # frame and require every RMSD to match the value that mdtraj computes
    # directly for the same trajectory/reference pair.
    trajs = AlanineDipeptide().get_cached().trajectories

    rmsd_featurizer = RMSDFeaturizer(trajs[0][0])
    features = rmsd_featurizer.transform(trajs[0:1])

    expected = md.rmsd(trajs[0], trajs[0][0])
    computed = features[0][:, 0]
    np.testing.assert_array_almost_equal(computed, expected, decimal=4)
def test_api_still_works_names():
    """Check the deprecated ``trj0`` keyword still works and warns.

    The new featurizer must emit a warning mentioning both "deprecated"
    and "trj0" when constructed with ``trj0=``, and must then produce the
    same features as the old featurizer, each of shape (100, 7).
    """
    traj1, traj2, ref = _random_trajs()
    old = OldRMSDFeaturizer(trj0=ref, atom_indices=np.arange(50))
    with warnings.catch_warnings(record=True) as w:
        # Force every warning to be recorded: with the default filters a
        # deprecation warning can be suppressed (or shown only once per
        # location), which would leave ``w`` empty and turn the checks
        # below into an IndexError instead of a meaningful failure.
        warnings.simplefilter("always")
        new = RMSDFeaturizer(trj0=ref, atom_indices=np.arange(50))
        assert w, "constructing with trj0= did not emit any warning"
        assert "deprecated" in str(w[-1].message)
        assert "trj0" in str(w[-1].message)

    data_old = old.fit_transform([traj1, traj2])
    data_new = new.fit_transform([traj1, traj2])

    for do, dn in zip(data_old, data_new):
        np.testing.assert_array_almost_equal(do, dn)
        assert dn.shape == (100, 7)
def test_two_refs_omitting_indices():
    # With a two-frame reference, leaving atom_indices out must give the
    # same features as explicitly passing every atom index.
    trajs = AlanineDipeptide().get_cached().trajectories

    explicit = RMSDFeaturizer(trajs[0][0:2], np.arange(trajs[0].n_atoms))
    explicit_features = explicit.transform(trajs[0:1])

    implicit = RMSDFeaturizer(trajs[0][0:2])
    implicit_features = implicit.transform(trajs[0:1])

    np.testing.assert_array_almost_equal(
        implicit_features[0], explicit_features[0], decimal=4)
def test_that_all_featurizers_run():
    # Smoke test: each featurizer below only has to construct and
    # transform the alanine dipeptide trajectories without raising.
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = AlanineDipeptide().get_cached().trajectories
    trj0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(trj0)

    AtomPairsFeaturizer(pair_indices).transform(trajectories)
    SuperposeFeaturizer(np.arange(15), trj0).transform(trajectories)
    DihedralFeaturizer(["phi", "psi"]).transform(trajectories)
    VonMisesFeaturizer(["phi", "psi"]).transform(trajectories)

    # ContactFeaturizer is deliberately left out: it doesn't work on
    # ALA dipeptide.

    RMSDFeaturizer(trj0).transform(trajectories)
def test_two_refs_basic():
    # Frames 0 and 1 of the 0th adp trajectory serve as the two references.
    # The feature matrix must be symmetric on its leading 2x2 corner
    # (frame 0 vs ref 0 equals frame 1 vs ref 1, and the two cross terms
    # agree) and must match the RMSDs mdtraj computes for each reference.
    trajs = AlanineDipeptide().get_cached().trajectories

    features = RMSDFeaturizer(trajs[0][0:2]).transform(trajs[0:1])

    expected = np.zeros((trajs[0].n_frames, 2))
    for ref_idx in (0, 1):
        expected[:, ref_idx] = md.rmsd(trajs[0], trajs[0][ref_idx])

    np.testing.assert_almost_equal(features[0][0, 0], features[0][1, 1], decimal=3)
    np.testing.assert_almost_equal(features[0][1, 0], features[0][0, 1], decimal=3)
    np.testing.assert_array_almost_equal(features[0], expected, decimal=4)
def test_different_indices():
    # RMSDs computed over two disjoint halves of the atoms must differ,
    # while the resulting feature arrays keep the same shape.
    trajs = AlanineDipeptide().get_cached().trajectories
    n_atoms = trajs[0].n_atoms
    midpoint = n_atoms // 2

    lower = RMSDFeaturizer(trajs[0][0], np.arange(midpoint))
    lower_features = lower.transform(trajs[0:1])

    upper = RMSDFeaturizer(trajs[0][0], np.arange(midpoint, n_atoms))
    upper_features = upper.transform(trajs[0:1])

    assert lower_features[0].shape == upper_features[0].shape

    # janky way to show that the arrays shouldn't be equal here
    lower_total = sum(lower_features[0][:, 0])
    upper_total = sum(upper_features[0][:, 0])
    assert lower_total != upper_total
def featurize_trajectories(coords, featurizer):
    """Featurize ``coords`` with the msmbuilder featurizer chosen by name.

    Parameters
    ----------
    coords
        Collection of trajectories handed to ``fit_transform``. Its first
        element is used as the reference for ``RMSDFeaturizer``.
        (Presumably an msmbuilder dataset of mdtraj trajectories; confirm
        against the caller.)
    featurizer : str
        One of 'RMSDFeaturizer', 'DRIDFeaturizer', 'ContactFeaturizer'
        (CA scheme) or 'DihedralFeaturizer' (phi and psi only).

    Returns
    -------
    The value returned by the selected featurizer's ``fit_transform``.

    Raises
    ------
    ValueError
        If ``featurizer`` is not one of the supported names.
    """
    # msmbuilder is imported lazily so only the selected class is loaded.
    if featurizer == 'RMSDFeaturizer':
        from msmbuilder.featurizer import RMSDFeaturizer
        feat = RMSDFeaturizer(reference_traj=coords[0])
    elif featurizer == 'DRIDFeaturizer':
        from msmbuilder.featurizer import DRIDFeaturizer
        feat = DRIDFeaturizer()
    elif featurizer == 'ContactFeaturizer':
        from msmbuilder.featurizer import ContactFeaturizer
        feat = ContactFeaturizer(scheme='ca')
    elif featurizer == 'DihedralFeaturizer':
        from msmbuilder.featurizer import DihedralFeaturizer
        feat = DihedralFeaturizer(types=['phi', 'psi'])
    else:
        # Previously an unknown name fell through and failed with an
        # UnboundLocalError on ``feat``; report the real problem instead.
        raise ValueError(
            "Unknown featurizer %r; choose one of: RMSDFeaturizer, "
            "DRIDFeaturizer, ContactFeaturizer, DihedralFeaturizer"
            % (featurizer,))
    return feat.fit_transform(coords)
def test_omitting_indices():
    # Leaving atom_indices out must produce the same RMSDs as passing
    # the full range of atom indices explicitly.
    trajs = fetch_alanine_dipeptide()["trajectories"]

    explicit = RMSDFeaturizer(trajs[0][0], np.arange(trajs[0].n_atoms))
    explicit_features = explicit.transform(trajs[0:1])

    implicit = RMSDFeaturizer(trajs[0][0])
    implicit_features = implicit.transform(trajs[0:1])

    np.testing.assert_array_almost_equal(
        implicit_features[0][:, 0], explicit_features[0][:, 0], decimal=4)
def featurize_trajectories(coords, featurizer):
    '''
    Featurize trajectories with the msmbuilder featurizer chosen by name.

    Input
    coords : list of 'MDTrajDataset' object; coords[0] is used as the
        reference trajectory when 'RMSDFeaturizer' is selected
    featurizer : str, one of 'RMSDFeaturizer', 'DRIDFeaturizer',
        'ContactFeaturizer' (CA scheme) or 'DihedralFeaturizer'
        (phi and psi only)

    Output
    features : list of arrays, length n_trajs, each of shape (n_samples, n_features)

    Raises
    ValueError : if featurizer is not one of the supported names
    '''
    # msmbuilder is imported lazily so only the selected class is loaded.
    if featurizer == 'RMSDFeaturizer':
        from msmbuilder.featurizer import RMSDFeaturizer
        feat = RMSDFeaturizer(reference_traj=coords[0])
    elif featurizer == 'DRIDFeaturizer':
        from msmbuilder.featurizer import DRIDFeaturizer
        feat = DRIDFeaturizer()
    elif featurizer == 'ContactFeaturizer':
        from msmbuilder.featurizer import ContactFeaturizer
        feat = ContactFeaturizer(scheme='ca')
    elif featurizer == 'DihedralFeaturizer':
        from msmbuilder.featurizer import DihedralFeaturizer
        feat = DihedralFeaturizer(types=['phi', 'psi'])
    else:
        # An unknown name used to fall through and die with an
        # UnboundLocalError on ``feat``; fail with a clear message instead.
        raise ValueError(
            "Unknown featurizer %r; choose one of: RMSDFeaturizer, "
            "DRIDFeaturizer, ContactFeaturizer, DihedralFeaturizer"
            % (featurizer,))
    return feat.fit_transform(coords)
def _build_fast_parser():
    """Build the command-line parser for the FAST reward-score script.

    Returns
    -------
    argparse.ArgumentParser
        Parser declaring the trajectory path plus the featurizer,
        decomposer, clusterer, MSM, physical-trait, FAST and output options.
    """
    import argparse
    import textwrap

    parser = argparse.ArgumentParser(
        usage=textwrap.dedent(
            '''Use "python %(prog)s -h" for more information.'''),
        formatter_class=argparse.RawTextHelpFormatter,
        description=textwrap.dedent('''\
            First, this program employs msmbuilder to featurize given pdb trajectories into vectorizable space.
            Second, the vector space is decompose by tICA or PCA to further reduce the dimension.
            Third, clustering is performed so that each structure in the trajectories is labeled by an index.
            Forth, Marcov State Model, albeit may not be well behaved, is built on the labeled trajectories.
            Last, FAST reward scores are calculated based on the transition-count matrix and user-chosen physical traits.
            Example:
            $ python FAST.py path_to_pdb_trajectories/ --featurizer=DRIDFeaturizer --decomposer=PCA --decomposer-n-components=5 --clusterer=KCenters --n-clusters=5 --msm-prior-counts=0.2 --physical-trait=target-RMSD --target-pdb=/path_to_target_pdb/target.pdb
            '''))
    parser.add_argument(
        'pdbpath',
        help=textwrap.dedent('''[required] Path to pdb trajectories.'''))
    parser.add_argument(
        '--lag-time', default=1, type=int,
        help=textwrap.dedent('''Lag time of the model. Default value = 1.'''))
    parser.add_argument(
        '--featurizer', default=None, type=str,
        help=textwrap.dedent('''\
            Featurizer at your choice. Available featurizers are (select them by name):
            (1) RMSDFeaturizer;
            (2) DihedralFeaturizer, only phi and psi angles;
            (3) DRIDFeaturizer (DRID, Distribution of Reciprocal of Interatomic Distances);
            (4) ContactFeaturizer, CA contact.
            Note: user must choose a featurization method. Choose by name.
            '''))
    parser.add_argument(
        '--decomposer', default=None, type=str,
        help=textwrap.dedent('''\
            Decomposer at your choice. Available decomposers are:
            (1) tICA;
            (2) PCA.
            Note: selection of decomposer is not necessary but recommended.
            If not provided, program will ignore this step and cluster directly on raw features.
            '''))
    parser.add_argument(
        '--decomposer-n-components', default=None, type=int,
        help=textwrap.dedent(
            '''Number of components to keep. if n_components is not set all components are kept.'''))
    parser.add_argument(
        '--clusterer', default=None, type=str,
        help=textwrap.dedent('''\
            Clustering method at your choice. Available clusterer are:
            (1) KMeans;
            (2) KCenters;
            (3) KMedoids;
            (4) MiniBatchKMeans;
            (5) MiniBatchKMedoids.
            Note: user must choose a clusering method.
            '''))
    parser.add_argument(
        '--n-clusters', default=5, type=int,
        help=textwrap.dedent(
            '''The number of clusters to form as well as the number of centroids to generate.'''))
    parser.add_argument(
        '--msm-n-timescales', default=None, type=int,
        help=textwrap.dedent('''\
            The number of dynamical timescales to calculate when diagonalizing the transition matrix.
            If not specified, it will compute n_states - 1.
            '''))
    parser.add_argument(
        '--msm-prior-counts', default=0, type=float,
        help=textwrap.dedent('''\
            Add a number of 'pseudo counts' to each entry in the counts matrix after ergodic trimming.
            When prior_counts == 0 (default), the assigned transition probability between two states
            with no observed transitions will be zero, whereas when prior_counts > 0,
            even this unobserved transitions will be given nonzero probability.
            '''))
    parser.add_argument(
        '--physical-trait', default=None, type=str,
        help=textwrap.dedent('''\
            Physical trait used in calculation of FAST reward score. Available choices are:
            (1) target-RMSD, if chosen, user must supply a target structure;
            (2) target-native-contact, if chosen, user must supply a target structure;
            (3) target-tmscore, if chosen, user must supply the data file containing the TM-scores in column;
            (4) potential, target free, if chosen, user must supply the data file containing the potentials in column;
            Note: user must choose a physical trait.
            '''))
    parser.add_argument(
        '--target-pdb', default=None, type=str,
        help=textwrap.dedent('''\
            The target pdb structure.
            Note: The target pdb should have the same number of atoms in structure with that in pdb trajectories.
            '''))
    parser.add_argument(
        '--initial-pdb', default=None, type=str,
        help=textwrap.dedent('''\
            The initial pdb structure.
            Note: The initial pdb should have the same number of atoms in structure with that in pdb trajectories.
            '''))
    parser.add_argument(
        '--potential', default=None, type=str,
        help=textwrap.dedent(
            '''The potential file corresponding to the pdb trajectories. '''))
    parser.add_argument(
        '--tmscore', default=None, type=str,
        help=textwrap.dedent(
            '''The TM-score file corresponding to the pdb trajectories. '''))
    parser.add_argument(
        '--fast-n-simulations', default=30, type=int,
        help=textwrap.dedent(
            '''Number of parallel simulations in each round of FAST algorithm. Default value: 30. '''))
    # The help text used to read "Number of clusters", which describes a
    # different option; the value is forwarded as ``alpha`` to
    # calc_FAST_reward_score.
    parser.add_argument(
        '--fast-alpha', default=1., type=float,
        help=textwrap.dedent(
            '''Alpha parameter passed to the FAST reward score calculation. Default value: 1.0.'''))
    parser.add_argument(
        '--output', type=str, default='output',
        help=textwrap.dedent('''Output file name.'''))
    return parser


def main():
    """Run the FAST workflow on a directory of pdb trajectories.

    Steps, in order:
      1. load ``<pdbpath>/*.pdb`` through ``msmbuilder.dataset.dataset``;
      2. featurize, optionally decompose, and cluster the trajectories;
      3. count transitions between the concatenated cluster labels;
      4. gather per-cluster-centre observables (RMSD to the initial and
         target structures, native-contact count, potential, TM-score,
         depending on which inputs were supplied);
      5. compute FAST rewards for the chosen physical trait;
      6. write ``<output>*.dat`` tables and ``<output>*.png`` scatter plots.

    The physical trait and the input it depends on are validated right
    after argument parsing. A bad combination therefore exits through
    ``parser.error`` before any expensive work, instead of printing a
    message and then crashing on an undefined variable.

    ``--msm-n-timescales`` and ``--msm-prior-counts`` are still accepted
    but unused here: only the raw transition counts are computed.
    """
    parser = _build_fast_parser()
    args = parser.parse_args()

    # Trait name -> (input that must be present, message if it is missing).
    trait_requirements = {
        'target-RMSD': (
            args.target_pdb,
            "User must provide a target structure!"),
        'target-native-contact': (
            args.target_pdb,
            "User must provide a target structure!"),
        'target-tmscore': (
            args.tmscore,
            "User must provide a TM-score file corresponding to the pdb trajectories!"),
        'potential': (
            args.potential,
            "User must provide a potential file corresponding to the pdb trajectories!"),
    }
    if args.physical_trait not in trait_requirements:
        parser.error("User must choose a physical trait from: %s"
                     % ', '.join(sorted(trait_requirements)))
    required_input, missing_message = trait_requirements[args.physical_trait]
    if required_input is None:
        parser.error(missing_message)

    from msmbuilder.dataset import dataset
    coords = dataset(os.path.join(args.pdbpath, '*.pdb'))
    print('%i trajectories found. \n' % len(coords))

    ## featurize
    features = featurize_trajectories(coords, args.featurizer)
    print("%s selected" % args.featurizer)
    print("features: (n_samples, n_features) = (%i, %i) for each trajectory \n" % (
        features[0].shape[0], features[0].shape[1]))

    ## decompose
    if args.decomposer is None:
        print("No decomposer is selected! Program will directly cluster the raw features. \n")
    else:
        features = decompose_features(
            features, args.decomposer,
            n_components=args.decomposer_n_components,
            lag_time=args.lag_time)
        print("%s selected" % args.decomposer)
        print("features: (n_samples, n_features) = (%i, %i) for each trajectory \n" % (
            features[0].shape[0], features[0].shape[1]))

    ## clustering
    clst = cluster_features(features, args.clusterer,
                            n_clusters=args.n_clusters)
    cci = find_cluster_center_indices(features, clst)
    print("%s selected" % args.clusterer)
    print("Cluster center indices: %s \n" % cci)

    ## construct transition count matrix
    transition_count_mat = calc_transition_count_mat(
        np.concatenate(clst.labels_), args.n_clusters)
    print('Transition count matrix: \n%s' % (transition_count_mat,))

    #### calculate FAST reward score
    output_df = pd.DataFrame()
    output_df['idx'] = cci
    output_df['#cluster'] = transition_count_mat.diagonal()

    if args.initial_pdb is not None:
        import mdtraj as md
        from msmbuilder.featurizer import RMSDFeaturizer
        initial = md.load(args.initial_pdb)
        rmsd_to_initial = np.concatenate(
            RMSDFeaturizer(initial).fit_transform(coords))[:, 0]
        output_df['iniRMSD'] = rmsd_to_initial[cci]

    if args.target_pdb is not None:
        import mdtraj as md
        from msmbuilder.featurizer import ContactFeaturizer
        from msmbuilder.featurizer import RMSDFeaturizer
        # CA-CA distance at or below which a pair counts as a contact
        # (presumably nm, the unit the plot labels use for RMSD; confirm).
        contact_cutoff = 0.75

        target = md.load(args.target_pdb)
        rmsd_to_target = np.concatenate(
            RMSDFeaturizer(target).fit_transform(coords))[:, 0]

        # Native contacts are the CA pairs within the cutoff in the target.
        native_contact_dists, native_contact_pairs = md.compute_contacts(
            target, scheme='ca')
        native_contact_pairs = native_contact_pairs[
            native_contact_dists[0] <= contact_cutoff]
        print("Target structure has %i pairs of CA-CA contact in total. \n" % len(
            native_contact_pairs))

        # Distances of those pairs in every frame: (n_samples, n_pairs).
        pair_dists = np.concatenate(
            ContactFeaturizer(
                contacts=native_contact_pairs,
                scheme='ca').fit_transform(coords))
        # Per frame, count how many native pairs are in contact.
        native_contact_to_target = np.sum(
            pair_dists <= contact_cutoff, axis=1)

        output_df['tarRMSD'] = rmsd_to_target[cci]
        output_df['#NativeContact'] = native_contact_to_target[cci]

    if args.potential is not None:
        potential = np.loadtxt(args.potential)
        output_df['potential'] = potential[cci]

    if args.tmscore is not None:
        tmscore = np.loadtxt(args.tmscore)
        output_df['tmscore'] = tmscore[cci]

    # choose physical trait (its required input was validated above)
    print("%s is selected in FAST \n" % args.physical_trait)
    if args.physical_trait == 'target-RMSD':
        trait_values, minmax = rmsd_to_target, 'min'
    elif args.physical_trait == 'target-native-contact':
        trait_values, minmax = native_contact_to_target, 'max'
    elif args.physical_trait == 'target-tmscore':
        trait_values, minmax = tmscore, 'max'
    else:  # 'potential'
        trait_values, minmax = potential, 'min'
    rewards, sims, c = calc_FAST_reward_score(
        trait_values, cci, transition_count_mat,
        alpha=args.fast_alpha,
        n_simulations=args.fast_n_simulations,
        minmax=minmax)

    output_df['#Transition'] = c
    output_df['reward'] = rewards
    output_df['#sim'] = sims

    ## output
    with open(args.output + '.CenterIdx_ClusterSize.dat', 'w') as f:
        for i in range(args.n_clusters):
            f.write('%6i %6i\n' % (cci[i], sims[i]))

    if args.initial_pdb is not None:
        with open(args.output + '.iniRMSD.dat', 'w') as f:
            f.writelines('%8.3f\n' % ele for ele in rmsd_to_initial)

    if args.target_pdb is not None:
        with open(args.output + '.tarRMSD.dat', 'w') as f:
            f.writelines('%8.3f\n' % ele for ele in rmsd_to_target)
        with open(args.output + '.tarNativeContact.dat', 'w') as f:
            f.writelines('%8.3f\n' % ele for ele in native_contact_to_target)

    with open(args.output + '.dat', 'w') as f:
        f.write('%s\n' % (output_df,))

    ## plot
    def scatter(x, y, suffix, x_label, y_label, xmin, xmax, ymin, ymax):
        # Every figure shares the cluster centres, colour map and centre
        # colour; only the data, labels, limits and file suffix vary.
        plot_cluster(X=x, Y=y, cluster_center_indices=cci,
                     figname=args.output + suffix,
                     x_label=x_label, y_label=y_label,
                     xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
                     c_map='winter', cc_color='red')

    if args.target_pdb is not None:
        scatter(rmsd_to_target, native_contact_to_target,
                '.tarRMSD_tarNativeContact.png',
                'RMSD to target / nm', '# native contact',
                0, ceil(rmsd_to_target.max(), 0),
                0, ceil(native_contact_to_target.max()))

        if args.initial_pdb is not None:
            # Each axis limit now comes from the data plotted on that
            # axis; the two maxima used to be swapped.
            scatter(rmsd_to_initial, rmsd_to_target,
                    '.tarRMSD_iniRMSD.png',
                    'RMSD to initial / nm', 'RMSD to target / nm',
                    0, ceil(rmsd_to_initial.max(), 0),
                    0, ceil(rmsd_to_target.max(), 0))

        if args.tmscore is not None:
            scatter(tmscore, native_contact_to_target,
                    '.tmscore_tarNativeContact.png',
                    'TM-score to target', '# native contact',
                    0, 1,
                    0, ceil(native_contact_to_target.max()))

            if args.potential is not None:
                scatter(tmscore, potential,
                        '.tmscore_potential.png',
                        'TM-score to target', 'potential',
                        0, 1,
                        floor(potential.min()), ceil(potential.max()))

        if args.potential is not None:
            scatter(rmsd_to_target, potential,
                    '.tarRMSD_potential.png',
                    'RMSD to target / nm', 'potential',
                    0, ceil(rmsd_to_target.max(), 0),
                    floor(potential.min()), ceil(potential.max()))

    # First two decomposed components, when a decomposer was used.
    component_plots = {
        'tICA': ('.tICA_1st_2nd.png', 'tIC 1', 'tIC 2'),
        'PCA': ('.PCA_1st_2nd.png', 'PC 1', 'PC 2'),
    }
    if args.decomposer in component_plots:
        suffix, x_label, y_label = component_plots[args.decomposer]
        cat_features = np.concatenate(features)
        scatter(cat_features[:, 0], cat_features[:, 1],
                suffix, x_label, y_label,
                floor(cat_features[:, 0].min()),
                ceil(cat_features[:, 0].max()),
                floor(cat_features[:, 1].min()),
                ceil(cat_features[:, 1].max()))
{{header}} Meta ---- depends: - meta.pandas.pickl - trajs - top.pdb """ import mdtraj as md from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top ## Load meta = load_meta() centroids = md.load("centroids.xtc", top=preload_top(meta)) ## Kernel SIGMA = 0.3 # nm from msmbuilder.featurizer import RMSDFeaturizer import numpy as np featurizer = RMSDFeaturizer(centroids) lfeats = {} for i, traj in itertrajs(meta): lfeat = featurizer.partial_transform(traj) lfeat = np.exp(-lfeat ** 2 / (2 * (SIGMA ** 2))) lfeats[i] = lfeat save_trajs(lfeats, 'ftrajs', meta)
def _build_cluster_parser():
    """Build the command-line parser for the trajectory-clustering script.

    Returns
    -------
    argparse.ArgumentParser
        Parser declaring the trajectory path plus the featurizer,
        decomposer, clusterer, reference-model and output options.
    """
    import argparse
    import textwrap

    parser = argparse.ArgumentParser(
        usage=textwrap.dedent(
            '''Use "python %(prog)s -h" for more information.'''),
        formatter_class=argparse.RawTextHelpFormatter,
        description=textwrap.dedent('''\
            First, this program employs msmbuilder to featurize given pdb trajectories into vectorizable space.
            Second, the vector space is decompose by tICA or PCA to further reduce the dimension.
            Third, clustering is performed so that each structure in the trajectories is labeled by an index.
            Example:
            $ python Traj-to-cluster.py \n
            path_to_pdb_trajectories/ \n
            --featurizer=DRIDFeaturizer \n
            --decomposer=PCA \n
            --decomposer-n-components=5 \n
            --clusterer=KCenters \n
            --n-clusters=5
            '''))
    parser.add_argument(
        'pdbpath',
        help=textwrap.dedent('''[required] Path to pdb trajectories.'''))
    parser.add_argument(
        '--lag-time', default=1, type=int,
        help=textwrap.dedent('''Lag time of the model. Default value = 1.'''))
    parser.add_argument(
        '--featurizer', default=None, type=str,
        help=textwrap.dedent('''\
            Featurizer at your choice. Available featurizers are (select them by name):
            (1) RMSDFeaturizer;
            (2) DihedralFeaturizer, only phi and psi angles;
            (3) DRIDFeaturizer (DRID, Distribution of Reciprocal of Interatomic Distances);
            (4) ContactFeaturizer, CA contact.
            Note: user must choose a featurization method. Choose by name.
            '''))
    parser.add_argument(
        '--decomposer', default=None, type=str,
        help=textwrap.dedent('''\
            Decomposer at your choice. Available decomposers are:
            (1) tICA;
            (2) PCA.
            Note: selection of decomposer is not necessary but recommended.
            If not provided, program will ignore this step and cluster directly on raw features.
            '''))
    parser.add_argument(
        '--decomposer-n-components', default=None, type=int,
        help=textwrap.dedent(
            '''Number of components to keep. if n_components is not set all components are kept.'''))
    parser.add_argument(
        '--clusterer', default=None, type=str,
        help=textwrap.dedent('''\
            Clustering method at your choice. Available clusterer are:
            (1) KMeans;
            (2) KCenters;
            (3) KMedoids;
            (4) MiniBatchKMeans;
            (5) MiniBatchKMedoids.
            Note: user must choose a clusering method.
            '''))
    parser.add_argument(
        '--n-clusters', default=5, type=int,
        help=textwrap.dedent(
            '''The number of clusters to form as well as the number of centroids to generate.'''))
    parser.add_argument(
        '--reference-model', default=[], action='append', type=str,
        help=textwrap.dedent('''
            Reference models used to calculate RMSD.
            '''))
    parser.add_argument(
        '--output', type=str, default='output',
        help=textwrap.dedent('''Output file name.'''))
    return parser


def main():
    """Featurize, optionally decompose, and cluster pdb trajectories.

    Steps, in order:
      1. load ``<pdbpath>/*.pdb`` through ``msmbuilder.dataset.dataset``;
      2. featurize and pickle the features;
      3. optionally decompose with tICA/PCA, then cluster;
      4. for every ``--reference-model``, write the RMSD of each frame to
         that model; with two or more models also plot the first two
         against each other;
      5. pickle the labels (and the components when a decomposer ran),
         write the cluster-centre indices and plot the first two
         decomposed components.

    Fixes relative to the earlier behaviour:
      * pickle files are opened in binary mode, as the binary pickle
        protocol (-1) requires;
      * the components file is only written when a decomposer actually
        produced components (it used to fail on an undefined name);
      * the reference-model section is skipped when no model is given and
        the two-model plot is skipped when fewer than two are given.
    """
    parser = _build_cluster_parser()
    args = parser.parse_args()

    from msmbuilder.dataset import dataset
    coords = dataset(os.path.join(args.pdbpath, '*.pdb'))  # coords: 'MDTrajDataset' object
    print('%i trajectories found. \n' % len(coords))

    ## featurize
    features = featurize_trajectories(coords, args.featurizer)
    print("%s selected" % args.featurizer)
    print("features: (n_samples, n_features) = (%i, %i) for each trajectory \n" % (
        features[0].shape[0], features[0].shape[1]))
    with open(args.output + '.features.%s.pkl' % args.featurizer, 'wb') as f:
        cp.dump(toNumpy32(features), f, -1)
    sys.stdout.flush()

    ## decompose
    components = None
    if args.decomposer is None:
        print("No decomposer is selected! Program will directly cluster the raw coordinates. \n")
    else:
        features, components = decompose_features(
            features, args.decomposer,
            n_components=args.decomposer_n_components,
            lag_time=args.lag_time)
        print("%s selected" % args.decomposer)
        print("features: (n_samples, n_features) = (%i, %i) for each trajectory \n" % (
            features[0].shape[0], features[0].shape[1]))

    ## clustering
    clst = cluster_features(features, args.clusterer,
                            n_clusters=args.n_clusters)
    cci = find_cluster_center_indices(features, clst)
    print("%s selected" % args.clusterer)
    print("Cluster center indices: %s \n" % cci)

    cat_features = np.concatenate(features)
    cat_labels = np.concatenate(clst.labels_)

    ## reference pdb
    # ``--reference-model`` defaults to an empty list, so test for
    # emptiness rather than for None.
    if args.reference_model:
        import mdtraj as md
        from msmbuilder.featurizer import RMSDFeaturizer

        rmsd_to_ref = []
        for ref in args.reference_model:
            print("\nCompute RMSD to the reference models : %s." % ref)
            ref_traj = md.load(ref)
            print("N atoms %i" % ref_traj.n_atoms)
            # Only the first frame of each reference file is used.
            rmsd_to_ref.append(
                np.concatenate(
                    RMSDFeaturizer(ref_traj[0]).fit_transform(coords))[:, 0])
        rmsd_to_ref = np.array(rmsd_to_ref) * 10  # unit : A

        for ref, rmsds in zip(args.reference_model, rmsd_to_ref):
            with open(args.output + '.ref_RMSD_%s.dat' % bsnm(ref), 'w') as f:
                f.writelines('%.3f\n' % e for e in rmsds)

        # The scatter plot compares the first two reference models, so it
        # needs at least two of them.
        if len(args.reference_model) >= 2:
            plot_cluster(X=rmsd_to_ref[0], Y=rmsd_to_ref[1],
                         cluster_center_indices=cci,
                         figname=args.output + '.ref_RMSD.png',
                         x_label='state 1', y_label='state 2',
                         xmin=0, xmax=ceil(rmsd_to_ref.max()),
                         ymin=0, ymax=ceil(rmsd_to_ref.max()),
                         c_map='winter', cc_color='red')

    ## output
    with open(args.output + '.labels.pkl', 'wb') as f:
        cp.dump(cat_labels, f, -1)

    if components is not None:
        with open(args.output + '.components.pkl', 'wb') as f:
            cp.dump(toNumpy32(components), f, -1)

    with open(args.output + '.cluster_center_idx.dat', 'w') as f:
        for i in range(args.n_clusters):
            f.write('%6i\n' % cci[i])

    # First two decomposed components, when a decomposer was used.
    component_plots = {
        'tICA': ('.tICA_1st_2nd.png', 'tIC 1', 'tIC 2'),
        'PCA': ('.PCA_1st_2nd.png', 'PC 1', 'PC 2'),
    }
    if args.decomposer in component_plots:
        suffix, x_label, y_label = component_plots[args.decomposer]
        plot_cluster(X=cat_features[:, 0], Y=cat_features[:, 1],
                     cluster_center_indices=cci,
                     figname=args.output + suffix,
                     x_label=x_label, y_label=y_label,
                     xmin=floor(cat_features[:, 0].min()),
                     xmax=ceil(cat_features[:, 0].max()),
                     ymin=floor(cat_features[:, 1].min()),
                     ymax=ceil(cat_features[:, 1].max()),
                     c_map='winter', cc_color='red')
"""
Trace Plot
==========
"""
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import RMSDFeaturizer
import msmexplorer as msme

# Load Fs Peptide Data (only the first trajectory is used)
fs_peptide = FsPeptide().get()
traj = fs_peptide.trajectories[0]

# Calculate RMSD of every frame relative to the trajectory's first frame
featurizer = RMSDFeaturizer(reference_traj=traj[0])
per_frame_rmsd = featurizer.partial_transform(traj)
rmsd = per_frame_rmsd.flatten()

# Plot Trace
msme.plot_trace(rmsd, label='traj0',
                xlabel='Timestep', ylabel='RMSD (nm)')
def calculate_rmsd_mat_mdtraj(com_traj, ref_traj):
    """Return the RMSD features of ``com_traj`` relative to ``ref_traj``.

    An msmbuilder ``RMSDFeaturizer`` is built with ``ref_traj`` as its
    reference and the result of its ``partial_transform(com_traj)`` is
    returned unchanged.
    """
    from msmbuilder.featurizer import RMSDFeaturizer

    rmsd_featurizer = RMSDFeaturizer(ref_traj)
    rmsd_matrix = rmsd_featurizer.partial_transform(com_traj)
    return rmsd_matrix
{{header}} Meta ---- depends: - meta.pandas.pickl - trajs - top.pdb """ import mdtraj as md from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top ## Load meta = load_meta() centroids = md.load("centroids.xtc", top=preload_top(meta)) ## Kernel SIGMA = 0.3 # nm from msmbuilder.featurizer import RMSDFeaturizer import numpy as np featurizer = RMSDFeaturizer(centroids) lfeats = {} for i, traj in itertrajs(meta): lfeat = featurizer.partial_transform(traj) lfeat = np.exp(-lfeat**2 / (2 * (SIGMA**2))) lfeats[i] = lfeat save_trajs(lfeats, 'ftrajs', meta)
from msmbuilder.featurizer import RMSDFeaturizer import matplotlib matplotlib.use('Agg') from matplotlib.pylab import plt from multiprocessing import Pool from utilities import msmb_feat import numpy as np import seaborn as sns import pandas as pd import sys meta = load_meta() tops = preload_tops(meta) ref = md.load('topology.pdb') feat = RMSDFeaturizer(reference_traj=ref) args = zip(meta.iterrows(), [feat] * meta.shape[0], [tops] * meta.shape[0]) with Pool() as pool: ftrajs = dict(pool.imap_unordered(msmb_feat, args)) # Squeeze and extend short trajectories with zeros # MSMBuilder does rmsd in nm so multiply by 10 to get angstroms nframes = int(np.max(meta['nframes'].unique()[0])) ns_to_ang = 10 rtrajs = {} for k, v in ftrajs.items(): v = np.squeeze(v) diff = nframes - v.shape[0] v = np.append(v, np.zeros(diff) + np.nan) * ns_to_ang