def featurize(featurizer, meta_data):
    """Featurize every trajectory listed in ``meta_data`` and save the results.

    Parameters
    ----------
    featurizer
        Object exposing ``partial_transform(traj)``; it is applied to each
        loaded trajectory and pickled afterwards.
    meta_data
        Table of trajectories. It is iterated with ``iterrows()`` and each
        row must provide ``'traj_fn'`` and ``'top_fn'`` entries.

    Returns
    -------
    dict
        Maps each row index of ``meta_data`` to the featurized trajectory
        returned by ``featurizer.partial_transform``.

    Side effects
    ------------
    Writes the feature trajectories via ``save_trajs(..., 'ftrajs', ...)``
    and the featurizer via ``save_generic(..., 'featurizer.pickl')``.
    """
    # The original body referenced an undefined name ``meta`` and ignored the
    # ``meta_data`` argument; use the argument the caller actually passed.
    tops = preload_tops(meta_data)

    def feat(irow):
        """Load one trajectory row and return ``(index, features)``."""
        i, row = irow
        traj = md.load(row['traj_fn'], top=tops[row['top_fn']])
        return i, featurizer.partial_transform(traj)

    feature_trajs = dict(map(feat, meta_data.iterrows()))

    save_trajs(feature_trajs, 'ftrajs', meta_data)
    save_generic(featurizer, 'featurizer.pickl')
    return feature_trajs
def sample_clusters():
    """Write 100 randomly sampled frames per cluster to ``clusters/*.dcd``.

    For every cluster id in ``range(int(num_clusters))`` the rows of the
    module-level ``df`` whose ``'Trajectory'`` column equals that id are
    sampled, the corresponding frames are loaded and joined into a single
    trajectory, superposed onto ``topology.pdb`` and saved as
    ``clusters/rmsd_cluster-<id>.dcd`` (any existing file is passed to
    ``backup`` first).

    Relies on the module-level names ``df`` and ``num_clusters``.

    NOTE(review): ``DataFrame.sample(100)`` is expected to fail when a
    cluster has fewer than 100 rows - confirm that every cluster is large
    enough.
    """
    meta = load_meta()
    tops = preload_tops(meta)

    print('Sampling trajectories')
    ref = md.load('topology.pdb')

    for i in range(int(num_clusters)):
        print(i)

        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` performs the
        # same boolean-mask row / label-list column selection.
        df_smp = df.loc[df['Trajectory'] == i, ['Key', 'Time_ps']].sample(100)

        # NOTE(review): the 'Time_ps' value is passed to load_frame as the
        # frame index - confirm it holds frame numbers rather than times.
        inds = zip(df_smp['Key'], df_smp['Time_ps'])

        # 'Key' values are labels into meta's index, hence ``meta.loc``.
        # The preloaded topology is reused so the topology file is not
        # parsed again for every single frame.
        traj = md.join(
            md.load_frame(meta.loc[traj_i]['traj_fn'],
                          index=frame_i,
                          top=tops[meta.loc[traj_i]['top_fn']])
            for traj_i, frame_i in inds
        )

        # Original trajectories include both BT1 and BT2 so need to superpose
        traj.superpose(reference=ref)

        # Save
        traj_fn = "clusters/rmsd_cluster-{}.dcd".format(i)
        backup(traj_fn)
        traj.save(traj_fn)
from multiprocessing import Pool

from msmbuilder.featurizer import RawPositionsFeaturizer
from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic
import numpy as np
import mdtraj as md
import matplotlib
matplotlib.use('Agg')  # select the Agg backend before plt is imported
from matplotlib.pylab import plt

from utilities import plot_box, feat

if __name__ == '__main__':
    # Load the trajectory metadata and the topologies it refers to
    meta = load_meta()
    tops = preload_tops(meta)

    # Select featurizer: raw positions relative to the reference structure
    feature_name = 'Positions'
    reference = md.load('topology.pdb')
    featurizer = RawPositionsFeaturizer(ref_traj=reference)

    # Build one (row, featurizer, tops) work item per row of meta
    n_rows = meta.shape[0]
    args = zip(meta.iterrows(), [featurizer] * n_rows, [tops] * n_rows)

    # Do it in parallel; results arrive in arbitrary order and are keyed
    # by the first element of whatever pair ``feat`` returns
    with Pool() as pool:
        feature_trajs = dict(pool.imap_unordered(feat, args))

    # Plot unscaled features: keep every 100th entry of each feature
    # trajectory and stack them into a single array
    subsampled = [fx[::100] for fx in feature_trajs.values()]
    ftrajs = np.concatenate(subsampled)
Meta ---- depends: - meta.pandas.pickl - trajs - top.pdb """ import mdtraj as md from msmbuilder.featurizer import DihedralFeaturizer from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic from multiprocessing import Pool ## Load meta = load_meta() tops = preload_tops(meta) dihed_feat = DihedralFeaturizer() ## Featurize logic def feat(irow): i, row = irow traj = md.load(row['traj_fn'], top=tops[row['top_fn']]) feat_traj = dihed_feat.partial_transform(traj) return i, feat_traj ## Do it in parallel with Pool() as pool: dihed_trajs = dict(pool.imap_unordered(feat, meta.iterrows()))