class PCACommand(Command):
    name = 'pca'
    _group = 0
    _concrete = True
    description = 'Compute 2D projection with principal component analysis (PCA).'
    g = argument_group('required arguments')
    g.add_argument('--featurizer', required=True, help='''Path to a featurizer
        pickle. These can be created with the 'hmsm featurizer' command
        in mixtape.''')
    a1 = argument('trajectories', nargs='+', help='''Path to one or more
        MD trajectory files or glob patterns that match MD trajectory
        files.''')
    a2 = argument('--top', '--topology', help='Path to topology file')
    a3 = argument('--out', default='pca-projection.h5', help='''The results
        will be saved to this path as a .h5 file using mdtraj.io.saveh().
        (default=pca-projection.h5)''')

    def __init__(self, args):
        self.args = args
        # Use the msmbuilder PCA wrapper, which accepts a list of 2D
        # feature arrays (one per trajectory). This matches the
        # fit_transform([X]) call in start(), which is shared with the
        # tICACommand subclass below.
        from msmbuilder.decomposition import PCA
        self.model = PCA(n_components=2)
        self.labels = [b'PC1', b'PC2']

    def start(self):
        import pickle
        import mdtraj as md
        from mdtraj import io
        from glob import glob
        import numpy as np

        # The featurizer is a pickle, so load it with pickle rather
        # than np.load.
        with open(self.args.featurizer, 'rb') as f:
            featurizer = pickle.load(f)
        topology = md.load(self.args.top)
        filenames = [fn for t in self.args.trajectories for fn in glob(t)]
        X, indices, fns = featurize_all(filenames, featurizer, topology)
        y = self.model.fit_transform([X])
        fns = np.array([fn.encode('utf-8') for fn in fns])
        io.saveh(self.args.out, X=y[0], indices=indices, fns=fns,
                 labels=np.array(self.labels),
                 topology=np.array([pickle.dumps(topology)]),
                 featurizer=np.array([pickle.dumps(featurizer)]))
        print('Projection saved: %s' % self.args.out)
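
# Illustrative sketch (not part of the CLI): reading a saved projection
# back with mdtraj.io.loadh. The key names match those written by
# PCACommand.start() above; the file path is hypothetical.
#
#     from mdtraj import io
#     data = io.loadh('pca-projection.h5', deferred=False)
#     xy = data['X']           # (n_frames, 2) projected coordinates
#     labels = data['labels']  # [b'PC1', b'PC2']
#     fns = [fn.decode('utf-8') for fn in data['fns']]  # source trajectories
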
class tICACommand(PCACommand):
    name = 'tica'
    _group = 0
    _concrete = True
    description = ('Compute 2D projection with time-structure independent '
                   'components analysis (tICA).')
    g = argument_group('required arguments')
    g.add_argument('--featurizer', required=True, help='''Path to a featurizer
        pickle. These can be created with the 'hmsm featurizer' command
        in mixtape.''')
    g.add_argument('--lag-time', required=True, type=int, help='''Delay time
        forward or backward in the input data. tICA is based on time-lagged
        correlations computed between frames X[t] and X[t+offset]. `offset`
        is interpreted as an integer index -- its value in physical units
        depends entirely on the interval of time between the frames in your
        trajectory file.''')
    a1 = argument('trajectories', nargs='+', help='''Path to one or more
        MD trajectory files or glob patterns that match MD trajectory
        files.''')
    a2 = argument('--top', '--topology', help='Path to topology file')
    a3 = argument('--out', default='tica-projection.h5', help='''The results
        will be saved to this path as a .h5 file using mdtraj.io.saveh().
        (default=tica-projection.h5)''')

    def __init__(self, args):
        from msmbuilder.decomposition import tICA
        if args.lag_time <= 0:
            self.error('--lag-time must be greater than zero')
        self.args = args
        self.model = tICA(n_components=2, lag_time=self.args.lag_time)
        self.labels = [b'tIC1', b'tIC2']
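
# Illustrative sketch of the lag-time semantics (assumes msmbuilder is
# importable; the shapes and lag value are hypothetical):
#
#     import numpy as np
#     from msmbuilder.decomposition import tICA
#
#     X = np.random.randn(1000, 10)    # 1000 frames, 10 features
#     # lag_time=10 correlates X[t] with X[t + 10]; if frames were saved
#     # every 100 ps, this corresponds to a 1 ns physical lag.
#     model = tICA(n_components=2, lag_time=10)
#     y = model.fit_transform([X])[0]  # (1000, 2) projection
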
class PlotCommand(Command, Flask):
    name = 'plot'
    _group = 1
    _concrete = True
    description = 'Launch the interactive plot of a projection in the browser.'
    a1 = argument('projection-file')
    a2 = argument('--n-bins', type=int, default=50)
    a3 = argument('--debug', action=FlagAction, default=False)
    a4 = argument('--progressive', action=FlagAction, default=False)

    def __init__(self, args):
        import pylru
        from mdtraj import io
        from scipy.spatial import cKDTree

        self.args = args
        self.data = io.loadh(args.__dict__['projection-file'], deferred=False)
        self.kdtree = cKDTree(self.data['X'])
        self.top = pickle.loads(self.data['topology'][0])
        self.top.center_coordinates()
        self.topology_pdb_string = pdb_string(self.top)
        self.alpha_carbon_indices = np.array(
            [a.index for a in self.top.topology.atoms if a.name == 'CA'])
        self._traj_cache = pylru.lrucache(size=100)
        self._last_index = 0

        static_folder = os.path.join(os.path.dirname(__file__), 'static')
        s = super(PlotCommand, self) if six.PY2 else super()
        s.__init__(__name__, static_folder=static_folder)

    def start(self):
        self.add_url_rule('/', 'handle_index', self.handle_index)
        self.add_url_rule('/js/<path:path>', 'handle_js', self.handle_js)
        self.add_url_rule('/css/<path:path>', 'handle_css', self.handle_css)
        self.add_url_rule('/pdb', 'handle_pdb', self.handle_pdb)
        self.add_url_rule('/heatmap.json', 'handle_heatmap_json',
                          self.handle_heatmap_json)
        self.add_url_rule('/xy', 'handle_xy', self.handle_xy)
        self.handle_heatmap_json()
        print('\n', '=' * 20, 'OPEN YOUR BROWSER TO SEE THE PLOT', '=' * 20)
        self.run(debug=self.args.debug)

    def load_frame(self, filename, index):
        import mdtraj as md
        if filename not in self._traj_cache:
            print('loading %s...' % filename)
            # Formats that carry their own topology don't need `top`.
            kwargs = ({} if os.path.splitext(filename)[1] in
                      {'.h5', '.lh5', '.pdb'} else {'top': self.top})
            self._traj_cache[filename] = md.load(filename, **kwargs)
            self._traj_cache[filename].center_coordinates()
            print('...done loading')
        return self._traj_cache[filename][index]

    def compute_secondary(self, frame):
        import mdtraj as md
        dssp = md.compute_dssp(frame, simplified=True)[0]
        helices, sheets = [], []
        # Group consecutive residues with the same DSSP code into runs of
        # [start_chain, start_residue, end_chain, end_residue].
        for k, g in groupby(enumerate(dssp), operator.itemgetter(1)):
            indices, keys = list(zip(*g))
            start_residue = indices[0]
            end_residue = indices[-1]
            run = [CHAIN_NAMES[self.top.topology.residue(
                       start_residue).chain.index],
                   start_residue,
                   CHAIN_NAMES[self.top.topology.residue(
                       end_residue).chain.index],
                   end_residue]
            if k == 'H':
                helices.append(run)
            elif k == 'E':
                sheets.append(run)
        return helices, sheets

    # ------------------------------------------------------------------ #

    def handle_index(self):
        return self.send_static_file('index.html')

    def handle_js(self, path):
        return self.send_static_file(os.path.join('js', path))

    def handle_css(self, path):
        return self.send_static_file(os.path.join('css', path))

    def handle_pdb(self):
        helices, sheets = self.compute_secondary(self.top)
        return json.dumps({'pdbstring': self.topology_pdb_string,
                           'helices': helices,
                           'sheets': sheets})

    def handle_heatmap_json(self):
        x = self.data['X'][:, 0]
        y = self.data['X'][:, 1]
        heatmap, xedges, yedges = np.histogram2d(x, -y, bins=self.args.n_bins)
        return json.dumps({'heatmap': heatmap.T.tolist(),
                           'vmax': float(np.max(heatmap)),
                           'extent': {'xmin': xedges[0],
                                      'xmax': xedges[-1],
                                      'ymin': -yedges[-1],
                                      'ymax': -yedges[0]}})

    def handle_xy(self):
        x = float(request.args.get('x', 0))
        y = float(request.args.get('y', 0))
        _, index = self.kdtree.query(x=[x, y], k=1)
        frame = self.load_frame(self.data['fns'][index].decode('utf-8'),
                                self.data['indices'][index])
        if self.args.progressive:
            oldframe = self.load_frame(
                self.data['fns'][self._last_index].decode('utf-8'),
                self.data['indices'][self._last_index])
            superpose_target = oldframe
            self._last_index = index
        else:
            superpose_target = self.top

        frame.superpose(superpose_target,
                        atom_indices=self.alpha_carbon_indices)
        helices, sheets = self.compute_secondary(frame)

        # convert to angstroms
        xyz = frame.xyz[0] * 10.0
        return json.dumps({'x': xyz[:, 0].tolist(),
                           'y': xyz[:, 1].tolist(),
                           'z': xyz[:, 2].tolist(),
                           'helices': helices,
                           'sheets': sheets})
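
# Illustrative sketch of the JSON contract served by handle_xy (assumes
# the server is running on Flask's default local port 5000; the
# `requests` dependency and the query point are hypothetical):
#
#     import requests
#     r = requests.get('http://127.0.0.1:5000/xy',
#                      params={'x': 0.5, 'y': -1.2})
#     snapshot = r.json()
#     # snapshot['x'], snapshot['y'], snapshot['z'] hold per-atom
#     # coordinates in angstroms; snapshot['helices'] and
#     # snapshot['sheets'] list secondary-structure runs as
#     # [start_chain, start_residue, end_chain, end_residue].
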
class WriteVMDCommand(Command):
    """Write out data in VMD-compatible format.

    Note that for full generality, this command will write out an
    n_frames x n_atoms plaintext file for reading by VMD. This file
    could be quite large.

    The resulting tcl script will load the molecule and then read the
    visualization data into it. To load it from the command line, use

        $ vmd -e out_prefix.tcl

    To use it from an already-running instance of VMD:

        >>> source out_prefix.tcl

    If you already have your molecule loaded, delete the first two lines
    of the output script and set the $mol variable to your molecule.
    For example:

        >>> set mol top
        >>> source out_prefix.modified.tcl
    """
    _concrete = True
    _group = 'SolventShells4'
    description = __doc__

    @classmethod
    def _get_name(cls):
        return "SolventWriteVMD"

    dataset = argument("-ds", "--dataset", help="Path to the dataset.",
                       required=True)
    out_prefix = argument('-o', '--out_prefix',
                          help="Prefix output files with this")
    traj = argument('--trj', help="Trajectory to load in VMD",
                    default="traj.dcd")
    top = argument('--top', help="Topology to load in VMD",
                   default="top.pdb")
    stride = argument('--stride',
                      help="Stride by this when loading in VMD. Match this"
                           " to whatever value you used in ApplyComponents",
                      type=int, default=1)

    def __init__(self, args):
        self.dataset = args.dataset
        self.out_prefix = args.out_prefix
        self.traj = args.trj
        self.top = args.top
        self.stride = args.stride

    def start(self):
        ds = dataset(self.dataset, mode='r')
        assert len(ds) == 1, "Only support one at a time for now"
        ds = ds[0]
        dat_fn = "{}.txt".format(self.out_prefix)
        tcl_fn = "{}.tcl".format(self.out_prefix)
        np.savetxt(dat_fn, ds, fmt="%.5f")
        with open(tcl_fn, 'w') as f:
            f.write(VMDSCRIPT.format(
                traj_fn=self.traj, step=self.stride,
                top_fn=self.top, dat_fn=dat_fn
            ))
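
# Illustrative sketch (output names assume --out_prefix was 'shells'):
# the .txt written by start() is a plain n_frames x n_atoms float matrix,
# so it round-trips with numpy:
#
#     import numpy as np
#     per_atom = np.loadtxt('shells.txt')  # shape: (n_frames, n_atoms)
#
# and the companion script loads everything into VMD:
#
#     $ vmd -e shells.tcl
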
class ApplyComponentsCommand(NumpydocClassCommand):
    klass = ApplyComponents
    _concrete = True
    _group = 'SolventShells3'

    @classmethod
    def _get_name(cls):
        return "SolventApplyComponents"

    trjs = argument(
        '--trjs', help='Glob pattern for trajectories',
        default='', required=True)
    top = argument(
        '--top', help='Path to topology file matching the trajectories',
        default='')
    out = argument(
        '--out', required=True, help='Output path', type=exttype('/'))
    assignments = argument(
        '--assignments',
        help="Assignments dataset from SolventShellsAssigner",
        required=True)

    def _solvent_indices_type(self, fn):
        if fn is None:
            return None
        return np.loadtxt(fn, dtype=int, ndmin=1)

    def _solute_indices_type(self, fn):
        if fn is None:
            return None
        return np.loadtxt(fn, dtype=int, ndmin=1)

    def _component_type(self, spec):
        if spec is None:
            return None
        # The spec is 'filename' or 'filename:component_index'; split on
        # the last colon so paths containing colons still work.
        spec_split = spec.split(':')
        if len(spec_split) > 1:
            fn = ':'.join(spec_split[:-1])
            comp_i = int(spec_split[-1])
        else:
            fn = spec_split[0]
            comp_i = 0
        obj = utils.load(fn)
        component = obj.components_[comp_i]
        # Square the loadings and normalize to a maximum of 1.
        component = component ** 2
        component = component / np.max(component)
        return component

    def start(self):
        if os.path.exists(self.out):
            self.error('File exists: %s' % self.out)
        print(self.instance)

        if os.path.exists(os.path.expanduser(self.top)):
            top = os.path.expanduser(self.top)
        else:
            top = None

        traj_dataset = MDTrajDataset(self.trjs, topology=top,
                                     stride=self.instance.stride,
                                     verbose=False)

        with dataset(self.assignments, mode='r') as assn_dataset:
            out_dataset = assn_dataset.create_derived(self.out, fmt='dir-npy')
            pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                               maxval=len(assn_dataset)).start()
            for tr_key, as_key in pbar(zip(traj_dataset.keys(),
                                           assn_dataset.keys())):
                out_dataset[as_key] = self.instance.partial_transform(
                    (traj_dataset[tr_key], assn_dataset[as_key]))
            out_dataset.close()

        print("\nSaving transformed dataset to '%s'" % self.out)
        print("To load this dataset interactively inside an IPython")
        print("shell or notebook, run\n")
        print(" $ ipython")
        print(" >>> from msmbuilder.dataset import dataset")
        print(" >>> ds = dataset('%s')\n" % self.out)
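
# Illustrative sketch of the component spec string parsed by
# _component_type above (presumably wired to an option generated by
# NumpydocClassCommand; the file name and index are hypothetical):
# 'pca.pkl' alone selects component 0, while 'pca.pkl:2' selects
# obj.components_[2].
#
#     spec = 'pca.pkl:2'
#     fn = ':'.join(spec.split(':')[:-1])  # 'pca.pkl'
#     comp_i = int(spec.split(':')[-1])    # 2
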