Example #1
0
class PCACommand(Command):
    """Project featurized MD trajectories onto two components and save them.

    ``__init__`` builds the decomposition model (``self.model``) and the axis
    labels (``self.labels``); ``start`` does the featurization, fitting and
    saving. ``tICACommand`` subclasses this class and reuses ``start`` with a
    different model.
    """
    name = 'pca'
    _group = 0
    _concrete = True
    description = 'Compute 2D projection with principal components analysis (PCA).'

    g = argument_group('required argument')
    g.add_argument('--featurizer',
                   required=True,
                   help='''Path to a featurizer
        pickle. These can be created with the 'hmsm featurizer' command in
        mixtape.''')
    a1 = argument('trajectories',
                  nargs='+',
                  help='''Path to one or more MD
        trajectory files or glob patterns that match MD trajectory files.''')
    a2 = argument('--top', '--topology', help='Path to topology file')
    # Bound to its own attribute name: reusing ``a2`` here rebinds the class
    # attribute and drops the reference to the ``--top`` argument above.
    a3 = argument('--out',
                  default='pca-projection.h5',
                  help='''The results
        will be saved to this path as a .h5 file using mdtraj.io.saveh().
        (default=pca-projection.h5)''')

    def __init__(self, args):
        """Store the parsed arguments and build a two-component PCA model.

        Parameters
        ----------
        args : namespace
            Parsed command-line arguments; ``start`` reads ``featurizer``,
            ``top``, ``trajectories`` and ``out`` from it.
        """
        self.args = args
        from sklearn.decomposition import PCA
        self.model = PCA(n_components=2)
        # Stored as bytes; they are written to the output file as an array.
        self.labels = [b'PC1', b'PC2']

    def start(self):
        """Featurize the trajectories, fit the model and save the projection.

        Writes ``self.args.out`` via ``mdtraj.io.saveh`` with the arrays
        ``X`` (projection), ``indices``, ``fns`` (UTF-8 encoded filenames),
        ``labels``, and the pickled ``topology`` and ``featurizer``.
        """
        import pickle
        import mdtraj as md
        from mdtraj import io
        from glob import glob
        import numpy as np

        # NOTE(review): the help text describes this file as a pickle, so this
        # relies on np.load's pickle handling; newer NumPy releases appear to
        # require allow_pickle=True for that -- confirm against the supported
        # NumPy version.
        featurizer = np.load(self.args.featurizer)
        topology = md.load(self.args.top)
        # Every positional argument is treated as a glob pattern and expanded.
        filenames = [fn for t in self.args.trajectories for fn in glob(t)]

        X, indices, fns = featurize_all(filenames, featurizer, topology)
        # X is wrapped in a one-element list and the first element of the
        # result is what gets saved below.
        # NOTE(review): presumably the model API expects a list of sequences;
        # confirm this also holds for sklearn's PCA.
        y = self.model.fit_transform([X])
        # Filenames are stored as bytes; PlotCommand decodes them as UTF-8.
        fns = np.array([fn.encode('utf-8') for fn in fns])

        io.saveh(self.args.out,
                 X=y[0],
                 indices=indices,
                 fns=fns,
                 labels=np.array(self.labels),
                 topology=np.array([pickle.dumps(topology)]),
                 featurizer=np.array([pickle.dumps(featurizer)]))
        print('Projection saved: %s' % self.args.out)
Example #2
0
class tICACommand(PCACommand):
    """Variant of ``PCACommand`` that projects with a two-component tICA model.

    Only the argument declarations and ``__init__`` differ; everything else,
    including ``start``, is inherited from ``PCACommand``.
    """
    name = 'tica'
    _group = 0
    _concrete = True
    description = 'Compute 2D projection with time-structure independent components analysis (tICA)'

    g = argument_group('required arguments')
    g.add_argument('--featurizer',
                   required=True,
                   help='''Path to a featurizer
        pickle. These can be created with the 'hmsm featurizer' command in
        mixtape.''')
    g.add_argument('--lag-time',
                   required=True,
                   type=int,
                   help='''Delay time
        forward or backward in the input data. tICA is based on time-lagged
        correlations is computed between frames X[t] and X[t+offset]. `offset`
        is interpreted as an integer index -- its value in physical units
        depends entirely on the interval of time between the frames in your
        trajectory file''')

    a1 = argument('trajectories',
                  nargs='+',
                  help='''Path to one or more MD
        trajectory files or glob patterns that match MD trajectory files.''')
    a2 = argument('--top', '--topology', help='Path to topology file')
    a3 = argument('--out',
                  default='tica-projection.h5',
                  help='''The results
        will be saved to this path as a .h5 file using mdtraj.io.saveh().
        (default=tica-projection.h5)''')

    def __init__(self, args):
        """Validate the lag time and build a two-component tICA model.

        Parameters
        ----------
        args : namespace
            Parsed command-line arguments; ``lag_time`` must be a positive
            integer, otherwise ``self.error`` is called.
        """
        from msmbuilder.decomposition import tICA
        # Zero and negative lag times are both rejected, so the message must
        # say "greater than zero".
        if args.lag_time <= 0:
            self.error('--lag-time must be greater than zero')
        self.args = args
        self.model = tICA(n_components=2, lag_time=self.args.lag_time)
        self.labels = [b'tIC1', b'tIC2']
Example #3
0
class PlotCommand(Command, Flask):
    """Serve an interactive browser plot of a saved projection file.

    The instance is both the command and the Flask application:
    ``__init__`` loads the projection data and initialises Flask, ``start``
    registers the URL routes and runs the server.
    """
    name = 'plot'
    _group = 1
    _concrete = True
    description = 'Launch the interactive plot of a projection in the browser.'
    a1 = argument('projection-file')
    a2 = argument('--n-bins', type=int, default=50)
    a3 = argument('--debug', action=FlagAction, default=False)
    a4 = argument('--progressive', action=FlagAction, default=False)

    def __init__(self, args):
        """Load the projection file and set up the Flask application.

        Parameters
        ----------
        args : namespace
            Parsed command-line arguments (``projection-file``, ``n_bins``,
            ``debug`` and ``progressive`` are read by this class).
        """
        import pylru
        from mdtraj import io
        from scipy.spatial import cKDTree

        self.args = args
        # The positional is declared as 'projection-file'; the hyphen rules
        # out attribute access, hence the lookup through __dict__.
        self.data = io.loadh(args.__dict__['projection-file'], deferred=False)
        # Nearest-neighbour index over the projected points, used by handle_xy.
        self.kdtree = cKDTree(self.data['X'])

        # NOTE(review): this unpickles content read from the projection file;
        # only open projection files from a trusted source.
        self.top = pickle.loads(self.data['topology'][0])
        self.top.center_coordinates()
        self.topology_pdb_sring = pdb_string(self.top)
        self.alpha_carbon_indices = np.array(
            [a.index for a in self.top.top.atoms if a.name == 'CA'])

        # Cache of loaded trajectories keyed by filename (at most 100 entries).
        self._traj_cache = pylru.lrucache(size=100)
        # Index of the previously served point; only used in progressive mode.
        self._last_index = 0

        static_folder = os.path.join(os.path.dirname(__file__), 'static')
        s = super(PlotCommand, self) if six.PY2 else super()
        s.__init__(__name__, static_folder=static_folder)

    def start(self):
        """Register the URL routes and run the Flask server."""

        self.add_url_rule('/', 'handle_index', self.handle_index)
        self.add_url_rule('/js/<path:path>', 'handle_js', self.handle_js)
        self.add_url_rule('/css/<path:path>', 'handle_css', self.handle_css)

        self.add_url_rule('/pdb', 'handle_pdb', self.handle_pdb)
        self.add_url_rule('/heatmap.json', 'handle_heatmap_json',
                          self.handle_heatmap_json)
        self.add_url_rule('/xy', 'handle_xy', self.handle_xy)

        # Called once before serving with the result discarded.
        # NOTE(review): presumably a smoke test so that bad projection data
        # fails before the server starts -- confirm.
        self.handle_heatmap_json()
        print('\n', '=' * 20, 'OPEN YOUR BROWSER TO SEE THE PLOT', '=' * 20)
        self.run(debug=self.args.debug)

    def load_frame(self, filename, index):
        """Return frame(s) ``index`` of the trajectory stored in ``filename``.

        The trajectory is loaded and centred on first use and kept in the
        LRU cache afterwards.

        Parameters
        ----------
        filename : str
            Path of the trajectory file.
        index : int
            Index applied to the loaded trajectory.
        """
        import mdtraj as md

        if filename not in self._traj_cache:
            print('loading %s...' % filename)
            # Files with these extensions are loaded without a topology
            # argument; every other format is given the stored topology.
            kwargs = {} if os.path.splitext(filename)[1] in {
                '.h5', '.lh5', '.pdb'
            } else {
                'top': self.top
            }
            self._traj_cache[filename] = md.load(filename, **kwargs)
            self._traj_cache[filename].center_coordinates()

            print('...done loading')

        return self._traj_cache[filename][index]

    def compute_secondary(self, frame):
        """Compute helix and sheet runs for ``frame`` from simplified DSSP.

        Returns
        -------
        helices, sheets : list, list
            Each entry is ``[start_chain, start_residue, end_chain,
            end_residue]`` for one consecutive run of residues whose DSSP
            code is ``'H'`` (helices) or ``'E'`` (sheets).
        """
        # Imported locally like in load_frame, so this method does not depend
        # on a module-level ``md`` binding being present.
        import mdtraj as md

        dssp = md.compute_dssp(frame, simplified=True)[0]
        helices, sheets = [], []

        # Group consecutive residues that share the same DSSP code.
        for k, g in groupby(enumerate(dssp), operator.itemgetter(1)):
            indices, keys = list(zip(*g))
            start_residue = indices[0]
            end_residue = indices[-1]
            run = [
                CHAIN_NAMES[self.top.topology.residue(
                    start_residue).chain.index], start_residue, CHAIN_NAMES[
                        self.top.topology.residue(end_residue).chain.index],
                end_residue
            ]
            if k == 'H':
                helices.append(run)
            elif k == 'E':
                sheets.append(run)
        return helices, sheets

    # -------------------------------------------------------------------------#

    def handle_index(self):
        """Serve the static ``index.html`` page."""
        return self.send_static_file('index.html')

    def handle_js(self, path):
        """Serve a static file from the ``js`` sub-folder."""
        return self.send_static_file(os.path.join('js', path))

    def handle_css(self, path):
        """Serve a static file from the ``css`` sub-folder."""
        return self.send_static_file(os.path.join('css', path))

    def handle_pdb(self):
        """Return the topology PDB string and its secondary structure as JSON."""
        helices, sheets = self.compute_secondary(self.top)
        return json.dumps({
            'pdbstring': self.topology_pdb_sring,
            'helices': helices,
            'sheets': sheets
        })

    def handle_heatmap_json(self):
        """Return a 2D histogram of the projection and its extent as JSON."""
        x = self.data['X'][:, 0]
        y = self.data['X'][:, 1]

        # The histogram is computed on -y and the reported extent negates the
        # y edges back again.
        # NOTE(review): presumably this flips the vertical axis for display.
        heatmap, xedges, yedges = np.histogram2d(x, -y, bins=self.args.n_bins)

        return json.dumps({
            'heatmap': heatmap.T.tolist(),
            'vmax': float(np.max(heatmap)),
            'extent': {
                'xmin': xedges[0],
                'xmax': xedges[-1],
                'ymin': -yedges[-1],
                'ymax': -yedges[0]
            }
        })

    def handle_xy(self):
        """Return the structure nearest to the requested ``x``/``y`` as JSON.

        Reads the ``x`` and ``y`` query parameters (default 0), finds the
        nearest projected point, loads the matching frame, superposes it on
        the alpha carbons and returns coordinates plus secondary structure.
        """
        x = float(request.args.get('x', 0))
        y = float(request.args.get('y', 0))

        _, index = self.kdtree.query(x=[x, y], k=1)

        frame = self.load_frame(self.data['fns'][index].decode('utf-8'),
                                self.data['indices'][index])

        if self.args.progressive:
            # Filenames are stored as UTF-8 bytes, so decode here exactly as
            # for the requested frame above; passing the raw bytes would use a
            # different cache key and defeat the extension check in load_frame.
            oldframe = self.load_frame(
                self.data['fns'][self._last_index].decode('utf-8'),
                self.data['indices'][self._last_index])
            superpose_target = oldframe
            self._last_index = index
        else:
            superpose_target = self.top

        frame.superpose(superpose_target,
                        atom_indices=self.alpha_carbon_indices)
        helices, sheets = self.compute_secondary(frame)
        # convert to angstroms
        xyz = frame.xyz[0] * 10.0

        return json.dumps({
            'x': xyz[:, 0].tolist(),
            'y': xyz[:, 1].tolist(),
            'z': xyz[:, 2].tolist(),
            'helices': helices,
            'sheets': sheets,
        })
Example #4
0
class WriteVMDCommand(Command):
    """Write out data in VMD-compatible format.

    Note that for full generality, this command will write out
    a n_frames x n_atoms plaintext file for reading by VMD. This file
    could be quite large.

    The resulting tcl script will load the molecule and then read the
    visualization data into it. To load it from the command line use

        $ vmd -e out_prefix.tcl

    To use it from an already-running instance of VMD:

        >>> source out_prefix.tcl

    If you already have your molecule loaded, delete the first two lines
    of the output script and set the $mol variable to your molecule. For
    example:

        >>> set mol top
        >>> source out_prefix.modified.tcl
    """
    _concrete = True
    _group = 'SolventShells4'
    # Inside the class body ``__doc__`` is the docstring above, so editing the
    # docstring also changes the command's description text.
    description = __doc__

    @classmethod
    def _get_name(cls):
        """Return the name this command is registered under."""
        return "SolventWriteVMD"

    dataset = argument("-ds", "--dataset",
             help="Path to the dataset.",
             required=True)
    # NOTE(review): no default and not required; if the parser leaves this as
    # None the output files would be named "None.txt" / "None.tcl" -- confirm.
    out_prefix = argument('-o', '--out_prefix',
             help="Prefix output files with this")
    traj = argument('--trj',
             help="Trajectory to load in VMD",
             default="traj.dcd")
    top = argument('--top',
             help="Topology to load in VMD",
             default="top.pdb")
    stride = argument('--stride',
             help="Stride by this when loading in VMD. Match this to"
                  " whatever value you used in ApplyComponents",
             type=int, default=1)

    def __init__(self, args):
        """Copy the parsed arguments onto the instance.

        Note that these instance attributes shadow the class-level
        ``argument`` declarations of the same names.
        """
        self.dataset = args.dataset
        self.out_prefix = args.out_prefix
        self.traj = args.trj
        self.top = args.top
        self.stride = args.stride

    def start(self):
        """Write ``<out_prefix>.txt`` (data) and ``<out_prefix>.tcl`` (script).

        The dataset must contain exactly one entry; otherwise ``self.error``
        is called.
        """
        dat_fn = "{}.txt".format(self.out_prefix)
        tcl_fn = "{}.tcl".format(self.out_prefix)

        # ``dataset`` here is the module-level function (class attributes are
        # not in scope inside methods). The context manager closes the
        # dataset even if validation or writing fails.
        with dataset(self.dataset, mode='r') as ds:
            # Explicit check instead of ``assert`` so that it is not stripped
            # when Python runs with -O.
            if len(ds) != 1:
                self.error("Only support one at a time for now")
            np.savetxt(dat_fn, ds[0], fmt="%.5f")

        with open(tcl_fn, 'w') as f:
            f.write(VMDSCRIPT.format(
                traj_fn=self.traj, step=self.stride, top_fn=self.top,
                dat_fn=dat_fn
            ))
Example #5
0
class ApplyComponentsCommand(NumpydocClassCommand):
    # Command-line wrapper around ``ApplyComponents``: for every
    # (trajectory, assignments) pair it calls
    # ``self.instance.partial_transform`` and stores the result in a derived
    # 'dir-npy' dataset at ``--out``.
    klass = ApplyComponents
    _concrete = True
    _group = 'SolventShells3'

    @classmethod
    def _get_name(cls):
        """Return the name this command is registered under."""
        return "SolventApplyComponents"

    trjs = argument(
        '--trjs', help='Glob pattern for trajectories',
        default='', required=True)
    top = argument(
        '--top', help='Path to topology file matching the trajectories',
        default='')
    out = argument(
        '--out', required=True, help='Output path', type=exttype('/'))
    assignments = argument(
        '--assignments', help="Assignments dataset from SolventShellsAssigner",
        required=True
    )

    # NOTE(review): the ``_<name>_type`` methods below look like per-parameter
    # converters picked up by NumpydocClassCommand -- confirm against the
    # base class.

    def _solvent_indices_type(self, fn):
        """Load integer indices from text file ``fn``; ``None`` passes through.

        The result always has at least one dimension (``ndmin=1``).
        """
        if fn is None:
            return None
        # Builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
        # releases no longer provide.
        return np.loadtxt(fn, dtype=int, ndmin=1)

    def _solute_indices_type(self, fn):
        """Load integer indices from text file ``fn``; ``None`` passes through.

        The result always has at least one dimension (``ndmin=1``).
        """
        if fn is None:
            return None
        return np.loadtxt(fn, dtype=int, ndmin=1)

    def _component_type(self, spec):
        """Load one component from ``spec`` and normalise it.

        Parameters
        ----------
        spec : str or None
            Either ``path`` (component 0 is used) or ``path:index``. Only the
            text after the last ':' is parsed as the index, so the path
            itself may contain ':' as long as an index is appended.

        Returns
        -------
        The squared component divided by its maximum, or ``None`` when
        ``spec`` is ``None``.
        """
        if spec is None:
            return None
        spec_split = spec.split(':')
        if len(spec_split) > 1:
            fn = ':'.join(spec_split[:-1])
            comp_i = int(spec_split[-1])
        else:
            fn = spec_split[0]
            comp_i = 0

        obj = utils.load(fn)
        component = obj.components_[comp_i]
        component = component ** 2
        component = component / np.max(component)
        return component

    def start(self):
        """Transform every trajectory/assignment pair and save the results.

        Calls ``self.error`` if the output path already exists.
        """
        if os.path.exists(self.out):
            self.error('File exists: %s' % self.out)

        print(self.instance)
        # A topology path that does not exist (including the '' default) is
        # passed on as None.
        if os.path.exists(os.path.expanduser(self.top)):
            top = os.path.expanduser(self.top)
        else:
            top = None

        traj_dataset = MDTrajDataset(self.trjs, topology=top,
                                     stride=self.instance.stride, verbose=False)

        with dataset(self.assignments, mode='r') as assn_dataset:
            out_dataset = assn_dataset.create_derived(self.out, fmt='dir-npy')
            # try/finally so the output dataset is closed even when a
            # transform raises part-way through.
            try:
                pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                                   maxval=len(assn_dataset)).start()
                # Trajectories and assignments are paired purely by the order
                # of their keys.
                for tr_key, as_key in pbar(
                        zip(traj_dataset.keys(), assn_dataset.keys())
                ):
                    out_dataset[as_key] = self.instance.partial_transform(
                        (traj_dataset[tr_key], assn_dataset[as_key])
                    )
            finally:
                out_dataset.close()

        print("\nSaving transformed dataset to '%s'" % self.out)
        print("To load this dataset interactive inside an IPython")
        print("shell or notebook, run\n")
        print("  $ ipython")
        print("  >>> from msmbuilder.dataset import dataset")
        print("  >>> ds = dataset('%s')\n" % self.out)