def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Attach a trajectory Reader and fix the frame range to iterate over.

        The slice returned by ``trajectory.check_slice_indices`` is stored on
        ``self.start``, ``self.stop`` and ``self.step``; the number of frames
        it covers is stored on ``self.n_frames`` and a progress meter is
        created as ``self._pm``.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # TODO: Remove once start/stop/step are deprecated from init
        # Attributes already present on the object take precedence over the
        # arguments passed to this method.
        wanted_start = getattr(self, 'start', start)
        wanted_stop = getattr(self, 'stop', stop)
        wanted_step = getattr(self, 'step', step)
        first, last, stride = trajectory.check_slice_indices(
            wanted_start, wanted_stop, wanted_step)
        self.start, self.stop, self.step = first, last, stride
        self.n_frames = len(range(first, last, stride))

        # Report roughly every 1% of the frames, but never use interval 0.
        report_every = max(self.n_frames // 100, 1)
        be_verbose = getattr(self, '_verbose', False)
        # The meter is given a total of at least 1, even for an empty range.
        self._pm = ProgressMeter(self.n_frames or 1,
                                 interval=report_every,
                                 verbose=be_verbose)
Exemple #2
0
    def _prepare(self):
        """Allocate the covariance matrix and, if requested, compute the mean.

        Sets ``self.cov`` to a zero matrix of size ``3N x 3N`` (``N`` being
        ``self._n_atoms``) and stores the reference positions shifted by the
        reference centre of geometry in ``self._ref_atom_positions``.

        When ``self._calc_mean`` is true, the flattened positions of
        ``self._atoms`` are summed over the frames
        ``self._u.trajectory[self.start:self.stop:self.step]`` into
        ``self.mean`` and divided by ``self.n_frames``.  If ``self.align`` is
        true each frame is first fitted to the reference with ``_fit_to``.
        """
        n_dim = self._n_atoms * 3
        self.cov = np.zeros((n_dim, n_dim))
        self._ref_atom_positions = self._reference.positions
        self._ref_cog = self._reference.center_of_geometry()
        self._ref_atom_positions -= self._ref_cog

        if self._calc_mean:
            # Update about every 1% of the frames; interval must be >= 1.
            interval = max(int(self.n_frames // 100), 1)
            # Named ``fmt`` so the ``format`` builtin is not shadowed.
            fmt = ("Mean Calculation Step"
                   "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")
            mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                    interval=interval,
                                    verbose=self._verbose,
                                    format=fmt)
            frames = self._u.trajectory[self.start:self.stop:self.step]
            for i, ts in enumerate(frames):
                if self.align:
                    mobile_cog = self._atoms.center_of_geometry()
                    # NOTE(review): _fit_to is defined elsewhere; it is
                    # assumed to return the fitted atoms first -- confirm.
                    mobile_atoms, old_rmsd = _fit_to(self._atoms.positions,
                                                     self._ref_atom_positions,
                                                     self._atoms,
                                                     mobile_com=mobile_cog,
                                                     ref_com=self._ref_cog)
                    frame_positions = mobile_atoms.positions
                else:
                    frame_positions = self._atoms.positions
                # Accumulate in BOTH branches: the aligned branch used to
                # skip this, which left the mean at its initial value.
                self.mean += frame_positions.ravel()
                mean_pm.echo(i)
            self.mean /= self.n_frames

        self.mean_atoms = self._atoms
        self.mean_atoms.positions = self._atoms.positions
Exemple #3
0
    def _prepare(self):
        """Allocate the covariance matrix and, if requested, compute the mean.

        Sets ``self.cov`` to a zero matrix of size ``3N x 3N`` (``N`` being
        ``self._n_atoms``) and stores the reference positions shifted by the
        reference centre of geometry in ``self._ref_atom_positions``.

        When ``self._calc_mean`` is true, the flattened positions of
        ``self._atoms`` are summed over the frames
        ``self._u.trajectory[self.start:self.stop:self.step]`` into
        ``self.mean`` and divided by ``self.n_frames``.  If ``self.align`` is
        true each frame is first fitted to the reference with ``_fit_to``.
        """
        n_dim = self._n_atoms * 3
        self.cov = np.zeros((n_dim, n_dim))
        self._ref_atom_positions = self._reference.positions
        self._ref_cog = self._reference.center_of_geometry()
        self._ref_atom_positions -= self._ref_cog

        if self._calc_mean:
            # Update about every 1% of the frames; interval must be >= 1.
            interval = max(int(self.n_frames // 100), 1)
            # Named ``fmt`` so the ``format`` builtin is not shadowed.
            fmt = ("Mean Calculation Step"
                   "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]\r")
            mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                    interval=interval, verbose=self._verbose,
                                    format=fmt)
            frames = self._u.trajectory[self.start:self.stop:self.step]
            for i, ts in enumerate(frames):
                if self.align:
                    mobile_cog = self._atoms.center_of_geometry()
                    # NOTE(review): _fit_to is defined elsewhere; it is
                    # assumed to return the fitted atoms first -- confirm.
                    mobile_atoms, old_rmsd = _fit_to(self._atoms.positions,
                                                     self._ref_atom_positions,
                                                     self._atoms,
                                                     mobile_com=mobile_cog,
                                                     ref_com=self._ref_cog)
                    frame_positions = mobile_atoms.positions
                else:
                    frame_positions = self._atoms.positions
                # Accumulate in BOTH branches: the aligned branch used to
                # skip this, which left the mean at its initial value.
                self.mean += frame_positions.ravel()
                mean_pm.echo(i)
            self.mean /= self.n_frames

        self.mean_atoms = self._atoms
        self.mean_atoms.positions = self._atoms.positions
 def _selection_serial(self, universe, selection_str):
     """Evaluate ``selection_str`` on every frame of the trajectory.

     Returns a list holding the result of ``universe.select_atoms`` for
     each frame, in iteration order.  A progress meter (``interval=10``,
     verbose) is updated with every frame number.
     """
     trajectory = universe.trajectory
     progress = ProgressMeter(trajectory.n_frames, interval=10, verbose=True)
     per_frame = []
     for frame_ts in trajectory:
         per_frame.append(universe.select_atoms(selection_str))
         progress.echo(frame_ts.frame)
     return per_frame
Exemple #5
0
 def test_output(self, capsys):
     """The last progress line captured on stderr reports step 10 of 10."""
     total = 10
     meter = ProgressMeter(total, interval=1)
     for step in range(total):
         meter.echo(step)
     _, captured_err = capsys.readouterr()
     # Updates are separated by carriage returns; keep only the final one.
     last_line = captured_err.strip().split('\r')[-1]
     assert last_line == 'Step    10/10 [100.0%]'
Exemple #6
0
 def _selection_serial(self, universe, selection_str):
     """Evaluate ``selection_str`` on every frame of the trajectory.

     Returns a list holding the result of ``universe.select_atoms`` for
     each frame, in iteration order.  A progress meter (``interval=10``,
     verbose) is updated with every frame number.
     """
     trajectory = universe.trajectory
     progress = ProgressMeter(trajectory.n_frames, interval=10, verbose=True)
     per_frame = []
     for frame_ts in trajectory:
         per_frame.append(universe.select_atoms(selection_str))
         progress.echo(frame_ts.frame)
     return per_frame
Exemple #7
0
    def write(self, filename, start=None, step=None, delta=None, load=True):
        """Write hopping trajectory as standard dcd file.

        write('TAP')

        :Arguments:

        filename        Passed to ``self.filename()`` to build the name of
                        the output dcd file.
        start           Starting time step for the DCD file; defaults to
                        ``self.traj.start_timestep``.
        step            NSAVC (time steps between written DCD frames);
                        defaults to ``self.traj.skip_timestep``.
        delta           Length of a time step (AKMA units); defaults to
                        ``self.traj.delta``.
        load = True     Immediately loads the trajectory so that further
                        calls to next() will use the computed
                        trajectory and don't use expensive mapping.

        Ignore the other options and leave them at the defaults. Currently,
        only the whole trajectory is written. All atoms in the original
        trajectory are written to the output so you should be able to use your
        original psf file.

        NOTE: Fixed atoms are possibly not accounted for properly.

        Note that it is your responsibility to load the TAP trajectory and the
        appropriate psf together as there is very limited information stored in
        the dcd itself.
        """
        set_verbosity(self.verbosity)  # this is stupid

        # NOTE(review): psfname is not used below; the call is kept in case
        # self.filename() has side effects -- confirm and remove if not.
        psfname = self.filename(filename, 'psf')
        dcdname = self.filename(filename, 'dcd')

        # see MDAnalysis/src/dcd/dcd.c for explanations
        if start is None:
            start = self.traj.start_timestep  # starting time step for DCD file
        if step is None:
            step = self.traj.skip_timestep  # NSAVC (# ts between written DCD frames)
        if delta is None:
            delta = self.traj.delta  # length of ts (AKMA units)

        dcdwriter = MDAnalysis.DCD.DCDWriter(dcdname,
                                             self.ts.n_atoms,
                                             start,
                                             step,
                                             delta,
                                             remarks='TAP trajectory')
        try:
            pm = ProgressMeter(
                self.n_frames,
                interval=10,
                format=
                "Mapping TAP frame %(step)5d/%(numsteps)6d  [%(percentage)5.1f%%]\r"
            )
            for ts in self.map_dcd():
                dcdwriter.write_next_timestep(ts)
                pm.echo(ts.frame)
        finally:
            # Always release the output file, even if mapping/writing fails.
            dcdwriter.close()
        logger.info("TAPTrajectory.write(): wrote TAP traj %r.", dcdname)

        if load is True:
            self.TAPtraj = MDAnalysis.DCD.DCDReader(dcdname)
            self.trajectory = self.TAPtraj
Exemple #8
0
 def _selection_serial(self, universe, selection_str):
     """Evaluate ``selection_str`` on the frames ``self.t0`` to ``self.tf``.

     Returns a list holding the result of ``universe.select_atoms`` for
     each frame of ``universe.trajectory[self.t0:self.tf]``.  The progress
     meter is sized to ``self.tf - self.t0`` and given ``offset=-self.t0``.
     """
     first, last = self.t0, self.tf
     progress = ProgressMeter(last - first,
                              interval=10,
                              verbose=True,
                              offset=-first)
     per_frame = []
     for frame_ts in universe.trajectory[first:last]:
         per_frame.append(universe.select_atoms(selection_str))
         progress.echo(frame_ts.frame)
     return per_frame
Exemple #9
0
    def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
        """Calculate RMSF of given atoms across a trajectory.

        This method implements an algorithm for computing sums of squares while
        avoiding overflows and underflows [Welford1962]_.  The result is
        stored in ``self._rmsf`` (one value per atom); nothing is returned.

        Parameters
        ----------
        start : int (optional)
            starting frame [0]
        stop : int (optional)
            stopping frame [-1]
        step : int (optional)
            step between frames [1]
        progout : int (optional)
            number of frames to iterate through between updates to progress
            output; ``None`` for no updates [10]
        quiet : bool (optional)
            if ``True``, suppress all output (implies *progout* = ``None``)
            [``False``]

        Raises
        ------
        ValueError
            if the frame selection ``[start:stop:step]`` is empty, or if any
            computed RMSF value is not ``>= 0``.

        Notes
        -----
        NOTE(review): the frames are taken as ``trajectory[start:stop:step]``;
        if the reader follows Python slice semantics the default ``stop=-1``
        leaves out the last frame -- confirm this is intended.

        References
        ----------
        [Welford1962] B. P. Welford (1962). "Note on a Method for Calculating
           Corrected Sums of Squares and Products." Technometrics 4(3):419-420.
        """
        atoms = self.atomgroup
        trajectory = atoms.universe.trajectory
        sumsquares = np.zeros((atoms.n_atoms, 3))
        means = np.array(sumsquares)

        if quiet:
            progout = None

        # set up progress output
        if progout:
            percentage = ProgressMeter(trajectory.n_frames,
                                       interval=progout)
        else:
            percentage = ProgressMeter(trajectory.n_frames,
                                       quiet=True)

        n_seen = 0
        for k, ts in enumerate(trajectory[start:stop:step]):
            # Read the coordinates once per frame and reuse them.
            positions = atoms.positions
            sumsquares += (k / (k + 1.0)) * (positions - means)**2
            means = (k * means + positions) / (k + 1)
            n_seen = k + 1

            percentage.echo(ts.frame)

        # An empty selection used to fail with an UnboundLocalError on ``k``.
        if n_seen == 0:
            raise ValueError("No frames selected for RMSF calculation")

        rmsf = np.sqrt(sumsquares.sum(axis=1) / n_seen)

        if not (rmsf >= 0).all():
            raise ValueError("Some RMSF values negative; overflow " +
                             "or underflow occurred")

        self._rmsf = rmsf
Exemple #10
0
    def write(self, filename, start=None, step=None, delta=None, load=True):
        """Write the hopping trajectory as a dcd file plus a minimal psf.

        write('hop')

        Arguments:

        load = True     Re-initialise this object from the written files so
                        that further calls to next() will use the computed
                        trajectory and don't use expensive mapping.

        Ignore the other options and leave them at the
        defaults. Currently, only the whole trajectory is written. For
        visualization one also needs the dummy psf of the group.

        Results:

        filename.trajectory and filename.psf

        Note that it is your responsibility to load the hopping
        trajectory and the appropriate psf together as there is very
        limited information stored in the dcd itself.
        """
        set_verbosity(self.verbosity)  # this is stupid

        psfname = self.filename(filename, 'psf')
        dcdname = self.filename(filename, 'dcd')

        progress_format = (
            "Mapping frame %(step)5d/%(numsteps)6d  [%(percentage)5.1f%%]\r")
        progress = ProgressMeter(self.n_frames,
                                 interval=10,
                                 format=progress_format)
        writer_options = dict(
            n_atoms=self.ts.n_atoms,
            dt=self.traj.dt,
            remarks='Hopping trajectory: x=site y=orbit_site z=0')
        with MDAnalysis.Writer(dcdname, **writer_options) as hop_writer:
            for mapped_ts in self.map_dcd():
                hop_writer.write_next_timestep(mapped_ts)
                progress.echo(mapped_ts.frame)
        logger.info("HoppingTrajectory.write(): wrote hoptraj %r.", dcdname)

        self.write_psf(psfname)
        logger.info("HoppingTrajectory.write(): wrote hoppsf %r.", psfname)

        # Only the literal ``True`` triggers the reload.
        if load is True:
            self.__init__(filename=filename, verbosity=self.verbosity)
Exemple #11
0
    def write(self,filename,start=None,step=None,delta=None,load=True):
        """Write hopping trajectory as standard dcd file.

        write('TAP')

        :Arguments:

        filename        Passed to ``self.filename()`` to build the name of
                        the output dcd file.
        start           Starting time step for the DCD file; defaults to
                        ``self.traj.start_timestep``.
        step            NSAVC (time steps between written DCD frames);
                        defaults to ``self.traj.skip_timestep``.
        delta           Length of a time step (AKMA units); defaults to
                        ``self.traj.delta``.
        load = True     Immediately loads the trajectory so that further
                        calls to next() will use the computed
                        trajectory and don't use expensive mapping.

        Ignore the other options and leave them at the defaults. Currently,
        only the whole trajectory is written. All atoms in the original
        trajectory are written to the output so you should be able to use your
        original psf file.

        NOTE: Fixed atoms are possibly not accounted for properly.

        Note that it is your responsibility to load the TAP trajectory and the
        appropriate psf together as there is very limited information stored in
        the dcd itself.
        """
        set_verbosity(self.verbosity)  # this is stupid

        # NOTE(review): psfname is not used below; the call is kept in case
        # self.filename() has side effects -- confirm and remove if not.
        psfname = self.filename(filename,'psf')
        dcdname = self.filename(filename,'dcd')

        # see MDAnalysis/src/dcd/dcd.c for explanations
        if start is None:
            start = self.traj.start_timestep # starting time step for DCD file
        if step is None:
            step = self.traj.skip_timestep   # NSAVC (# ts between written DCD frames)
        if delta is None:
            delta = self.traj.delta          # length of ts (AKMA units)

        dcdwriter = MDAnalysis.DCD.DCDWriter(dcdname,self.ts.n_atoms,
                                             start,step,delta,
                                             remarks='TAP trajectory')
        try:
            pm = ProgressMeter(self.n_frames, interval=10,
                               format="Mapping TAP frame %(step)5d/%(numsteps)6d  [%(percentage)5.1f%%]\r")
            for ts in self.map_dcd():
                dcdwriter.write_next_timestep(ts)
                pm.echo(ts.frame)
        finally:
            # Always release the output file, even if mapping/writing fails.
            dcdwriter.close()
        logger.info("TAPTrajectory.write(): wrote TAP traj %r.", dcdname)

        if load is True:
            self.TAPtraj = MDAnalysis.DCD.DCDReader(dcdname)
            self.trajectory = self.TAPtraj
    def _prepare(self):
        """Select the atoms, allocate the covariance matrix, compute the mean.

        Moves the trajectory to frame ``self.start`` and selects
        ``self._select`` twice, as ``self._reference`` and ``self._atoms``.
        If ``self._mean`` is ``None`` the mean is computed here; otherwise
        ``self._mean.positions`` is used as ``self.mean``.

        Sets ``self.cov`` to a zero matrix of size ``3N x 3N`` (``N`` being
        the number of selected atoms) and stores the reference positions
        shifted by the reference centre of geometry.  When the mean has to
        be computed, the flattened positions are summed over
        ``self._u.trajectory[self.start:self.stop:self.step]`` and divided
        by ``self.n_frames``; with ``self.align`` true each frame is first
        fitted to the reference with ``_fit_to``.

        Raises
        ------
        ValueError
            if ``self.n_frames`` is 1.
        """
        # access start index
        self._u.trajectory[self.start]
        # reference will be start index
        self._reference = self._u.select_atoms(self._select)
        self._atoms = self._u.select_atoms(self._select)
        self._n_atoms = self._atoms.n_atoms

        if self._mean is None:
            self.mean = np.zeros(self._n_atoms * 3)
            self._calc_mean = True
        else:
            self.mean = self._mean.positions
            self._calc_mean = False

        if self.n_frames == 1:
            # A space was missing between the two literals ("from asingle").
            raise ValueError('No covariance information can be gathered from a '
                             'single trajectory frame.\n')
        n_dim = self._n_atoms * 3
        self.cov = np.zeros((n_dim, n_dim))
        self._ref_atom_positions = self._reference.positions
        self._ref_cog = self._reference.center_of_geometry()
        self._ref_atom_positions -= self._ref_cog

        if self._calc_mean:
            # Update about every 1% of the frames; interval must be >= 1.
            interval = max(int(self.n_frames // 100), 1)
            # Named ``fmt`` so the ``format`` builtin is not shadowed.
            fmt = ("Mean Calculation Step"
                   "%(step)5d/%(numsteps)d [%(percentage)5.1f%%]")
            mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                    interval=interval,
                                    verbose=self._verbose,
                                    format=fmt)
            frames = self._u.trajectory[self.start:self.stop:self.step]
            for i, ts in enumerate(frames):
                if self.align:
                    mobile_cog = self._atoms.center_of_geometry()
                    # NOTE(review): _fit_to is defined elsewhere; it is
                    # assumed to return the fitted atoms first -- confirm.
                    mobile_atoms, old_rmsd = _fit_to(self._atoms.positions,
                                                     self._ref_atom_positions,
                                                     self._atoms,
                                                     mobile_com=mobile_cog,
                                                     ref_com=self._ref_cog)
                    frame_positions = mobile_atoms.positions
                else:
                    frame_positions = self._atoms.positions
                # Accumulate in BOTH branches: the aligned branch used to
                # skip this, which left the mean at zero.
                self.mean += frame_positions.ravel()
                mean_pm.echo(i)
            self.mean /= self.n_frames

        self.mean_atoms = self._atoms
        self.mean_atoms.positions = self._atoms.positions
Exemple #13
0
    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Attach a trajectory Reader and fix the frame range to iterate over.

        The slice returned by ``trajectory.check_slice_indices`` is stored on
        ``self.start``, ``self.stop`` and ``self.step``; the number of frames
        it covers is stored on ``self.n_frames`` and a progress meter is
        created as ``self._pm``.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # TODO: Remove once start/stop/step are deprecated from init
        # Attributes already present on the object take precedence over the
        # arguments passed to this method.
        wanted_start = getattr(self, 'start', start)
        wanted_stop = getattr(self, 'stop', stop)
        wanted_step = getattr(self, 'step', step)
        first, last, stride = trajectory.check_slice_indices(
            wanted_start, wanted_stop, wanted_step)
        self.start, self.stop, self.step = first, last, stride
        self.n_frames = len(range(first, last, stride))

        # Report roughly every 1% of the frames, but never use interval 0.
        report_every = max(self.n_frames // 100, 1)
        be_verbose = getattr(self, '_verbose', False)
        # The meter is given a total of at least 1, even for an empty range.
        self._pm = ProgressMeter(self.n_frames or 1,
                                 interval=report_every, verbose=be_verbose)
Exemple #14
0
 def test_deprecated(self, capsys):
     """Creating a ProgressMeter emits exactly one DeprecationWarning that
     mentions ``MDAnalysis.lib.log.ProgressBar``."""
     with warnings.catch_warnings(record=True) as caught:
         # Make sure no warning filter hides the deprecation message.
         warnings.simplefilter("always")
         # Instantiation alone is expected to trigger the warning.
         _meter = ProgressMeter(10)
         assert len(caught) == 1
         latest = caught[-1]
         assert issubclass(latest.category, DeprecationWarning)
         assert "MDAnalysis.lib.log.ProgressBar" in str(latest.message)
Exemple #15
0
    def write(self,filename,start=None,step=None,delta=None,load=True):
        """Write the hopping trajectory as a dcd file plus a minimal psf.

        write('hop')

        Arguments:

        load = True     Re-initialise this object from the written files so
                        that further calls to next() will use the computed
                        trajectory and don't use expensive mapping.

        Ignore the other options and leave them at the
        defaults. Currently, only the whole trajectory is written. For
        visualization one also needs the dummy psf of the group.

        Results:

        filename.trajectory and filename.psf

        Note that it is your responsibility to load the hopping
        trajectory and the appropriate psf together as there is very
        limited information stored in the dcd itself.
        """
        set_verbosity(self.verbosity)  # this is stupid

        psfname = self.filename(filename, 'psf')
        dcdname = self.filename(filename, 'dcd')

        progress_format = (
            "Mapping frame %(step)5d/%(numsteps)6d  [%(percentage)5.1f%%]\r")
        progress = ProgressMeter(self.n_frames, interval=10,
                                 format=progress_format)
        writer_options = dict(
            n_atoms=self.ts.n_atoms,
            dt=self.traj.dt,
            remarks='Hopping trajectory: x=site y=orbit_site z=0')
        with MDAnalysis.Writer(dcdname, **writer_options) as hop_writer:
            for mapped_ts in self.map_dcd():
                hop_writer.write_next_timestep(mapped_ts)
                progress.echo(mapped_ts.frame)
        logger.info("HoppingTrajectory.write(): wrote hoptraj %r.", dcdname)

        self.write_psf(psfname)
        logger.info("HoppingTrajectory.write(): wrote hoppsf %r.", psfname)

        # Only the literal ``True`` triggers the reload.
        if load is True:
            self.__init__(filename=filename, verbosity=self.verbosity)
    def run(self, force=False):
        """
        Run every pass and store the averaged result.

        Each ``(start, stop)`` pair from ``self._starts``/``self._stops`` is
        handed to ``self._single_run``; the results are summed element-wise
        and divided by the number of runs that contributed to each element.
        The outcome is written to ``self.solution['results']`` together with
        a matching ``self.solution['time']`` axis.

        :Keywords:
          *force*
            Will overwrite previous results if they exist
        """
        # if results exist, don't waste any time
        already_done = self.solution['results'] is not None
        if already_done and not force:
            return

        # The first run defines the length of the accumulated result.
        template = numpy.arange(self._starts[0], self._stops[0], self._skip)
        master_results = numpy.zeros_like(template, dtype=numpy.float32)
        # for normalising later
        counter = numpy.zeros_like(master_results, dtype=numpy.float32)

        progress = ProgressMeter(self.nruns, interval=1,
                                 format="Performing run %(step)5d/%(numsteps)d"
                                        "[%(percentage)5.1f%%]\r")

        run_bounds = izip(self._starts, self._stops)
        for run_number, (start, stop) in enumerate(run_bounds, 1):
            progress.echo(run_number)

            # needed else trj seek thinks a numpy.int64 isn't an int?
            results = self._single_run(int(start), int(stop))

            # A run may be shorter than the first one; only the leading part
            # is updated then (a full-length run updates every element).
            nresults = len(results)
            master_results[:nresults] += results
            counter[:nresults] += 1.0

        master_results /= counter

        frame_index = numpy.arange(len(master_results), dtype=numpy.float32)
        self.solution['time'] = frame_index * self.u.trajectory.dt * self._skip
        self.solution['results'] = master_results
Exemple #17
0
    def run(self, force=False):
        """Run every pass and store the averaged result.

        Each ``(start, stop)`` pair from ``self._starts``/``self._stops`` is
        handed to ``self._single_run``; the results are summed element-wise
        and divided by the number of runs that contributed to each element.
        The outcome is written to ``self.solution['results']`` together with
        a matching ``self.solution['time']`` axis.

        Parameters
        ----------
        force : bool, optional
            Will overwrite previous results if they exist
        """
        # if results exist, don't waste any time
        already_done = self.solution['results'] is not None
        if already_done and not force:
            return

        # The first run defines the length of the accumulated result.
        template = np.arange(self._starts[0], self._stops[0], self._skip)
        master_results = np.zeros_like(template, dtype=np.float32)
        # for normalising later
        counter = np.zeros_like(master_results, dtype=np.float32)

        progress = ProgressMeter(self.nruns,
                                 interval=1,
                                 format="Performing run %(step)5d/%(numsteps)d"
                                 "[%(percentage)5.1f%%]\r")

        run_bounds = zip(self._starts, self._stops)
        for run_number, (start, stop) in enumerate(run_bounds, 1):
            progress.echo(run_number)

            # needed else trj seek thinks a np.int64 isn't an int?
            results = self._single_run(int(start), int(stop))

            # A run may be shorter than the first one; only the leading part
            # is updated then (a full-length run updates every element).
            nresults = len(results)
            master_results[:nresults] += results
            counter[:nresults] += 1.0

        master_results /= counter

        frame_index = np.arange(len(master_results), dtype=np.float32)
        self.solution['time'] = frame_index * self.u.trajectory.dt * self._skip
        self.solution['results'] = master_results
Exemple #18
0
    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Attach a trajectory Reader and fix the frame range to iterate over.

        The slice returned by ``trajectory.check_slice_indices`` is stored on
        ``self.start``, ``self.stop`` and ``self.step``; the number of frames
        it covers is stored on ``self.n_frames`` and a progress meter is
        created as ``self._pm``.  ``self._verbose`` is set here when it is
        missing.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        first, last, stride = trajectory.check_slice_indices(start, stop, step)
        self.start, self.stop, self.step = first, last, stride
        self.n_frames = len(range(first, last, stride))
        # Report roughly every 1% of the frames, but never use interval 0.
        report_every = max(self.n_frames // 100, 1)

        # ensure _verbose is set when __init__ wasn't called, this is to not
        # break pre 0.16.0 API usage of AnalysisBase
        verbose_missing = not hasattr(self, '_verbose')
        if verbose_missing and hasattr(self, '_quiet'):
            # Odd case: a child class defined self._quiet without going
            # through AnalysisBase.__init__.
            warnings.warn(
                "The *_quiet* attribute of analyses is "
                "deprecated (from 0.16)use *_verbose* instead.",
                DeprecationWarning)
            self._verbose = not self._quiet
        elif verbose_missing:
            self._verbose = True
            self._quiet = not self._verbose
        # The meter is given a total of at least 1, even for an empty range.
        self._pm = ProgressMeter(self.n_frames or 1,
                                 interval=report_every,
                                 verbose=self._verbose)
Exemple #19
0
    def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
        """Calculate RMSF of given atoms across a trajectory.

        This method implements an algorithm for computing sums of squares while
        avoiding overflows and underflows [Welford1962]_.  The result is
        stored in ``self._rmsf`` (one value per atom); nothing is returned.

        Parameters
        ----------
        start : int (optional)
            starting frame [0]
        stop : int (optional)
            stopping frame [-1]
        step : int (optional)
            step between frames [1]
        progout : int (optional)
            number of frames to iterate through between updates to progress
            output; ``None`` for no updates [10]
        quiet : bool (optional)
            if ``True``, suppress all output (implies *progout* = ``None``)
            [``False``]

        Raises
        ------
        ValueError
            if the frame selection ``[start:stop:step]`` is empty, or if any
            computed RMSF value is not ``>= 0``.

        Notes
        -----
        NOTE(review): the frames are taken as ``trajectory[start:stop:step]``;
        if the reader follows Python slice semantics the default ``stop=-1``
        leaves out the last frame -- confirm this is intended.

        References
        ----------
        [Welford1962] B. P. Welford (1962). "Note on a Method for Calculating
           Corrected Sums of Squares and Products." Technometrics 4(3):419-420.
        """
        atoms = self.atomgroup
        trajectory = atoms.universe.trajectory
        sumsquares = np.zeros((atoms.n_atoms, 3))
        means = np.array(sumsquares)

        if quiet:
            progout = None

        # set up progress output
        if progout:
            percentage = ProgressMeter(
                trajectory.n_frames, interval=progout)
        else:
            percentage = ProgressMeter(
                trajectory.n_frames, quiet=True)

        n_seen = 0
        for k, ts in enumerate(trajectory[start:stop:step]):
            # Read the coordinates once per frame and reuse them.
            positions = atoms.positions
            sumsquares += (k / (k + 1.0)) * (positions - means)**2
            means = (k * means + positions) / (k + 1)
            n_seen = k + 1

            percentage.echo(ts.frame)

        # An empty selection used to fail with an UnboundLocalError on ``k``.
        if n_seen == 0:
            raise ValueError("No frames selected for RMSF calculation")

        rmsf = np.sqrt(sumsquares.sum(axis=1) / n_seen)

        if not (rmsf >= 0).all():
            raise ValueError("Some RMSF values negative; overflow " +
                             "or underflow occurred")

        self._rmsf = rmsf
Exemple #20
0
    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Attach a trajectory Reader and fix the frame range to iterate over.

        ``self.start``, ``self.stop`` and ``self.step`` keep the values as
        passed in; the slice returned by ``trajectory.check_slice_indices``
        is only used to compute ``self.n_frames``.  A progress meter is
        created as ``self._pm`` and ``self._verbose`` is set here when it is
        missing.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # Store the caller's values untouched, before they are checked.
        self.start, self.stop, self.step = start, stop, step
        first, last, stride = trajectory.check_slice_indices(start, stop, step)
        self.n_frames = len(range(first, last, stride))
        # Report roughly every 1% of the frames, but never use interval 0.
        report_every = max(self.n_frames // 100, 1)

        # ensure _verbose is set when __init__ wasn't called, this is to not
        # break pre 0.16.0 API usage of AnalysisBase
        verbose_missing = not hasattr(self, '_verbose')
        if verbose_missing and hasattr(self, '_quiet'):
            # Odd case: a child class defined self._quiet without going
            # through AnalysisBase.__init__.
            warnings.warn("The *_quiet* attribute of analyses is "
                          "deprecated (from 0.16)use *_verbose* instead.",
                          DeprecationWarning)
            self._verbose = not self._quiet
        elif verbose_missing:
            self._verbose = True
            self._quiet = not self._verbose
        # The meter is given a total of at least 1, even for an empty range.
        self._pm = ProgressMeter(self.n_frames or 1,
                                 interval=report_every, verbose=self._verbose)
Exemple #21
0
def helanal_trajectory(universe,
                       selection="name CA",
                       start=None,
                       end=None,
                       begin=None,
                       finish=None,
                       matrix_filename="bending_matrix.dat",
                       origin_pdbfile="origin.pdb",
                       summary_filename="summary.txt",
                       screw_filename="screw.xvg",
                       tilt_filename="local_tilt.xvg",
                       fitted_tilt_filename="fit_tilt.xvg",
                       bend_filename="local_bend.xvg",
                       twist_filename="unit_twist.xvg",
                       prefix="helanal_",
                       ref_axis=None,
                       quiet=False):
    """Perform HELANAL_ helix analysis on all frames in *universe*.

    For every analysed frame one row (frame number followed by
    space-separated values) is appended to each of the per-frame output
    files; after the last frame the accumulated statistics are written to
    the matrix and summary files and logged.

    .. Note::

       Only a single helix is analyzed. Use the selection to specify the
       helix, e.g. with "name CA and resid 1:20" or use start=1, end=20.

    :Arguments:
       *universe*
          :class:`~MDAnalysis.core.AtomGroup.Universe`

    :Keywords:
       *selection*
          selection string that selects Calpha atoms [``"name CA"``]
       *start*
          start residue resid; if only *end* is given, the resid of the first
          atom of the universe is used
       *end*
          end residue resid; if only *start* is given, the resid of the last
          atom of the universe is used
       *begin*
          start analysing for time (ps) >= *begin*; ``None`` starts from the
          beginning [``None``]
       *finish*
          stop analysis for time (ps) <= *finish*; ``None`` goes to the
          end of the trajectory [``None``]
       *matrix_filename*
          Output file- bending matrix [``"bending_matrix.dat"``]
       *origin_pdbfile*
          Output file- origin pdb file [``"origin.pdb"``]
       *summary_filename*
          Output file- all of the basic data [``"summary.txt"``]
       *screw_filename*
          Output file- local screw angles, one row per frame
          [``"screw.xvg"``]
       *tilt_filename*
          Output file- angle (degrees) between each local helix axis and
          *ref_axis*, one row per frame [``"local_tilt.xvg"``]
       *fitted_tilt_filename*
          Output file- tilt (degrees) of the line of best fit through the
          origins, one row per frame [``"fit_tilt.xvg"``]
       *bend_filename*
          Output file- local bend angles between successive local helix axes
          [``"local_bend.xvg"``]
       *twist_filename*
          Output file- local unit twist between successive helix turns
          [``"unit_twist.xvg"``]
       *prefix*
          Prefix to add to all output file names; set to ``None`` to disable
          [``"helanal_"``]
       *ref_axis*
          Calculate tilt angle relative to the axis; if ``None`` then ``[0,0,1]``
          is chosen [``None``]
       *quiet*
          Passed to the progress meter as its *quiet* option.

    :Raises:
       FinishTimeException
          If the specified finish time precedes the current time stamp of the
          trajectory object.

    .. versionchanged:: 0.13.0
       New *quiet* keyword to silence frame progress output and most of the
       output that used to be printed to stdout is now logged to the logger
       *MDAnalysis.analysis.helanal* (at logelevel *INFO*).
    """
    if ref_axis is None:
        ref_axis = np.array([0., 0., 1.])
    else:
        # enable MDA API so that one can use a tuple of atoms or AtomGroup with
        # two atoms
        ref_axis = np.asarray(ref_axis)

    if not (start is None and end is None):
        # Fill in whichever bound is missing so the selection is closed.
        if start is None:
            start = universe.atoms[0].resid
        if end is None:
            end = universe.atoms[-1].resid
        selection += " and resid {start:d}:{end:d}".format(start=start,
                                                           end=end)
    ca = universe.select_atoms(selection)
    trajectory = universe.trajectory

    if finish is not None and trajectory.ts.time > finish:
        # you'd be starting with a finish time (in ps) that has already
        # passed or is not available
        raise FinishTimeException(
            'The input finish time ({finish} ps) precedes the current trajectory time of {traj_time} ps.'
            .format(finish=finish, traj_time=trajectory.time))

    # A missing bound was filled in above, so start and end are either both
    # None or both set here; one message therefore covers every case.
    if start is not None:
        logger.info("Analysing from residue %d to %d", start, end)
    logger.info("Analysing %d/%d residues", ca.n_atoms,
                universe.atoms.n_residues)

    if prefix is not None:
        prefix = str(prefix)
        matrix_filename = prefix + matrix_filename
        origin_pdbfile = prefix + origin_pdbfile
        summary_filename = prefix + summary_filename
        screw_filename = prefix + screw_filename
        tilt_filename = prefix + tilt_filename
        fitted_tilt_filename = prefix + fitted_tilt_filename
        bend_filename = prefix + bend_filename
        twist_filename = prefix + twist_filename
    for filename in (matrix_filename, origin_pdbfile, summary_filename,
                     screw_filename, tilt_filename, fitted_tilt_filename,
                     bend_filename, twist_filename):
        backup_file(filename)

    # Accumulators over all analysed frames.
    global_height = []
    global_twist = []
    global_rnou = []
    global_bending = []
    global_bending_matrix = []
    global_fitted_tilts = []

    pm = ProgressMeter(trajectory.n_frames,
                       quiet=quiet,
                       format="Frame %(step)10d: %(time)20.1f ps\r")
    for ts in trajectory:
        pm.echo(ts.frame, time=ts.time)
        frame = ts.frame
        if begin is not None and trajectory.time < begin:
            continue
        if finish is not None and trajectory.time > finish:
            break

        (twist, bending_angles, height, rnou, origins, local_helix_axes,
         local_screw_angles) = main_loop(ca.positions, ref_axis=ref_axis)

        origin_pdb(origins, origin_pdbfile)

        # calculate local bending matrix (it is looking at all i, j
        # combinations); only the upper triangle (j > i) is filled
        n_axes = len(local_helix_axes)
        if not global_bending_matrix:
            global_bending_matrix = [[[] for _ in range(n_axes)]
                                     for _ in range(n_axes)]

        for i in range(n_axes):
            for j in range(i + 1, n_axes):
                angle = np.rad2deg(
                    np.arccos(np.dot(local_helix_axes[i],
                                     local_helix_axes[j])))
                global_bending_matrix[i][j].append(angle)

        _fit_vector, fit_tilt = vector_of_best_fit(origins)
        global_height += height
        global_twist += twist
        global_rnou += rnou
        global_fitted_tilts.append(np.rad2deg(fit_tilt))

        # Append one row per frame to each per-frame file. The values are
        # separated by single spaces so that the rows can be parsed again.
        with open(twist_filename, "a") as twist_output:
            print(frame, *twist, file=twist_output)

        with open(bend_filename, "a") as bend_output:
            print(frame, *bending_angles, file=bend_output)

        with open(screw_filename, "a") as rot_output:
            print(frame, *local_screw_angles, file=rot_output)

        local_tilts = [np.rad2deg(mdamath.angle(axis, ref_axis))
                       for axis in local_helix_axes]
        with open(tilt_filename, "a") as tilt_output:
            print(frame, *local_tilts, file=tilt_output)

        with open(fitted_tilt_filename, "a") as fitted_tilt_output:
            print(frame, np.rad2deg(fit_tilt), file=fitted_tilt_output)

        if not global_bending:
            global_bending = [[] for _ in bending_angles]
        for store, bend in zip(global_bending, bending_angles):
            store.append(bend)

    twist_mean, twist_sd, twist_abdev = stats(global_twist)
    height_mean, height_sd, height_abdev = stats(global_height)
    rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou)
    ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts)

    bending_statistics = [stats(item) for item in global_bending]

    bending_statistics_matrix = [[stats(col) for col in row]
                                 for row in global_bending_matrix]
    # Three consecutive tables built from elements 0, 1 and 2 of every stats
    # result, labelled Mean, SD and ABDEV.
    with open(matrix_filename, 'w') as mat_output:
        for header, index in (("Mean", 0), ("\nSD", 1), ("\nABDEV", 2)):
            print(header, file=mat_output)
            for row in bending_statistics_matrix:
                print("".join("{0:6.1f}".format(col[index]) for col in row),
                      file=mat_output)

    logger.info("Height: %g  SD: %g  ABDEV: %g  (Angstroem)", height_mean,
                height_sd, height_abdev)
    logger.info("Twist: %g  SD: %g  ABDEV: %g", twist_mean, twist_sd,
                twist_abdev)
    logger.info("Residues/turn: %g  SD: %g  ABDEV: %g", rnou_mean, rnou_sd,
                rnou_abdev)
    logger.info("Fitted tilt: %g  SD: %g  ABDEV: %g", ftilt_mean, ftilt_sd,
                ftilt_abdev)
    logger.info("Local bending angles:")
    # Materialise the transposed statistics: the result is indexed below and
    # iterated twice, which a bare zip iterator (Python 3) does not allow.
    residue_statistics = list(zip(*bending_statistics))
    measure_names = ["Mean ", "SD   ", "ABDEV"]
    # Column labels start at 4 when no start resid is known, otherwise at
    # start + 3, with one label per bending angle.
    first_resid = 4 if start is None else start + 3
    resids = range(first_resid, first_resid + len(residue_statistics[0]))
    logger.info("ResID %s",
                " ".join("{0:8d}".format(resid) for resid in resids))
    for measure, name in zip(residue_statistics, measure_names):
        output = str(name) + " "
        output += " ".join(["{0:8.1f}".format(residue) for residue in measure])
        logger.info(output)

    with open(summary_filename, 'w') as summary_output:
        # NOTE(review): the height is labelled "(nm)" here but "(Angstroem)"
        # in the log message above for the same value -- confirm the unit.
        print("Height:",
              height_mean,
              "SD",
              height_sd,
              "ABDEV",
              height_abdev,
              '(nm)',
              file=summary_output)
        print("Twist:",
              twist_mean,
              "SD",
              twist_sd,
              "ABDEV",
              twist_abdev,
              file=summary_output)
        print("Residues/turn:",
              rnou_mean,
              "SD",
              rnou_sd,
              "ABDEV",
              rnou_abdev,
              file=summary_output)
        print("Local bending angles:", file=summary_output)
        print("ResID" + "".join("{0:8d}".format(resid) for resid in resids),
              file=summary_output)

        for measure, name in zip(residue_statistics, measure_names):
            print(name + "".join("{0:8.1f}".format(residue)
                                 for residue in measure),
                  file=summary_output)
Exemple #22
0
def density_from_Universe(universe, delta=1.0, select='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0, soluteselection=None,
                          use_kdtree=True, update_selection=False,
                          verbose=False, interval=1, quiet=None,
                          parameters=None,
                          gridcenter=None, xdim=None, ydim=None, zdim=None):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with
    `select` are histogrammed on a grid with spacing `delta`.
    A physical density of units [Angstrom^{-3}] is returned (see
    :class:`Density` for more details).

    Parameters
    ----------
    universe : MDAnalysis.Universe
            :class:`MDAnalysis.Universe` object with a trajectory
    select : str (optional)
            selection string (MDAnalysis syntax) for the species to be analyzed
            ["name OH2"]
    delta : float (optional)
            bin size for the density grid in Angstrom (same in x,y,z) [1.0]
    start : int (optional)
    stop : int (optional)
    step : int (optional)
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
    metadata : dict. optional
            `dict` of additional data to be saved with the object; the meta data
            are passed through as they are. A supplied `dict` is updated in
            place with the entries added by this function.
    padding : float (optional)
            increase histogram dimensions by padding (on top of initial box size)
            in Angstrom. Padding is ignored when setting a user defined grid. [2.0]
    soluteselection : str (optional)
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
    cutoff : float (optional)
            With `cutoff`, select "<atomsel> NOT WITHIN <cutoff> OF <soluteselection>"
            (Special routines that are faster than the standard ``AROUND`` selection);
            any value that evaluates to ``False`` (such as the default 0) disables this
            special selection.
    use_kdtree : bool (optional)
            Passed through to the special "not within" selection that is used
            when `cutoff` and `soluteselection` are set [``True``]
    update_selection : bool (optional)
            Should the selection of atoms be updated for every step? [``False``]

            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
    verbose : bool (optional)
            Print status update to the screen for every *interval* frame? [``False``]

            - ``False``: no status updates when a new frame is processed
            - ``True``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
    interval : int (optional)
           Show status update every `interval` frame [1]
    quiet : bool (optional)
            Deprecated; accepted for backwards compatibility but not used by
            this function. Use `verbose` instead. [``None``]
    parameters : dict (optional)
            `dict` with some special parameters for :class:`Density` (see docs).
            A supplied `dict` is updated in place (``isDensity`` is set to
            ``False``).
    gridcenter : numpy ndarray, float32 (optional)
            3 element numpy array detailing the x, y and z coordinates of the
            center of a user defined grid box in Angstrom [``None``]
    xdim : float (optional)
            User defined x dimension box edge in Angstrom; ignored if
            gridcenter is ``None``
    ydim : float (optional)
            User defined y dimension box edge in Angstrom; ignored if
            gridcenter is ``None``
    zdim : float (optional)
            User defined z dimension box edge in Angstrom; ignored if
            gridcenter is ``None``

    Returns
    -------
    :class:`Density`
            A :class:`Density` instance with the histogrammed data together
            with associated metadata.


    Notes
    -----

    By default, the `select` is static, i.e., atoms are only selected
    once at the beginning. If you want *dynamically changing selections* (such
    as "name OW and around 4.0 (protein and not name H*)", i.e., the water
    oxygen atoms that are within 4 Å of the protein heavy atoms) then set
    ``update_selection=True``. For the special case of calculating a density of
    the "bulk" solvent away from a solute use the optimized selections with
    keywords *cutoff* and *soluteselection* (see Examples below).

    Examples
    --------
    Basic use for creating a water density (just using the water oxygen atoms "OW")::

      density = density_from_Universe(universe, delta=1.0, select='name OW')

    If you are only interested in water within a certain region, e.g., within a
    vicinity around a binding site, you can use a selection that updates every
    step by setting the `update_selection` keyword argument::

      site_density = density_from_Universe(universe, delta=1.0,
                                           select='name OW and around 5 (resid 156 157 305)',
                                           update_selection=True)

    A special case for an updating selection is to create the "bulk density",
    i.e., the water outside the immediate solvation shell of a protein: Select
    all water oxygen atoms that are *farther away* than a given cut-off (say, 4
    Å) from the solute (here, heavy atoms of the protein)::

      bulk = density_from_Universe(universe, delta=1.0, select='name OW',
                                   soluteselection="protein and not name H*",
                                   cutoff=4)

    (Using the special case for the bulk with `soluteselection` and `cutoff`
    improves performance over the simple `update_selection` approach.)

    If you are interested in explicitly setting a grid box of a given edge size
    and origin, you can use the gridcenter and x/y/zdim arguments. For example
    to plot the density of waters within 5 Å of a ligand (in this case the
    ligand has been assigned the residue name "LIG") in a cubic grid with 20 Å
    edges which is centered on the centre of mass (COM) of the ligand::

      # Create a selection based on the ligand
      ligand_selection = universe.select_atoms("resname LIG")

      # Extract the COM of the ligand
      ligand_COM = ligand_selection.center_of_mass()

      # Generate a density of waters on a cubic grid centered on the ligand COM
      # In this case, we update the atom selection as shown above.
      water_density = density_from_Universe(universe, delta=1.0,
                                            select='name OW and around 5 resname LIG',
                                            update_selection=True,
                                            gridcenter=ligand_COM,
                                            xdim=20.0, ydim=20.0, zdim=20.0)

      (It should be noted that the `padding` keyword is not used when a user
      defined grid is assigned).

    As detailed above, the :class:`Density` object returned contains a
    physical density in units of Angstrom^{-3}. If you are interested in
    recovering the underlying probability density, simply divide by the sum::

      physical_density = density_from_Universe(universe, delta=1.0,
                                               select='name OW')

      probability_density = physical_density / physical_density.grid.sum()

    Similarly, if you would like to recover a grid containing a histogram of
    atom counts, simply multiply by the volume::

      # Here we assume that numpy is imported as np
      volume = np.prod(physical_density.delta)

      atom_count_histogram = physical_density * volume


    .. versionchanged:: 0.21.0
       Warns users that `padding` value is not used in user defined grids
    .. versionchanged:: 0.20.0
       ProgressMeter now iterates over the number of frames analysed.
    .. versionchanged:: 0.19.0
       *gridcenter*, *xdim*, *ydim* and *zdim* keywords added to allow for user
       defined boxes
    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added
    .. deprecated:: 0.16
       The keyword argument *quiet* is deprecated in favor of *verbose*.
    .. versionchanged:: 0.21.0
       time_unit and length_unit default to ps and Angstrom now flags have
       been removed (same as previous flag defaults)
    """
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for '<atomsel> not within <cutoff> of <solutesel>'
        current_coordinates = notwithin_coordinates_factory(
            u, select, soluteselection, cutoff,
            use_kdtree=use_kdtree, updating_selection=update_selection)
    else:
        group = u.select_atoms(select, updating=update_selection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        "".format(coord.shape[0], len(u.select_atoms(select)),
                  select, len(u.atoms))
    )

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = ("Non-orthorhombic unit-cell --- "
               "make sure that it has been remapped properly!")
        warnings.warn(msg)
        logger.warning(msg)

    if gridcenter is not None:
        # Issue 2372: padding is ignored, defaults to 2.0 therefore warn
        if padding > 0:
            msg = ("Box padding (currently set at {0}) "
                   "is not used in user defined grids.".format(padding))
            warnings.warn(msg)
            logger.warning(msg)
        # Generate a copy of smin/smax from coords to later check if the
        # defined box might be too small for the selection
        smin = np.min(coord, axis=0)
        smax = np.max(coord, axis=0)
        # Overwrite smin/smax with user defined values
        smin, smax = _set_user_grid(gridcenter, xdim, ydim, zdim, smin, smax)
    else:
        # Make the box bigger to avoid as much as possible 'outlier'. This
        # is important if the sites are defined at a high density: in this
        # case the bulk regions don't have to be close to 1 * n0 but can
        # be less. It's much more difficult to deal with outliers.  The
        # ideal solution would use images: implement 'looking across the
        # periodic boundaries' but that gets complicate when the box
        # rotates due to RMS fitting.
        smin = np.min(coord, axis=0) - padding
        smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    arange = np.vstack((BINS['min'], BINS['max']))
    arange = np.transpose(arange)
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges).
    # The histogram is left unnormalised, which is the histogramdd default;
    # the old ``normed`` keyword is not passed because recent NumPy releases
    # no longer accept it.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    n_frames = len(range(start, stop, step))

    pm = ProgressMeter(n_frames, interval=interval,
                       verbose=verbose,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]")

    for index, ts in enumerate(u.trajectory[start:stop:step]):
        coord = current_coordinates()

        pm.echo(index, n_atoms=len(coord))
        if len(coord) == 0:
            continue

        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    # Frames without selected atoms are skipped above but still count in the
    # average.
    grid /= float(n_frames)

    metadata = metadata if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['select'] = select
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = 'ps'
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    parameters = parameters if parameters is not None else {}
    parameters['isDensity'] = False  # must override

    g = Density(grid=grid, edges=edges, units={'length': 'Angstrom'},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
Exemple #23
0
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0, soluteselection=None,
                          use_kdtree=True, update_selection=False,
                          quiet=False, interval=1,
                          **kwargs):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with *atomselection* are
    histogrammed on a grid with spacing *delta*::

      density_from_Universe(universe, delta=1.0, atomselection='name OH2', ...) --> density

    .. Note:: By default, the *atomselection* is static, i.e., atoms are only
              selected once at the beginning. If you want dynamically changing
              selections (such as "name OW and around 4.0 (protein and not name
              H*)") then set ``update_selection=True``. For the special case of
              calculating a density of the "bulk" solvent away from a solute
              use the optimized selections with keywords *cutoff* and
              *soluteselection*.

    :Arguments:
      universe
            :class:`MDAnalysis.Universe` object with a trajectory

    :Keywords:
      atomselection
            selection string (MDAnalysis syntax) for the species to be analyzed
            ["name OH2"]
      delta
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
      start, stop, step
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
      metadata
            dictionary of additional data to be saved with the object; a copy
            is extended with the entries added by this function [``None``]
      padding
            increase histogram dimensions by padding (on top of initial box size)
            in Angstroem [2.0]
      soluteselection
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
      cutoff
            With *cutoff*, select "<atomsel> NOT WITHIN <cutoff> OF <soluteselection>"
            (Special routines that are faster than the standard ``AROUND`` selection)
            [0]
      use_kdtree
            Passed through to the special "not within" selection used with
            *cutoff* and *soluteselection* [``True``]
      update_selection
            Should the selection of atoms be updated for every step? [``False``]
            - ``True``: atom selection is updated for each frame, can be slow;
              the plain *atomselection* is re-evaluated for every frame and
              the *cutoff*/*soluteselection* filter is not applied
            - ``False``: atoms are only selected at the beginning
      quiet
            Print status update to the screen for every *interval* frame? [``False``]
            - ``True``: no status updates when a new frame is processed
            - ``False``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
      interval
           Show status update every *interval* frame [1]
      parameters
            dict with some special parameters for :class:`Density` (see doc);
            taken from *kwargs* and updated in place
      kwargs
            only *parameters* is used; all other keyword arguments are
            discarded

    :Returns: :class:`Density`

    :Raises: :exc:`TypeError` if *universe* lacks ``select_atoms`` or a
             trajectory with a current time step

    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    """
    try:
        universe.select_atoms('all')
        universe.trajectory.ts
    except AttributeError:
        raise TypeError("The universe must be a proper MDAnalysis.Universe instance.")
    u = universe
    if cutoff > 0 and soluteselection is not None:
        # special fast selection for '<atomsel> not within <cutoff> of <solutesel>'
        current_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff,
            use_kdtree=use_kdtree)
    else:
        group = u.select_atoms(atomselection)

        def current_coordinates():
            # same accessor as the per-frame re-selection in the loop below
            return group.positions

    coord = current_coordinates()
    logger.info("Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total.".format(coord.shape[0], len(u.select_atoms(atomselection)), atomselection, len(u.atoms)))

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    # Make the box bigger to avoid as much as possible 'outlier'. This
    # is important if the sites are defined at a high density: in this
    # case the bulk regions don't have to be close to 1 * n0 but can
    # be less. It's much more difficult to deal with outliers.  The
    # ideal solution would use images: implement 'looking across the
    # periodic boundaries' but that gets complicate when the box
    # rotates due to RMS fitting.
    smin = np.min(coord, axis=0) - padding
    smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # A real list of (min, max) pairs: the range is reused for every frame,
    # which a one-shot zip iterator (Python 3) would not survive.
    arange = list(zip(BINS['min'], BINS['max']))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges).
    # The histogram is left unnormalised, which is the histogramdd default;
    # the old ``normed`` keyword is not passed because recent NumPy releases
    # no longer accept it.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames, interval=interval, quiet=quiet,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")
    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    for ts in u.trajectory[start:stop:step]:
        if update_selection:
            # NOTE(review): this re-selects with the plain atomselection and
            # does not go through current_coordinates(), so a cutoff /
            # soluteselection filter is not applied here -- confirm intent.
            coord = u.select_atoms(atomselection).positions
        else:
            coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            continue

        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    # Frames without selected atoms are skipped above but still count in the
    # average.
    n_frames = len(range(start, stop, step))
    grid /= float(n_frames)

    # ``metadata`` is a named parameter and therefore never ends up in
    # kwargs; use it directly (on a copy, so the caller's dict is left
    # untouched) instead of looking it up in kwargs.
    metadata = dict(metadata) if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    # pick from kwargs
    parameters = kwargs.pop('parameters', {})
    parameters['isDensity'] = False  # must override

    # all other kwargs are discarded

    g = Density(grid=grid, edges=edges, units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
Exemple #24
0
class AnalysisBase(object):
    """Base class for defining multi frame analysis

    The class is designed as a template for creating multiframe analyses.
    It takes care of setting up the trajectory reader for iteration and
    offers to show a progress meter.

    To define a new analysis, `AnalysisBase` needs to be subclassed and
    `_single_frame` must be defined. It is also possible to define
    `_prepare` and `_conclude` for pre and post processing. See the example
    below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Afterwards the new analysis can be run like this.

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run(start=10, stop=20)
       print(na.result)

    """

    def __init__(self, trajectory, verbose=False, **kwargs):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        verbose : bool, optional
           Turn on more logging and debugging, default ``False``
        **kwargs
           Only the deprecated ``start``, ``stop`` and ``step`` keywords are
           inspected; any of them that is not ``None`` is stored as an
           attribute of the same name and a :exc:`DeprecationWarning` is
           issued. Other keywords are ignored.
        """
        self._trajectory = trajectory
        self._verbose = verbose
        # do deprecated kwargs
        # remove in 1.0
        deps = []
        for arg in ['start', 'stop', 'step']:
            if kwargs.get(arg) is not None:
                deps.append(arg)
                setattr(self, arg, kwargs[arg])
        if deps:
            warnings.warn('Setting the following kwargs should be '
                          'done in the run() method: {}'.format(
                              ', '.join(deps)),
                          DeprecationWarning)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Pass a Reader object and define the desired iteration pattern
        through the trajectory

        Sets ``self.start``, ``self.stop``, ``self.step``, ``self.n_frames``
        and the progress meter ``self._pm``.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            step between analysed frames (as in ``trajectory[start:stop:step]``)

        Notes
        -----
        If ``start``, ``stop`` or ``step`` already exist as attributes of the
        instance (set through the deprecated ``__init__`` keywords or by a
        previous call of this method) the stored value takes precedence over
        the corresponding argument.
        """
        self._trajectory = trajectory
        # TODO: Remove once start/stop/step are deprecated from init
        # See if these have been set as class attributes, and use that
        start = getattr(self, 'start', start)
        stop = getattr(self, 'stop', stop)
        step = getattr(self, 'step', step)
        start, stop, step = trajectory.check_slice_indices(start, stop, step)
        self.start = start
        self.stop = stop
        self.step = step
        self.n_frames = len(range(start, stop, step))
        # report progress roughly every 1% of the frames, but at least
        # every frame for short trajectories
        interval = int(self.n_frames // 100)
        if interval == 0:
            interval = 1

        # subclasses that never called AnalysisBase.__init__ have no _verbose
        verbose = getattr(self, '_verbose', False)
        self._pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                 interval=interval, verbose=verbose)

    def _single_frame(self):
        """Calculate data from a single frame of trajectory

        Don't worry about normalising, just deal with a single frame.

        Raises
        ------
        NotImplementedError
            always; subclasses must override this method.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Set things up before the analysis loop begins"""
        pass

    def _conclude(self):
        """Finalise the results you've gathered.

        Called at the end of the run() method to finish everything up.
        """
        pass

    def run(self, start=None, stop=None, step=None, verbose=None):
        """Perform the calculation

        Parameters
        ----------
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            step between analysed frames (as in ``trajectory[start:stop:step]``)
        verbose : bool, optional
            Turn on verbosity for this run; ``None`` (default) uses the
            value given to ``__init__``.

        Returns
        -------
        self
            the analysis instance itself, so that calls can be chained.
        """
        logger.info("Choosing frames to analyze")
        # if verbose unchanged, use class default
        default_verbose = getattr(self, '_verbose', False)
        if verbose is None:
            verbose = default_verbose

        # _setup_frames() (possibly overridden in a subclass) configures the
        # progress meter from self._verbose, so expose the per-run choice
        # there while the frames are set up and restore the default after.
        self._verbose = verbose
        try:
            self._setup_frames(self._trajectory, start, stop, step)
        finally:
            self._verbose = default_verbose

        logger.info("Starting preparation")
        self._prepare()
        for i, ts in enumerate(
                self._trajectory[self.start:self.stop:self.step]):
            # make the current position available to _single_frame()
            self._frame_index = i
            self._ts = ts
            # logger.info("--> Doing frame {} of {}".format(i+1, self.n_frames))
            self._single_frame()
            self._pm.echo(self._frame_index)
        logger.info("Finishing up")
        self._conclude()
        return self
Exemple #25
0
def rms_fit_trj(traj, reference, select='all', filename=None, rmsdfile=None, prefix='rmsfit_',
                mass_weighted=False, tol_mass=0.1, strict=False, force=True, quiet=False, **kwargs):
    """RMS-fit trajectory to a reference structure using a selection.

    Both *reference* and trajectory *traj* must be
    :class:`MDAnalysis.Universe` instances. If they contain a
    trajectory then it is used. The output file format is determined
    by the file extension of *filename*. One can also use the same
    universe if one wants to fit to the current frame.

    :Arguments:
      *traj*
         trajectory, :class:`MDAnalysis.Universe` object
      *reference*
         reference coordinates; :class:`MDAnalysis.Universe` object
         (uses the current time step of the object)
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that produces identical
            selections in *mobile* and *reference*; or
         2. a dictionary ``{'mobile':sel1, 'reference':sel2}`` (the
            :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3. a tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can also each be
         a list of selection strings (to generate a AtomGroup with defined atom order as
         described under :ref:`ordered-selections-label`).
      *filename*
         file name for the RMS-fitted trajectory or pdb; defaults to the
         original trajectory filename (from *traj*) with *prefix* prepended
      *rmsdfile*
         file name for writing the RMSD timeseries [``None``]
      *prefix*
         prefix for autogenerating the new output filename
      *mass_weighted*
         do a mass-weighted RMSD fit
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* [0.1]
      *strict*
         Default: ``False``
         - ``True``: Will raise :exc:`SelectionError` if a single atom does not
           match between the two selections.
         - ``False``: Will try to prepare a matching selection by dropping
           residues with non-matching atoms. See :func:`get_matching_atoms`
           for details.
      *force*
         - ``True``: Overwrite an existing output trajectory (default)
         - ``False``: simply return if the file already exists
      *quiet*
         - ``True``: suppress progress and logging for levels WARNING and below.
         - ``False``: show all status messages and do not change the logging
           level (default)

         .. Note:: With ``quiet=True`` logging is switched off process-wide
                   through :func:`logging.disable` while the function runs
                   and is re-enabled (``logging.NOTSET``) when it exits,
                   including on errors.

      *kwargs*
         All other keyword arguments are passed on the trajectory
         :class:`~MDAnalysis.coordinates.base.Writer`; this allows manipulating/fixing
         trajectories on the fly (e.g. change the output format by changing the extension of *filename*
         and setting different parameters as described for the corresponding writer).

    :Returns: *filename* (either provided or auto-generated)

    .. _ClustalW: http://www.clustal.org/
    .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

    .. versionchanged:: 0.8
       Added *kwargs* to be passed to the trajectory :class:`~MDAnalysis.coordinates.base.Writer` and
       *filename* is returned.

    .. versionchanged:: 0.10.0
       Uses :func:`get_matching_atoms` to work with incomplete selections
       and new *strict* keyword. The new default is to be lenient whereas
       the old behavior was the equivalent of *strict* = ``True``.

    """
    frames = traj.trajectory
    if quiet:
        logging.disable(logging.WARN)

    # try ... finally guarantees that the log level is restored on every
    # exit path (early return, normal return, exception)
    try:
        kwargs.setdefault('remarks', 'RMS fitted trajectory to reference')
        if filename is None:
            path, fn = os.path.split(frames.filename)
            filename = os.path.join(path, prefix + fn)
            _Writer = frames.Writer
        else:
            _Writer = frames.OtherWriter
        if os.path.exists(filename) and not force:
            logger.warning("{0} already exists and will NOT be overwritten; use force=True if you want this".format(filename))
            return filename
        writer = _Writer(filename, **kwargs)

        # make sure the output trajectory is closed even if fitting fails
        try:
            select = rms._process_selection(select)
            ref_atoms = reference.select_atoms(*select['reference'])
            traj_atoms = traj.select_atoms(*select['mobile'])

            ref_atoms, traj_atoms = get_matching_atoms(ref_atoms, traj_atoms,
                                                       tol_mass=tol_mass, strict=strict)
            # count the atoms only after matching: with strict=False
            # get_matching_atoms() may have dropped atoms and the fit must be
            # told the size of the arrays that are actually passed in
            natoms = traj_atoms.n_atoms

            logger.info("RMS-fitting on {0:d} atoms.".format(len(ref_atoms)))
            if mass_weighted:
                # if performing a mass-weighted alignment/rmsd calculation
                weight = ref_atoms.masses / ref_atoms.masses.mean()
            else:
                weight = None

            # reference centre of mass system
            ref_com = ref_atoms.center_of_mass()
            ref_coordinates = ref_atoms.positions - ref_com

            # allocate the array for selection atom coords
            traj_coordinates = traj_atoms.positions.copy()

            # RMSD timeseries
            nframes = len(frames)
            rmsd = np.zeros((nframes,))

            # R: rotation matrix that aligns r-r_com, x~-x~com
            #    (x~: selected coordinates, x: all coordinates)
            # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
            rot = np.zeros(9, dtype=np.float64)  # allocate space for calculation
            R = np.matrix(rot.reshape(3, 3))

            percentage = ProgressMeter(nframes, interval=10, quiet=quiet,
                                       format="Fitted frame %(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

            for k, ts in enumerate(frames):
                # shift coordinates for rotation fitting
                # selection is updated with the time frame
                x_com = traj_atoms.center_of_mass().astype(np.float32)
                traj_coordinates[:] = traj_atoms.positions - x_com

                # Need to transpose coordinates such that the coordinate array is
                # 3xN instead of Nx3. Also qcp requires that the dtype be float64
                # (I think we swapped the position of ref and traj in CalcRMSDRotationalMatrix
                # so that R acts **to the left** and can be broadcasted; we're saving
                # one transpose. [orbeckst])
                rmsd[k] = qcp.CalcRMSDRotationalMatrix(ref_coordinates.T.astype(np.float64),
                                                       traj_coordinates.T.astype(np.float64),
                                                       natoms, rot, weight)
                R[:, :] = rot.reshape(3, 3)

                # Transform each atom in the trajectory (use inplace ops to avoid copying arrays)
                # (Marginally (~3%) faster than "ts.positions[:] = (ts.positions - x_com) * R + ref_com".)
                ts.positions -= x_com
                ts.positions[:] = ts.positions * R  # R acts to the left & is broadcasted N times.
                ts.positions += ref_com

                writer.write(traj.atoms)  # write whole input trajectory system
                percentage.echo(ts.frame)
        finally:
            writer.close()

        logger.info("Wrote %d RMS-fitted coordinate frames to file %r",
                    frames.n_frames, filename)
        if rmsdfile is not None:
            np.savetxt(rmsdfile, rmsd)
            logger.info("Wrote RMSD timeseries  to file %r", rmsdfile)

        return filename
    finally:
        if quiet:
            logging.disable(logging.NOTSET)
Exemple #26
0
def density_from_Universe(universe,
                          delta=1.0,
                          atomselection='name OH2',
                          start=None,
                          stop=None,
                          step=None,
                          metadata=None,
                          padding=2.0,
                          cutoff=0,
                          soluteselection=None,
                          use_kdtree=True,
                          update_selection=False,
                          quiet=False,
                          interval=1,
                          **kwargs):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with *atomselection* are
    histogrammed on a grid with spacing *delta*::

      density_from_Universe(universe, delta=1.0, atomselection='name OH2', ...) --> density

    The accumulated histogram is divided by the number of frames that were
    actually read, i.e. the frames in ``trajectory[start:stop:step]``.

    .. Note:: By default, the *atomselection* is static, i.e., atoms are only
              selected once at the beginning. If you want dynamically changing
              selections (such as "name OW and around 4.0 (protein and not name
              H*)") then set ``update_selection=True``. For the special case of
              calculating a density of the "bulk" solvent away from a solute
              use the optimized selections with keywords *cutoff* and
              *soluteselection*.

    :Arguments:
      universe
            :class:`MDAnalysis.Universe` object with a trajectory

    :Keywords:
      atomselection
            selection string (MDAnalysis syntax) for the species to be analyzed
            ["name OH2"]
      delta
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
      start, stop, step
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
      metadata
            dictionary of additional data to be saved with the object; the
            function adds its own entries to this dictionary
      padding
            increase histogram dimensions by padding (on top of initial box size)
            in Angstroem [2.0]
      soluteselection
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
      cutoff
            With *cutoff*, select "<atomsel> NOT WITHIN <cutoff> OF <soluteselection>"
            (Special routines that are faster than the standard ``AROUND`` selection)
            [0]
      use_kdtree
            passed on to the "not within" coordinate routine that is used
            together with *cutoff* and *soluteselection* [``True``]
      update_selection
            Should the selection of atoms be updated for every step? [``False``]
            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
      quiet
            Print status update to the screen for every *interval* frame? [``False``]
            - ``True``: no status updates when a new frame is processed
            - ``False``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
      interval
           Show status update every *interval* frame [1]
      parameters
            dict with some special parameters for :class:`Density` (see doc)
      kwargs
            only *parameters* is picked from the remaining keyword arguments
            (modified and passed on to :class:`Density`); all others are
            discarded

    :Returns: :class:`Density`

    :Raises: :exc:`TypeError` if *universe* does not provide ``select_atoms``
             and ``trajectory.ts``

    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    """
    try:
        universe.select_atoms('all')
        universe.trajectory.ts
    except AttributeError:
        raise TypeError(
            "The universe must be a proper MDAnalysis.Universe instance.")
    u = universe
    if cutoff > 0 and soluteselection is not None:
        # special fast selection for '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff, use_kdtree=use_kdtree)

        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total.".
        format(coord.shape[0], len(u.select_atoms(atomselection)),
               atomselection, len(u.atoms)))

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    # Make the box bigger to avoid as much as possible 'outlier'. This
    # is important if the sites are defined at a high density: in this
    # case the bulk regions don't have to be close to 1 * n0 but can
    # be less. It's much more difficult to deal with outliers.  The
    # ideal solution would use images: implement 'looking across the
    # periodic boundaries' but that gets complicate when the box
    # rotates due to RMS fitting.
    smin = np.min(coord, axis=0) - padding
    smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # materialise the (min, max) pairs: the range is reused for every frame
    # and a bare zip() iterator would be exhausted after the first call
    arange = list(zip(BINS['min'], BINS['max']))
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges)
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames,
                       interval=interval,
                       quiet=quiet,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")
    # number of frames actually visited; needed to average the histogram
    # correctly when only a slice of the trajectory is analysed
    n_frames = 0
    for ts in u.trajectory[start:stop:step]:
        n_frames += 1
        if update_selection:
            group = u.select_atoms(atomselection)
            coord = group.positions
        else:
            coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            # nothing to histogram, but the frame still counts for the average
            continue

        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram
    grid /= float(n_frames)

    # 'metadata' is a named parameter and therefore never ends up in kwargs
    metadata = metadata if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    # pick from kwargs
    parameters = kwargs.pop('parameters', {})
    parameters['isDensity'] = False  # must override

    # all other kwargs are discarded

    g = Density(grid=grid,
                edges=edges,
                units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters,
                metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
Exemple #27
0
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0, soluteselection=None,
                          use_kdtree=True, update_selection=False,
                          verbose=None, interval=1, quiet=None,
                          parameters=None):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with `atomselection` are
    histogrammed on a grid with spacing `delta`. The accumulated histogram is
    averaged over the frames in ``trajectory[start:stop:step]``.

    Parameters
    ----------
    universe : MDAnalysis.Universe
            :class:`MDAnalysis.Universe` object with a trajectory
    atomselection : str (optional)
            selection string (MDAnalysis syntax) for the species to be analyzed
            ["name OH2"]
    delta : float (optional)
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
    start : int (optional)
    stop : int (optional)
    step : int (optional)
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
    metadata : dict (optional)
            `dict` of additional data to be saved with the object; the meta data
            are passed through as they are (the function adds its own
            entries to the same dictionary).
    padding : float (optional)
            increase histogram dimensions by padding (on top of initial box size)
            in Angstroem [2.0]
    soluteselection : str (optional)
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
    cutoff : float (optional)
            With `cutoff`, select "<atomsel> NOT WITHIN <cutoff> OF <soluteselection>"
            (Special routines that are faster than the standard ``AROUND`` selection);
            any value that evaluates to ``False`` (such as the default 0) disables this
            special selection.
    use_kdtree : bool (optional)
            passed on to the "not within" coordinate routine that is used
            together with `cutoff` and `soluteselection` [``True``]
    update_selection : bool (optional)
            Should the selection of atoms be updated for every step? [``False``]

            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
    verbose : bool (optional)
            Print status update to the screen for every *interval* frame? [``True``]

            - ``False``: no status updates when a new frame is processed
            - ``True``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
    interval : int (optional)
           Show status update every `interval` frame [1]
    quiet : bool (optional)
            deprecated, use `verbose` instead; the value is handed to the
            progress meter unchanged
    parameters : dict (optional)
            `dict` with some special parameters for :class:`Density` (see docs)

    Returns
    -------
    :class:`Density`
            A :class:`Density` instance with the histogrammed data together
            with associated metadata.


    Notes
    -----

    By default, the `atomselection` is static, i.e., atoms are only selected
    once at the beginning. If you want *dynamically changing selections* (such
    as "name OW and around 4.0 (protein and not name H*)", i.e., the water
    oxygen atoms that are within 4 Å of the protein heavy atoms) then set
    ``update_selection=True``. For the special case of calculating a density of
    the "bulk" solvent away from a solute use the optimized selections with
    keywords *cutoff* and *soluteselection* (see Examples below).

    Examples
    --------
    Basic use for creating a water density (just using the water oxygen atoms "OW")::

      density = density_from_Universe(universe, delta=1.0, atomselection='name OW')

    If you are only interested in water within a certain region, e.g., within a
    vicinity around a binding site, you can use a selection that updates every
    step by setting the `update_selection` keyword argument::

      site_density = density_from_Universe(universe, delta=1.0,
                                           atomselection='name OW and around 5 (resid 156 157 305)',
                                           update_selection=True)

    A special case for an updating selection is to create the "bulk density",
    i.e., the water outside the immediate solvation shell of a protein: Select
    all water oxygen atoms that are *farther away* than a given cut-off (say, 4
    Å) from the solute (here, heavy atoms of the protein)::

      bulk = density_from_Universe(universe, delta=1.0, atomselection='name OW',
                                   soluteselection="protein and not name H*",
                                   cutoff=4)

    (Using the special case for the bulk with `soluteselection` and `cutoff`
    improves performance over the simple `update_selection` approach.)

    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    .. deprecated:: 0.16
       The keyword argument *quiet* is deprecated in favor of *verbose*.

    """
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff,
            use_kdtree=use_kdtree, updating_selection=update_selection)
        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection, updating=update_selection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        "".format(coord.shape[0], len(u.select_atoms(atomselection)),
                  atomselection, len(u.atoms))
    )

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    angles = u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    # Make the box bigger to avoid as much as possible 'outlier'. This
    # is important if the sites are defined at a high density: in this
    # case the bulk regions don't have to be close to 1 * n0 but can
    # be less. It's much more difficult to deal with outliers.  The
    # ideal solution would use images: implement 'looking across the
    # periodic boundaries' but that gets complicate when the box
    # rotates due to RMS fitting.
    smin = np.min(coord, axis=0) - padding
    smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # one (min, max) row per dimension, as expected by histogramdd's range
    arange = np.vstack((BINS['min'], BINS['max']))
    arange = np.transpose(arange)
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges);
    # histogramdd returns plain counts by default, no normalisation keyword
    # is required
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames, interval=interval,
                       verbose=verbose, quiet=quiet,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")
    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    for ts in u.trajectory[start:stop:step]:
        coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            # nothing to histogram, but the frame still counts for the average
            continue

        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    # average over the frames that were iterated, not the whole trajectory
    n_frames = len(range(start, stop, step))
    grid /= float(n_frames)

    metadata = metadata if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    parameters = parameters if parameters is not None else {}
    parameters['isDensity'] = False  # must override


    g = Density(grid=grid, edges=edges, units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
Exemple #28
0
    def run(self, start=None, stop=None, step=None, verbose=None, debug=None):
        """Analyze trajectory and produce timeseries.

        Stores the water bridge data per frame as
        :attr:`WaterBridgeAnalysis.timeseries` (see there for output
        format).

        For every analysed frame the hydrogen bonds between selection 1 and
        water are collected first; only the water residues found there are
        then searched for hydrogen bonds to selection 2. A water residue is
        kept as a bridge when the union of its hydrogen bonds to both
        selections holds more than one bond.

        ``self.timesteps``, ``self._timeseries`` and ``self._water_network``
        each receive exactly one entry per analysed frame.

        Parameters
        ----------
        start : int (optional)
            starting frame-index for analysis, ``None`` is the first one, 0.
            `start` and `stop` are 0-based frame indices and are used to slice
            the trajectory (if supported) [``None``]
        stop : int (optional)
            last trajectory frame for analysis, ``None`` is the last one
            [``None``]
        step : int (optional)
            read every `step` between `start` (included) and `stop` (excluded),
            ``None`` selects 1. [``None``]
        verbose : bool (optional)
             toggle progress meter output
             :class:`~MDAnalysis.lib.log.ProgressMeter`; ``None`` uses the
             value stored on the instance as ``_verbose`` [``None``]
        debug : bool (optional)
             enable detailed logging of debugging information; this can create
             *very big* log files so it is disabled (``False``) by default;
             setting `debug` toggles the debug status for
             :class:`WaterBridgeAnalysis`, namely the value of
             :attr:`WaterBridgeAnalysis.debug`.

        See Also
        --------
        :meth:`WaterBridgeAnalysis.generate_table` :
               processing the data into a different format.
        """
        self._setup_frames(self.u.trajectory, start, stop, step)

        logger.info("WBridge analysis: starting")
        logger.debug("WBridge analysis: donors    %r", self.donors)
        logger.debug("WBridge analysis: acceptors %r", self.acceptors)
        logger.debug("WBridge analysis: water bridge %r", self.water_selection)

        if debug is not None and debug != self.debug:
            self.debug = debug
            logger.debug("Toggling debug to %r", self.debug)
        if not self.debug:
            logger.debug("WBridge analysis: For full step-by-step debugging output use debug=True")

        self._timeseries = []
        self.timesteps = []
        self._water_network = []

        if verbose is None:
            verbose = self._verbose
        pm = ProgressMeter(self.n_frames,
                           format="WBridge frame {current_step:5d}: {step:5d}/{numsteps} [{percentage:5.1f}%]\r",
                           verbose=verbose)

        logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)",
                    self.start, self.stop, self.step)

        for progress, ts in enumerate(self.u.trajectory[self.start:self.stop:self.step]):
            frame = ts.frame
            timestep = ts.time
            self.timesteps.append(timestep)
            pm.echo(progress, current_step=frame)
            self.logger_debug("Analyzing frame %(frame)d, timestep %(timestep)f ps",
                              {'frame': frame, 'timestep': timestep})
            if self.update_selection1:
                self._update_selection_1()
            if self.update_selection2:
                self._update_selection_2()
            if self.update_water_selection:
                self._update_water_selection()

            # Hydrogen bonds between selection 1 and water, grouped by the
            # (resname, resid) of the water residue involved.
            s1_frame_results_dict = defaultdict(list)
            if (self.selection1_type in ('donor', 'both') and
                    self._water_acceptors):
                self.logger_debug("Selection 1 Donors <-> Water Acceptors")
                self._collect_water_hbonds(
                    donors=self._s1_donors,
                    donors_h=self._s1_donors_h,
                    acceptors=self._water_acceptors,
                    debug_template="S1-D: {h!s} <-> W-A: {a!s} {dist:f} A, {angle:f} DEG",
                    water_is_acceptor=True,
                    results=s1_frame_results_dict)

            if (self.selection1_type in ('acceptor', 'both') and
                    self._s1_acceptors):
                self.logger_debug("Selection 1 Acceptors <-> Water Donors")
                self._collect_water_hbonds(
                    donors=self._water_donors,
                    donors_h=self._water_donors_h,
                    acceptors=self._s1_acceptors,
                    debug_template="S1-A: {a!s} <-> W-D: {h!s} {dist:f} A, {angle:f} DEG",
                    water_is_acceptor=False,
                    results=s1_frame_results_dict)

            # Narrow down the water selection
            if not s1_frame_results_dict:
                # No water touches selection 1 in this frame: record an empty
                # entry in *both* per-frame lists so they stay index-aligned
                # with self.timesteps.
                self._timeseries.append([])
                self._water_network.append({})
                continue
            water_selection = ' or '.join(
                '(resname {} and resid {})'.format(resname, resid)
                for resname, resid in s1_frame_results_dict)
            water_bridges = self._water.select_atoms(water_selection)
            self.logger_debug("Size of water bridge selection: {0} atoms".format(len(water_bridges)))
            if not water_bridges:
                logger.warning("No water forming hydrogen bonding with selection 1.")
            water_bridges_donors = water_bridges.select_atoms(
                'name {0}'.format(' '.join(self.donors)))
            # Maps the position of a donor inside water_bridges_donors to its
            # bonded hydrogens; donors without hydrogens are left out.
            water_bridges_donors_h = {}
            for i, d in enumerate(water_bridges_donors):
                tmp = self._get_bonded_hydrogens(d)
                if tmp:
                    water_bridges_donors_h[i] = tmp
            self.logger_debug("water bridge donors: {0}".format(len(water_bridges_donors)))
            self.logger_debug("water bridge donor hydrogens: {0}".format(len(water_bridges_donors_h)))
            water_bridges_acceptors = water_bridges.select_atoms(
                'name {0}'.format(' '.join(self.acceptors)))
            self.logger_debug("water bridge: {0}".format(len(water_bridges_acceptors)))

            # Finding the hydrogen bonds between water bridge and selection 2
            s2_frame_results_dict = defaultdict(list)
            if self._s2_acceptors:
                self.logger_debug("Water bridge Donors <-> Selection 2 Acceptors")
                self._collect_water_hbonds(
                    donors=water_bridges_donors,
                    donors_h=water_bridges_donors_h,
                    acceptors=self._s2_acceptors,
                    debug_template="WB-D: {h!s} <-> S2-A: {a!s} {dist:f} A, {angle:f} DEG",
                    water_is_acceptor=False,
                    results=s2_frame_results_dict)

            if water_bridges_acceptors:
                self.logger_debug("Selection 2 Donors <-> Water bridge Acceptors")
                self._collect_water_hbonds(
                    donors=self._s2_donors,
                    donors_h=self._s2_donors_h,
                    acceptors=water_bridges_acceptors,
                    debug_template="WB-A: {a!s} <-> S2-D: {h!s} {dist:f} A, {angle:f} DEG",
                    water_is_acceptor=True,
                    results=s2_frame_results_dict)

            # Generate the water network
            water_network = {}
            for key in s2_frame_results_dict:
                s1_frame_results = set(s1_frame_results_dict[key])
                s2_frame_results = set(s2_frame_results_dict[key])
                if len(s1_frame_results.union(s2_frame_results)) > 1:
                    # Thus if selection 1 and selection 2 are the same and both
                    # only form a single hydrogen bond with a water, this entry
                    # won't be included.
                    water_network[key] = [
                        s1_frame_results,
                        s2_frame_results.difference(s1_frame_results)]

            # Generate frame_results: a flat list of all bonds in the network
            frame_results = []
            for s1_frame_results, s2_frame_results in water_network.values():
                frame_results.extend(s1_frame_results)
                frame_results.extend(s2_frame_results)

            self._timeseries.append(frame_results)
            self._water_network.append(water_network)

        logger.info("WBridge analysis: complete; timeseries  %s.timeseries",
                    self.__class__.__name__)

    def _collect_water_hbonds(self, donors, donors_h, acceptors,
                              debug_template, water_is_acceptor, results):
        """Append every donor-H...acceptor hydrogen bond found to `results`.

        Parameters
        ----------
        donors : indexable collection of atoms
            donor heavy atoms; ``donors[i]`` belongs to ``donors_h[i]``
        donors_h : dict
            maps an index into `donors` to the hydrogens bonded to that donor
        acceptors : group of atoms
            acceptor atoms handed to :class:`AtomNeighborSearch`
        debug_template : str
            ``str.format`` template with the fields ``h`` (hydrogen index),
            ``a`` (acceptor index), ``dist`` and ``angle``
        water_is_acceptor : bool
            ``True`` when the acceptor is the water atom, ``False`` when the
            donor hydrogen is; the water atom's ``(resname, resid)`` is the
            key under which the bond is stored
        results : dict of list
            mapping that is updated in place
        """
        ns_acceptors = AtomNeighborSearch(acceptors)
        for i, donor_h_set in donors_h.items():
            d = donors[i]
            for h in donor_h_set:
                for a in ns_acceptors.search(h, self.distance):
                    # 'heavy' measures from the donor heavy atom, any other
                    # distance_type measures from the hydrogen.
                    donor_atom = h if self.distance_type != 'heavy' else d
                    dist = distances.calc_bonds(donor_atom.position,
                                                a.position)
                    if dist <= self.distance:
                        angle = np.rad2deg(distances.calc_angles(
                            d.position, h.position, a.position))
                        if angle >= self.angle:
                            self.logger_debug(debug_template.format(
                                h=h.index, a=a.index, dist=dist, angle=angle))
                            water = a if water_is_acceptor else h
                            results[(water.resname, water.resid)].append(
                                (h.index, a.index,
                                 (h.resname, h.resid, h.name),
                                 (a.resname, a.resid, a.name),
                                 dist, angle))
def helanal_trajectory(universe, selection="name CA", start=None, end=None, begin=None, finish=None,
                       matrix_filename="bending_matrix.dat", origin_pdbfile="origin.pdb",
                       summary_filename="summary.txt", screw_filename="screw.xvg",
                       tilt_filename="local_tilt.xvg", fitted_tilt_filename="fit_tilt.xvg",
                       bend_filename="local_bend.xvg", twist_filename="unit_twist.xvg",
                       prefix="helanal_", ref_axis=None, quiet=False):
    """Perform HELANAL_ helix analysis on all frames in *universe*.

    .. Note::

       Only a single helix is analyzed. Use the selection to specify the
       helix, e.g. with "name CA and resid 1:20" or use start=1, end=20.

    :Arguments:
       *universe*
          :class:`~MDAnalysis.core.AtomGroup.Universe`

    :Keywords:
       *selection*
          selection string that selects Calpha atoms [``"name CA"``]
       *start*
          start residue resid
       *end*
          end residue resid
       *begin*
          start analysing for time (ps) >= *begin*; ``None`` starts from the
          beginning [``None``]
       *finish*
          stop analysis for time (ps) =< *finish*; ``None`` goes to the
          end of the trajectory [``None``]
       *matrix_filename*
          Output file- bending matrix [``"bending_matrix.dat"``]
       *origin_pdbfile*
          Output file- origin pdb file [``"origin.pdb"``]
       *summary_filename*
          Output file- all of the basic data [``"summary.txt"``]
       *screw_filename*
          Output file- local screw angles, one line per frame
          [``"screw.xvg"``]
       *tilt_filename*
          Output file- angle (degrees) between each local helix axis and
          *ref_axis*, one line per frame [``"local_tilt.xvg"``]
       *fitted_tilt_filename*
          Output file- tilt (degrees) of the line of best fit through the
          origins, one line per frame [``"fit_tilt.xvg"``]
       *bend_filename*
          Output file- local bend angles between successive local helix axes
          [``"local_bend.xvg"``]
       *twist_filename*
          Output file- local unit twist between successive helix turns
          [``"unit_twist.xvg"``]
       *prefix*
          Prefix to add to all output file names; set to ``None`` to disable
          [``"helanal_"``]
       *ref_axis*
          Calculate tilt angle relative to the axis; if ``None`` then ``[0,0,1]``
          is chosen [``None``]
       *quiet*
          Suppress most diagnostic output.

    :Raises:
       FinishTimeException
          If the specified finish time precedes the specified start time or
          current time stamp of trajectory object.

    Each per-frame output file holds one line per analysed frame: the frame
    number followed by the values, separated by single spaces.

    .. versionchanged:: 0.13.0
       New *quiet* keyword to silence frame progress output and most of the
       output that used to be printed to stdout is now logged to the logger
       *MDAnalysis.analysis.helanal* (at loglevel *INFO*).
    """
    if ref_axis is None:
        ref_axis = np.array([0., 0., 1.])
    else:
        # enable MDA API so that one can use a tuple of atoms or AtomGroup with
        # two atoms
        ref_axis = np.asarray(ref_axis)

    # Remember what the caller actually supplied: the missing bound is filled
    # in below, which would otherwise make the "one bound only" log messages
    # unreachable.
    start_given = start is not None
    end_given = end is not None
    if start_given or end_given:
        if start is None:
            start = universe.atoms[0].resid
        if end is None:
            end = universe.atoms[-1].resid
        selection += " and resid {start:d}:{end:d}".format(start=start, end=end)
    ca = universe.select_atoms(selection)
    trajectory = universe.trajectory

    if finish is not None:
        if trajectory.ts.time > finish:
            # you'd be starting with a finish time (in ps) that has already passed or not
            # available
            raise FinishTimeException(
                'The input finish time ({finish} ps) precedes the current trajectory time of {traj_time} ps.'.format(
                    finish=finish, traj_time=trajectory.time))

    if start_given and end_given:
        logger.info("Analysing from residue %d to %d", start, end)
    elif start_given:
        logger.info("Analysing from residue %d to the C termini", start)
    elif end_given:
        logger.info("Analysing from the N termini to %d", end)
    logger.info("Analysing %d/%d residues", ca.n_atoms, universe.atoms.n_residues)

    output_files = [matrix_filename, origin_pdbfile, summary_filename,
                    screw_filename, tilt_filename, fitted_tilt_filename,
                    bend_filename, twist_filename]
    if prefix is not None:
        prefix = str(prefix)
        output_files = [prefix + filename for filename in output_files]
    (matrix_filename, origin_pdbfile, summary_filename,
     screw_filename, tilt_filename, fitted_tilt_filename,
     bend_filename, twist_filename) = output_files
    # The per-frame files are opened in append mode below, so move any
    # existing files out of the way first.
    for filename in output_files:
        backup_file(filename)

    global_height = []
    global_twist = []
    global_rnou = []
    global_bending = []
    global_bending_matrix = []
    global_fitted_tilts = []

    pm = ProgressMeter(trajectory.n_frames, quiet=quiet,
                       format="Frame %(step)10d: %(time)20.1f ps\r")
    for ts in trajectory:
        pm.echo(ts.frame, time=ts.time)
        frame = ts.frame
        if begin is not None:
            if trajectory.time < begin:
                continue
        if finish is not None:
            if trajectory.time > finish:
                break

        ca_positions = ca.positions
        twist, bending_angles, height, rnou, origins, local_helix_axes, local_screw_angles = \
            main_loop(ca_positions, ref_axis=ref_axis)

        origin_pdb(origins, origin_pdbfile)

        # calculate local bending matrix (it is looking at all i < j
        # combinations; the diagonal and lower triangle stay empty)
        if not global_bending_matrix:
            global_bending_matrix = [[[] for _ in local_helix_axes]
                                     for _ in local_helix_axes]

        for i, axis_i in enumerate(local_helix_axes):
            for j in range(i + 1, len(local_helix_axes)):
                angle = np.rad2deg(np.arccos(np.dot(axis_i, local_helix_axes[j])))
                global_bending_matrix[i][j].append(angle)

        fit_vector, fit_tilt = vector_of_best_fit(origins)
        fit_tilt_deg = np.rad2deg(fit_tilt)
        global_height += height
        global_twist += twist
        global_rnou += rnou
        global_fitted_tilts.append(fit_tilt_deg)

        # one line per frame in each of the per-frame output files
        _helanal_append_row(twist_filename, frame, twist)
        _helanal_append_row(bend_filename, frame, bending_angles)
        _helanal_append_row(screw_filename, frame, local_screw_angles)
        _helanal_append_row(
            tilt_filename, frame,
            [np.rad2deg(mdamath.angle(axis, ref_axis)) for axis in local_helix_axes])
        _helanal_append_row(fitted_tilt_filename, frame, [fit_tilt_deg])

        if not global_bending:
            global_bending = [[] for _ in bending_angles]
        for store, tmp in zip(global_bending, bending_angles):
            store.append(tmp)

    twist_mean, twist_sd, twist_abdev = stats(global_twist)
    height_mean, height_sd, height_abdev = stats(global_height)
    rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou)
    ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts)

    bending_statistics = [stats(item) for item in global_bending]

    bending_statistics_matrix = [[stats(col) for col in row] for row in global_bending_matrix]
    with open(matrix_filename, 'w') as mat_output:
        # stats() results are indexed as 0: mean, 1: SD, 2: ABDEV
        for index, title in enumerate(("Mean", "\nSD", "\nABDEV")):
            print(title, file=mat_output)
            for row in bending_statistics_matrix:
                print("".join("{0:6.1f}".format(col[index]) for col in row),
                      file=mat_output)

    logger.info("Height: %g  SD: %g  ABDEV: %g  (Angstroem)", height_mean, height_sd, height_abdev)
    logger.info("Twist: %g  SD: %g  ABDEV: %g", twist_mean, twist_sd, twist_abdev)
    logger.info("Residues/turn: %g  SD: %g  ABDEV: %g", rnou_mean, rnou_sd, rnou_abdev)
    logger.info("Fitted tilt: %g  SD: %g  ABDEV: %g", ftilt_mean, ftilt_sd, ftilt_abdev)
    logger.info("Local bending angles:")
    # zip() must be materialised: it is indexed below and iterated twice.
    residue_statistics = list(zip(*bending_statistics))
    measure_names = ["Mean ", "SD   ", "ABDEV"]
    n_bends = len(residue_statistics[0]) if residue_statistics else 0
    # presumably the first local bend angle belongs to the 4th residue of the
    # analysed stretch -- TODO confirm against main_loop
    first_resid = 4 if start is None else start + 3
    resids = range(first_resid, first_resid + n_bends)

    logger.info("ResID %s", " ".join("{0:8d}".format(item) for item in resids))
    for measure, name in zip(residue_statistics, measure_names):
        output = str(name) + " "
        output += " ".join("{0:8.1f}".format(residue) for residue in measure)
        logger.info(output)

    with open(summary_filename, 'w') as summary_output:
        # NOTE(review): the unit label here is '(nm)' while the log message
        # above says Angstroem for the same value -- confirm which is right.
        print("Height:", height_mean, "SD", height_sd, "ABDEV", height_abdev, '(nm)', file=summary_output)
        print("Twist:", twist_mean, "SD", twist_sd, "ABDEV", twist_abdev,
              file=summary_output)
        print("Residues/turn:", rnou_mean, "SD", rnou_sd, "ABDEV", rnou_abdev,
              file=summary_output)
        print("Local bending angles:", file=summary_output)
        print("ResID" + "".join("{0:8d}".format(item) for item in resids),
              file=summary_output)
        for measure, name in zip(residue_statistics, measure_names):
            print(name + "".join("{0:8.1f}".format(residue) for residue in measure),
                  file=summary_output)


def _helanal_append_row(filename, frame, values):
    """Append the line ``frame v1 v2 ...`` (space separated) to *filename*."""
    with open(filename, "a") as output:
        print(frame, *values, file=output)
class AnalysisBase(object):
    """Base class for defining multi frame analysis

    The class is designed as a template for creating multiframe analyses.
    This class will automatically take care of setting up the trajectory
    reader for iterating, and it offers to show a progress meter.

    To define a new Analysis, `AnalysisBase` needs to be subclassed and
    `_single_frame` must be defined. It is also possible to define
    `_prepare` and `_conclude` for pre and post processing. See the example
    below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Afterwards the new analysis can be run like this.

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run(start=10, stop=20)
       print(na.result)

    """
    def __init__(self, trajectory, verbose=False, **kwargs):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        verbose : bool, optional
           Turn on more logging and debugging, default ``False``
        **kwargs
           Only the deprecated ``start``, ``stop`` and ``step`` are looked
           at; a value that is not ``None`` is stored as an attribute of the
           same name and triggers a :exc:`DeprecationWarning`. Any other
           keyword is ignored.
        """
        self._trajectory = trajectory
        self._verbose = verbose
        # do deprecated kwargs
        # remove in 1.0
        deps = []
        for arg in ['start', 'stop', 'step']:
            if kwargs.get(arg) is not None:
                deps.append(arg)
                setattr(self, arg, kwargs[arg])
        if deps:
            warnings.warn(
                'Setting the following kwargs should be '
                'done in the run() method: {}'.format(', '.join(deps)),
                DeprecationWarning)

    def _frame_arguments(self, start, stop, step):
        """Fill slice arguments that are ``None`` from instance attributes.

        An explicitly given (non-``None``) argument always wins; only for
        ``None`` is the attribute of the same name consulted (``None`` if it
        does not exist either).

        Returns
        -------
        tuple
            ``(start, stop, step)``
        """
        return tuple(
            getattr(self, name, None) if given is None else given
            for name, given in (('start', start), ('stop', stop), ('step', step)))

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Pass a Reader object and define the desired iteration pattern
        through the trajectory

        Sets ``start``, ``stop``, ``step`` and ``n_frames`` on the instance
        and creates the progress meter ``_pm``.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # TODO: Remove once start/stop/step are deprecated from init
        # Attributes of the same name (deprecated init kwargs, or the values
        # stored by an earlier call) are only a fallback: they must not
        # shadow arguments passed explicitly, otherwise every run() after the
        # first one would silently reuse the first slice.
        start, stop, step = self._frame_arguments(start, stop, step)
        start, stop, step = trajectory.check_slice_indices(start, stop, step)
        self.start = start
        self.stop = stop
        self.step = step
        self.n_frames = len(range(start, stop, step))
        # the interval passed to the progress meter is 1% of the frames, at least 1
        interval = max(1, self.n_frames // 100)

        verbose = getattr(self, '_verbose', False)
        self._pm = ProgressMeter(self.n_frames if self.n_frames else 1,
                                 interval=interval,
                                 verbose=verbose)

    def _single_frame(self):
        """Calculate data from a single frame of trajectory

        Don't worry about normalising, just deal with a single frame.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Set things up before the analysis loop begins"""
        pass

    def _conclude(self):
        """Finalise the results you've gathered.

        Called at the end of the run() method to finish everything up.
        """
        pass

    def run(self, start=None, stop=None, step=None, verbose=None):
        """Perform the calculation

        Parameters
        ----------
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity for this run only; ``None`` keeps the value
            given to the constructor

        Returns
        -------
        self
            so that calls can be chained, e.g. ``Analysis(...).run().result``
        """
        logger.info("Choosing frames to analyze")
        # if verbose unchanged, use class default
        default_verbose = getattr(self, '_verbose', False)
        # _setup_frames() reads self._verbose when it builds the progress
        # meter, so the per-run choice has to be visible there; the previous
        # value is restored once the run is over.
        self._verbose = default_verbose if verbose is None else verbose
        try:
            self._setup_frames(self._trajectory, start, stop, step)
            logger.info("Starting preparation")
            self._prepare()
            for i, ts in enumerate(
                    self._trajectory[self.start:self.stop:self.step]):
                self._frame_index = i
                self._ts = ts
                self._single_frame()
                self._pm.echo(self._frame_index)
            logger.info("Finishing up")
            self._conclude()
        finally:
            self._verbose = default_verbose
        return self
Exemple #31
0
def density_from_Universe(universe, delta=1.0, atomselection='name OH2',
                          start=None, stop=None, step=None,
                          metadata=None, padding=2.0, cutoff=0, soluteselection=None,
                          use_kdtree=True, update_selection=False,
                          verbose=False, interval=1, quiet=None,
                          parameters=None,
                          gridcenter=None, xdim=None, ydim=None, zdim=None):
    """Create a density grid from a :class:`MDAnalysis.Universe` object.

    The trajectory is read, frame by frame, and the atoms selected with `atomselection` are
    histogrammed on a grid with spacing `delta`.

    Parameters
    ----------
    universe : MDAnalysis.Universe
            :class:`MDAnalysis.Universe` object with a trajectory
    atomselection : str (optional)
            selection string (MDAnalysis syntax) for the species to be analyzed
            ["name OH2"]
    delta : float (optional)
            bin size for the density grid in Angstroem (same in x,y,z) [1.0]
    start : int (optional)
    stop : int (optional)
    step : int (optional)
            Slice the trajectory as ``trajectory[start:stop:step]``; default
            is to read the whole trajectory.
    metadata : dict. optional
            `dict` of additional data to be saved with the object; the meta data
            are passed through as they are. The dict is updated in place with
            the entries generated by this function.
    padding : float (optional)
            increase histogram dimensions by padding (on top of initial box size)
            in Angstroem. Padding is ignored when setting a user defined grid. [2.0]
    soluteselection : str (optional)
            MDAnalysis selection for the solute, e.g. "protein" [``None``]
    cutoff : float (optional)
            With `cutoff`, select "<atomsel> NOT WITHIN <cutoff> OF <soluteselection>"
            (Special routines that are faster than the standard ``AROUND`` selection);
            any value that evaluates to ``False`` (such as the default 0) disables this
            special selection.
    use_kdtree : bool (optional)
            Passed through to the special "not within" selection that is used
            together with `cutoff` and `soluteselection`; has no effect
            otherwise. [``True``]
    update_selection : bool (optional)
            Should the selection of atoms be updated for every step? [``False``]

            - ``True``: atom selection is updated for each frame, can be slow
            - ``False``: atoms are only selected at the beginning
    verbose : bool (optional)
            Print status update to the screen for every *interval* frame? [``False``]

            - ``False``: no status updates when a new frame is processed
            - ``True``: status update every frame (including number of atoms
              processed, which is interesting with ``update_selection=True``)
    interval : int (optional)
           Show status update every `interval` frame [1]
    quiet : bool (optional)
            Deprecated in favor of `verbose`; the value is not used by this
            function. [``None``]
    parameters : dict (optional)
            `dict` with some special parameters for :class:`Density` (see docs);
            the key ``'isDensity'`` is always overwritten with ``False``.
    gridcenter : numpy ndarray, float32 (optional)
            3 element numpy array detailing the x, y and z coordinates of the
            center of a user defined grid box in Angstroem [``None``]
    xdim : float (optional)
            User defined x dimension box edge in ångström; ignored if
            gridcenter is ``None``
    ydim : float (optional)
            User defined y dimension box edge in ångström; ignored if
            gridcenter is ``None``
    zdim : float (optional)
            User defined z dimension box edge in ångström; ignored if
            gridcenter is ``None``

    Returns
    -------
    :class:`Density`
            A :class:`Density` instance with the histogrammed data together
            with associated metadata.


    Notes
    -----

    By default, the `atomselection` is static, i.e., atoms are only selected
    once at the beginning. If you want *dynamically changing selections* (such
    as "name OW and around 4.0 (protein and not name H*)", i.e., the water
    oxygen atoms that are within 4 Å of the protein heavy atoms) then set
    ``update_selection=True``. For the special case of calculating a density of
    the "bulk" solvent away from a solute use the optimized selections with
    keywords *cutoff* and *soluteselection* (see Examples below).

    Examples
    --------
    Basic use for creating a water density (just using the water oxygen atoms "OW")::

      density = density_from_Universe(universe, delta=1.0, atomselection='name OW')

    If you are only interested in water within a certain region, e.g., within a
    vicinity around a binding site, you can use a selection that updates every
    step by setting the `update_selection` keyword argument::

      site_density = density_from_Universe(universe, delta=1.0,
                                           atomselection='name OW and around 5 (resid 156 157 305)',
                                           update_selection=True)

    A special case for an updating selection is to create the "bulk density",
    i.e., the water outside the immediate solvation shell of a protein: Select
    all water oxygen atoms that are *farther away* than a given cut-off (say, 4
    Å) from the solute (here, heavy atoms of the protein)::

      bulk = density_from_Universe(universe, delta=1.0, atomselection='name OW',
                                   soluteselection="protein and not name H*",
                                   cutoff=4)

    (Using the special case for the bulk with `soluteselection` and `cutoff`
    improves performance over the simple `update_selection` approach.)

    If you are interested in explicitly setting a grid box of a given edge size
    and origin, you can use the gridcenter and x/y/zdim arguments. For example
    to plot the density of waters within 5 Å of a ligand (in this case the
    ligand has been assigned the residue name "LIG") in a cubic grid with 20 Å
    edges which is centered on the centre of mass (COM) of the ligand::

      # Create a selection based on the ligand
      ligand_selection = universe.select_atoms("resname LIG")

      # Extract the COM of the ligand
      ligand_COM = ligand_selection.center_of_mass()

      # Generate a density of waters on a cubic grid centered on the ligand COM
      # In this case, we update the atom selection as shown above.
      water_density = density_from_Universe(universe, delta=1.0,
                                            atomselection='name OW around 5 resname LIG',
                                            update_selection=True,
                                            gridcenter=ligand_COM,
                                            xdim=20.0, ydim=20.0, zdim=20.0)

      (It should be noted that the `padding` keyword is not used when a user
      defined grid is assigned).

    .. versionchanged:: 0.19.0
       *gridcenter*, *xdim*, *ydim* and *zdim* keywords added to allow for user
       defined boxes
    .. versionchanged:: 0.13.0
       *update_selection* and *quiet* keywords added

    .. deprecated:: 0.16
       The keyword argument *quiet* is deprecated in favor of *verbose*.

    """
    u = universe

    if cutoff > 0 and soluteselection is not None:
        # special fast selection for '<atomsel> not within <cutoff> of <solutesel>'
        notwithin_coordinates = notwithin_coordinates_factory(
            u, atomselection, soluteselection, cutoff,
            use_kdtree=use_kdtree, updating_selection=update_selection)
        def current_coordinates():
            return notwithin_coordinates()
    else:
        group = u.select_atoms(atomselection, updating=update_selection)

        def current_coordinates():
            return group.positions

    coord = current_coordinates()
    logger.info(
        "Selected {0:d} atoms out of {1:d} atoms ({2!s}) from {3:d} total."
        "".format(coord.shape[0], len(u.select_atoms(atomselection)),
                  atomselection, len(u.atoms))
    )

    # mild warning; typically this is run on RMS-fitted trajectories and
    # so the box information is rather meaningless
    box, angles = u.trajectory.ts.dimensions[:3], u.trajectory.ts.dimensions[3:]
    if tuple(angles) != (90., 90., 90.):
        msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!"
        warnings.warn(msg)
        logger.warning(msg)

    if gridcenter is not None:
        # Generate a copy of smin/smax from coords to later check if the
        # defined box might be too small for the selection
        smin = np.min(coord, axis=0)
        smax = np.max(coord, axis=0)
        # Overwrite smin/smax with user defined values
        smin, smax = _set_user_grid(gridcenter, xdim, ydim, zdim, smin, smax)
    else:
        # Make the box bigger to avoid as much as possible 'outlier'. This
        # is important if the sites are defined at a high density: in this
        # case the bulk regions don't have to be close to 1 * n0 but can
        # be less. It's much more difficult to deal with outliers.  The
        # ideal solution would use images: implement 'looking across the
        # periodic boundaries' but that gets complicate when the box
        # rotates due to RMS fitting.
        smin = np.min(coord, axis=0) - padding
        smax = np.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    arange = np.vstack((BINS['min'], BINS['max']))
    arange = np.transpose(arange)
    bins = BINS['Nbins']

    # create empty grid with the right dimensions (and get the edges)
    # NOTE: the deprecated `normed` keyword is deliberately not passed: plain
    # (un-normalised) counts are the default of histogramdd and newer NumPy
    # releases reject the keyword.
    grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange)
    grid *= 0.0
    h = grid.copy()

    pm = ProgressMeter(u.trajectory.n_frames, interval=interval,
                       verbose=verbose,
                       format="Histogramming %(n_atoms)6d atoms in frame "
                       "%(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")
    start, stop, step = u.trajectory.check_slice_indices(start, stop, step)
    for ts in u.trajectory[start:stop:step]:
        coord = current_coordinates()

        pm.echo(ts.frame, n_atoms=len(coord))
        if len(coord) == 0:
            # nothing selected in this frame; it still counts for the average
            continue

        h[:], edges[:] = np.histogramdd(coord, bins=bins, range=arange)
        grid += h  # accumulate average histogram

    n_frames = len(range(start, stop, step))
    grid /= float(n_frames)

    metadata = metadata if metadata is not None else {}
    metadata['psf'] = u.filename
    metadata['dcd'] = u.trajectory.filename
    metadata['atomselection'] = atomselection
    metadata['n_frames'] = n_frames
    metadata['totaltime'] = round(u.trajectory.n_frames * u.trajectory.dt, 3)
    metadata['dt'] = u.trajectory.dt
    metadata['time_unit'] = MDAnalysis.core.flags['time_unit']
    try:
        metadata['trajectory_skip'] = u.trajectory.skip_timestep  # frames
    except AttributeError:
        metadata['trajectory_skip'] = 1  # seems to not be used..
    try:
        metadata['trajectory_delta'] = u.trajectory.delta  # in native units
    except AttributeError:
        metadata['trajectory_delta'] = 1
    if cutoff > 0 and soluteselection is not None:
        metadata['soluteselection'] = soluteselection
        metadata['cutoff'] = cutoff  # in Angstrom

    parameters = parameters if parameters is not None else {}
    parameters['isDensity'] = False  # must override


    g = Density(grid=grid, edges=edges, units={'length': MDAnalysis.core.flags['length_unit']},
                parameters=parameters, metadata=metadata)
    g.make_density()
    logger.info("Density completed (initial density in Angstrom**-3)")

    return g
Exemple #32
0
    def run(self, **kwargs):
        """Perform RMSD analysis on the trajectory.

        A number of parameters can be changed from the defaults. The
        result is stored as the array :attr:`RMSD.rmsd`: one row per analysed
        frame with the columns frame, time, RMSD of the main selection and
        then one RMSD column per group selection.

        :Keywords:
          *start*, *stop*, *step*
             start and stop frame index with step size: analyse
             ``trajectory[start:stop:step]`` [``None``]
          *mass_weighted*
             do a mass-weighted RMSD fit
          *tol_mass*
             Reject match if the atomic masses for matched atoms differ by more than
             *tol_mass* (NOTE: this keyword is not read by this method)
          *ref_frame*
             frame index to select frame from *reference*

        """
        start = kwargs.pop('start', None)
        stop = kwargs.pop('stop', None)
        step = kwargs.pop('step', None)
        mass_weighted = kwargs.pop('mass_weighted', self.mass_weighted)
        ref_frame = kwargs.pop('ref_frame', self.ref_frame)

        natoms = self.traj_atoms.n_atoms
        trajectory = self.universe.trajectory
        traj_atoms = self.traj_atoms

        if mass_weighted:
            # if performing a mass-weighted alignment/rmsd calculation
            weight = self.ref_atoms.masses / self.ref_atoms.masses.mean()
        else:
            weight = None

        # reference centre of mass system
        # NOTE(review): the "- 1" presumably converts a 1-based frame number
        # into a 0-based index; if ts.frame is already 0-based this restores
        # the wrong frame in the `finally` below -- confirm against the Reader.
        current_frame = self.reference.trajectory.ts.frame - 1
        try:
            # Move to the ref_frame
            # (coordinates MUST be stored in case the ref traj is advanced elsewhere or if ref == mobile universe)
            self.reference.trajectory[ref_frame]
            ref_com = self.ref_atoms.center_of_mass()
            ref_coordinates = self.ref_atoms.positions - ref_com  # makes a copy
            if self.groupselections_atoms:
                groupselections_ref_coords_T_64 = [
                    self.reference.select_atoms(*s['reference']).positions.T.astype(np.float64) for s in
                    self.groupselections]
        finally:
            # Move back to the original frame
            self.reference.trajectory[current_frame]
        ref_coordinates_T_64 = ref_coordinates.T.astype(np.float64)

        # allocate the array for selection atom coords
        traj_coordinates = traj_atoms.coordinates().copy()

        if self.groupselections_atoms:
            # Only carry out a rotation if we want to calculate secondary RMSDs.
            # R: rotation matrix that aligns r-r_com, x~-x~com
            #    (x~: selected coordinates, x: all coordinates)
            # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
            rot = np.zeros(9, dtype=np.float64)  # allocate space for calculation
            R = np.matrix(rot.reshape(3, 3))
        else:
            rot = None

        # RMSD timeseries
        nframes = len(np.arange(0, len(trajectory))[start:stop:step])
        rmsd = np.zeros((nframes, 3 + len(self.groupselections_atoms)))

        percentage = ProgressMeter(nframes, interval=10,
                                   format="RMSD %(rmsd)5.2f A at frame %(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

        for k, ts in enumerate(trajectory[start:stop:step]):
            # shift coordinates for rotation fitting
            # selection is updated with the time frame
            x_com = traj_atoms.center_of_mass().astype(np.float32)
            traj_coordinates[:] = traj_atoms.coordinates() - x_com

            rmsd[k, :2] = ts.frame, trajectory.time

            if self.groupselections_atoms:
                # 1) superposition structures
                # Need to transpose coordinates such that the coordinate array is
                # 3xN instead of Nx3. Also qcp requires that the dtype be float64
                # (I think we swapped the position of ref and traj in CalcRMSDRotationalMatrix
                # so that R acts **to the left** and can be broadcasted; we're saving
                # one transpose. [orbeckst])
                rmsd[k, 2] = qcp.CalcRMSDRotationalMatrix(ref_coordinates_T_64,
                                                          traj_coordinates.T.astype(np.float64),
                                                          natoms, rot, weight)
                R[:, :] = rot.reshape(3, 3)

                # Transform each atom in the trajectory (use inplace ops to avoid copying arrays)
                # (Marginally (~3%) faster than "ts.positions[:] = (ts.positions - x_com) * R + ref_com".)
                ts.positions -= x_com
                ts.positions[:] = ts.positions * R  # R acts to the left & is broadcasted N times.
                ts.positions += ref_com

                # 2) calculate secondary RMSDs
                # (built-in zip instead of itertools.izip, which does not
                # exist on Python 3; group columns start at index 3)
                for igroup, (refpos, atoms) in enumerate(
                        zip(groupselections_ref_coords_T_64, self.groupselections_atoms), 3):
                    rmsd[k, igroup] = qcp.CalcRMSDRotationalMatrix(refpos,
                                                                   atoms['mobile'].positions.T.astype(np.float64),
                                                                   atoms['mobile'].n_atoms, None, weight)
            else:
                # only calculate RMSD by setting the Rmatrix to None
                # (no need to carry out the rotation as we already get the optimum RMSD)
                rmsd[k, 2] = qcp.CalcRMSDRotationalMatrix(ref_coordinates_T_64,
                                                          traj_coordinates.T.astype(np.float64),
                                                          natoms, None, weight)

            percentage.echo(ts.frame, rmsd=rmsd[k, 2])
        self.rmsd = rmsd
Exemple #33
0
def _append_xvg_row(filename, frame, values):
    """Append the line ``frame value value ...`` to the text file `filename`."""
    with open(filename, "a") as output:
        # default separator of print(): a single space between all fields
        print(frame, *values, file=output)


def helanal_trajectory(universe,
                       select="name CA",
                       begin=None,
                       finish=None,
                       matrix_filename="bending_matrix.dat",
                       origin_pdbfile="origin.pdb",
                       summary_filename="summary.txt",
                       screw_filename="screw.xvg",
                       tilt_filename="local_tilt.xvg",
                       fitted_tilt_filename="fit_tilt.xvg",
                       bend_filename="local_bend.xvg",
                       twist_filename="unit_twist.xvg",
                       prefix="helanal_",
                       ref_axis=None,
                       verbose=False):
    """Perform HELANAL helix analysis on all frames in `universe`.

    Parameters
    ----------
    universe : Universe
    select : str (optional)
        selection string that selects Calpha atoms [``"name CA"``]
    begin : float (optional)
        start analysing for time (ps) >= *begin*; ``None`` starts from the
        beginning [``None``]
    finish : float (optional)
        stop analysis for time (ps) <= *finish*; ``None`` goes to the
        end of the trajectory [``None``]
    matrix_filename : str (optional)
        Output file- bending matrix [``"bending_matrix.dat"``]
    origin_pdbfile : str (optional)
        Output file- origin pdb file [``"origin.pdb"``]
    summary_filename : str (optional)
        Output file- all of the basic data [``"summary.txt"``]
    screw_filename : str (optional)
        Output file- local tilts of individual residues from 2 to n-1
        [``"screw.xvg"``]
    tilt_filename : str (optional)
        Output file- tilt of line of best fit applied to origin axes
        [``"local_tilt.xvg"``]
    fitted_tilt_filename : str (optional)
        Output file- fitted tilt angle (in degrees) for each frame
        [``"fit_tilt.xvg"``]
    bend_filename : str (optional)
        Output file- local bend angles between successive local helix axes
        [``"local_bend.xvg"``]
    twist_filename : str (optional)
        Output file- local unit twist between successive helix turns
        [``"unit_twist.xvg"``]
    prefix : str (optional)
        Prefix to add to all output file names; set to ``None`` to disable
        [``"helanal_"``]
    ref_axis : array_like (optional)
        Calculate tilt angle relative to the axis; if ``None`` then ``[0,0,1]``
        is chosen [``None``]
    verbose : bool (optional)
        Toggle diagnostic outputs. [``False``]

    Raises
    ------
    ValueError
          If the specified start (begin) time occurs after the end of the
          trajectory object.
          If the specified finish time precedes the specified start time or
          current time stamp of trajectory object.

    Notes
    -----
    Only a single helix is analyzed. Use the selection to specify the helix,
    e.g. with "name CA and resid 1:20".

    Each line of the per-frame output files (twist, bend, screw, tilt and
    fitted tilt) consists of the frame number followed by the values of that
    frame, separated by single spaces.


    .. versionchanged:: 0.13.0
       New `quiet` keyword to silence frame progress output and most of the
       output that used to be printed to stdout is now logged to the logger
       *MDAnalysis.analysis.helanal* (at logelevel *INFO*).

    .. versionchanged:: 0.16.0
       Removed the `start` and `end` keywords for selecting residues because this can
       be accomplished more transparently with `select`. The first and last resid
       are directly obtained from the selection.

    .. deprecated:: 0.16.0
       The `quiet` keyword argument is deprecated in favor of the new
       `verbose` one.

    .. versionchanged:: 0.20.0
       ProgressMeter now iterates over the number of frames analysed.

    .. versionchanged:: 1.0.0
       Changed `selection` keyword to `select`
    """
    if ref_axis is None:
        ref_axis = np.array([0., 0., 1.])
    else:
        # enable MDA API so that one can use a tuple of atoms or AtomGroup with
        # two atoms
        ref_axis = np.asarray(ref_axis)

    ca = universe.select_atoms(select)
    start, end = ca.resids[[0, -1]]
    trajectory = universe.trajectory

    # Validate user supplied begin / end times
    traj_end_time = trajectory.ts.time + trajectory.totaltime

    if begin is not None:
        if traj_end_time < begin:
            # Begin occurs after the end of the trajectory, throw error
            msg = ("The input begin time ({0} ps) occurs after the end "
                   "of the trajectory ({1} ps)".format(begin, traj_end_time))
            raise ValueError(msg)
        elif trajectory.ts.time > begin:
            # Begin occurs before trajectory start, warn and reset
            msg = ("The input begin time ({0} ps) precedes the starting "
                   "trajectory time --- Setting starting frame to 0".format(
                       begin))
            warnings.warn(msg)
            logger.warning(msg)
            start_frame = None
        else:
            start_frame = int(
                np.ceil((begin - trajectory.ts.time) / trajectory.ts.dt))
    else:
        start_frame = None

    if finish is not None:
        if (begin is not None) and (begin > finish):
            # finish occurs before begin time
            msg = ("The input finish time ({0} ps) precedes the input begin "
                   "time ({1} ps)".format(finish, begin))
            raise ValueError(msg)
        elif trajectory.ts.time > finish:
            # you'd be starting with a finish time(in ps) that has already
            # passed or is not available
            msg = ("The input finish time ({0} ps) precedes the current "
                   "trajectory time ({1} ps)".format(finish, trajectory.time))
            raise ValueError(msg)
        elif traj_end_time < finish:
            # finish time occurs after the end of trajectory, warn
            msg = ("The input finish time ({0} ps) occurs after the end of "
                   "the trajectory ({1} ps). Finish time will be set to the "
                   "end of the trajectory".format(finish, traj_end_time))
            warnings.warn(msg)
            logger.warning(msg)
            end_frame = None
        else:
            # To replicate the original behaviour of break when
            # trajectory.time > finish, we add 1 here.
            end_frame = int(
                np.floor((finish - trajectory.ts.time) // trajectory.ts.dt) +
                1)
    else:
        end_frame = None

    start_frame, end_frame, frame_step = trajectory.check_slice_indices(
        start_frame, end_frame, 1)
    n_frames = len(range(start_frame, end_frame, frame_step))

    if start is not None and end is not None:
        logger.info("Analysing from residue %d to %d", start, end)
    elif start is not None and end is None:
        logger.info("Analysing from residue %d to the C termini", start)
    elif start is None and end is not None:
        logger.info("Analysing from the N termini to %d", end)
    logger.info("Analysing %d/%d residues", ca.n_atoms,
                universe.atoms.n_residues)

    if prefix is not None:
        prefix = str(prefix)
        matrix_filename = prefix + matrix_filename
        origin_pdbfile = prefix + origin_pdbfile
        summary_filename = prefix + summary_filename
        screw_filename = prefix + screw_filename
        tilt_filename = prefix + tilt_filename
        fitted_tilt_filename = prefix + fitted_tilt_filename
        bend_filename = prefix + bend_filename
        twist_filename = prefix + twist_filename
    # the per-frame files are opened in append mode below, so files left over
    # from a previous run are moved out of the way first
    backup_file(matrix_filename)
    backup_file(origin_pdbfile)
    backup_file(summary_filename)
    backup_file(screw_filename)
    backup_file(tilt_filename)
    backup_file(fitted_tilt_filename)
    backup_file(bend_filename)
    backup_file(twist_filename)

    global_height = []
    global_twist = []
    global_rnou = []
    global_bending = []
    global_bending_matrix = []
    global_fitted_tilts = []

    pm = ProgressMeter(n_frames,
                       verbose=verbose,
                       format="Frame {step:5d}/{numsteps} "
                       "  [{percentage:5.1f}%]")

    for index, ts in enumerate(trajectory[start_frame:end_frame:frame_step]):
        pm.echo(index)
        frame = ts.frame

        ca_positions = ca.positions
        twist, bending_angles, height, rnou, origins, local_helix_axes, local_screw_angles = \
            main_loop(ca_positions, ref_axis=ref_axis)

        origin_pdb(origins, origin_pdbfile)

        # calculate local bending matrix (it is looking at all i, j
        # combinations); only the entries with i < j are filled
        if not global_bending_matrix:
            global_bending_matrix = [[[] for item in local_helix_axes]
                                     for item in local_helix_axes]

        for i in range(len(local_helix_axes)):
            for j in range(i + 1, len(local_helix_axes)):
                angle = np.rad2deg(
                    np.arccos(np.dot(local_helix_axes[i],
                                     local_helix_axes[j])))
                global_bending_matrix[i][j].append(angle)

        fit_vector, fit_tilt = vector_of_best_fit(origins)
        global_height += height
        global_twist += twist
        global_rnou += rnou
        global_fitted_tilts.append(np.rad2deg(fit_tilt))

        # per-frame output: one line per frame in each file
        _append_xvg_row(twist_filename, frame, twist)
        _append_xvg_row(bend_filename, frame, bending_angles)
        _append_xvg_row(screw_filename, frame, local_screw_angles)
        _append_xvg_row(tilt_filename, frame,
                        [np.rad2deg(mdamath.angle(tilt, ref_axis))
                         for tilt in local_helix_axes])
        _append_xvg_row(fitted_tilt_filename, frame, [np.rad2deg(fit_tilt)])

        if not global_bending:
            global_bending = [[] for item in bending_angles]
        for store, tmp in zip(global_bending, bending_angles):
            store.append(tmp)

    twist_mean, twist_sd, twist_abdev = stats(global_twist)
    height_mean, height_sd, height_abdev = stats(global_height)
    rnou_mean, rnou_sd, rnou_abdev = stats(global_rnou)
    ftilt_mean, ftilt_sd, ftilt_abdev = stats(global_fitted_tilts)

    bending_statistics = [stats(item) for item in global_bending]

    bending_statistics_matrix = [[stats(col) for col in row]
                                 for row in global_bending_matrix]
    with open(matrix_filename, 'w') as mat_output:
        # three matrices in a row; `column` selects the entry of the
        # (mean, sd, abdev) triple that stats() returned for each cell
        for header, column in (("Mean", 0), ('\nSD', 1), ("\nABDEV", 2)):
            print(header, file=mat_output)
            for row in bending_statistics_matrix:
                for col in row:
                    formatted_angle = "{0:6.1f}".format(col[column])
                    print(formatted_angle, end='', file=mat_output)
                print('', file=mat_output)

    logger.info("Height: %g  SD: %g  ABDEV: %g  (Angstroem)", height_mean,
                height_sd, height_abdev)
    logger.info("Twist: %g  SD: %g  ABDEV: %g", twist_mean, twist_sd,
                twist_abdev)
    logger.info("Residues/turn: %g  SD: %g  ABDEV: %g", rnou_mean, rnou_sd,
                rnou_abdev)
    logger.info("Fitted tilt: %g  SD: %g  ABDEV: %g", ftilt_mean, ftilt_sd,
                ftilt_abdev)
    logger.info("Local bending angles:")
    residue_statistics = list(zip(*bending_statistics))
    measure_names = ["Mean ", "SD   ", "ABDEV"]
    # column labels of the bending-angle table: consecutive numbers starting
    # at 4, or at start + 3 when the first resid of the selection is known
    first_label = 4 if start is None else start + 3
    labels = range(first_label, len(residue_statistics[0]) + first_label)

    output = " ".join(["{0:8d}".format(item) for item in labels])
    logger.info("ResID %s", output)
    for measure, name in zip(residue_statistics, measure_names):
        output = str(name) + " "
        output += " ".join(["{0:8.1f}".format(residue) for residue in measure])
        logger.info(output)

    with open(summary_filename, 'w') as summary_output:
        # NOTE(review): the unit label here is "(nm)" while the log message
        # above reports the same numbers as "(Angstroem)" -- confirm which
        # unit is correct before changing the file format.
        print("Height:",
              height_mean,
              "SD",
              height_sd,
              "ABDEV",
              height_abdev,
              '(nm)',
              file=summary_output)
        print("Twist:",
              twist_mean,
              "SD",
              twist_sd,
              "ABDEV",
              twist_abdev,
              file=summary_output)
        print("Residues/turn:",
              rnou_mean,
              "SD",
              rnou_sd,
              "ABDEV",
              rnou_abdev,
              file=summary_output)
        print("Local bending angles:", file=summary_output)
        # fixed-width columns, so no separator is needed between the fields
        print("ResID" + "".join(["{0:8d}".format(item) for item in labels]),
              file=summary_output)

        for measure, name in zip(residue_statistics, measure_names):
            print(name + "".join(["{0:8.1f}".format(residue)
                                  for residue in measure]),
                  file=summary_output)
Exemple #34
0
class AnalysisBase(object):
    """Template for analyses that visit a trajectory one frame at a time.

    The base class wires up the trajectory reader, the frame window
    (``start``/``stop``/``step``) and a progress meter, and drives the
    per-frame loop in :meth:`run`.

    A concrete analysis subclasses `AnalysisBase` and must implement
    `_single_frame`. `_prepare` and `_conclude` are optional hooks that run
    before and after the frame loop respectively. For example:

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    The analysis is then executed with :meth:`run`, which returns the
    analysis object itself:

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run()
       print(na.result)

    """

    def __init__(self, trajectory, start=None,
                 stop=None, step=None, verbose=None, quiet=None):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity
        quiet : bool, optional
            Handed to ``_set_verbose`` together with `verbose` to resolve
            the effective verbosity (default: not verbose).
        """
        verbosity = _set_verbose(verbose, quiet, default=False)
        self._verbose = verbosity
        self._quiet = not verbosity
        self._setup_frames(trajectory, start, stop, step)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Register the Reader and the frame window that :meth:`run` iterates.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # The values are stored exactly as passed in (``None`` included);
        # only the frame count below uses the normalised indices.
        self.start, self.stop, self.step = start, stop, step
        first, last, stride = trajectory.check_slice_indices(start, stop, step)
        self.n_frames = len(range(first, last, stride))

        # Report progress roughly every percent, but at least every frame.
        interval = max(int(self.n_frames // 100), 1)

        # Subclasses written against the pre 0.16.0 API may never have called
        # AnalysisBase.__init__, in which case _verbose is still missing.
        if not hasattr(self, '_verbose'):
            if not hasattr(self, '_quiet'):
                self._verbose = True
                self._quiet = not self._verbose
            else:
                # A child class set self._quiet on its own without going
                # through AnalysisBase.__init__.
                message = ("The *_quiet* attribute of analyses is "
                           "deprecated (from 0.16)use *_verbose* instead.")
                warnings.warn(message, DeprecationWarning)
                self._verbose = not self._quiet

        # The meter is always created with at least one step.
        self._pm = ProgressMeter(self.n_frames or 1,
                                 interval=interval, verbose=self._verbose)

    def _single_frame(self):
        """Process the frame the trajectory currently points at.

        Must be overridden by subclasses; normalisation belongs in
        `_conclude`, not here.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Hook that runs once before the frame loop starts."""
        pass

    def _conclude(self):
        """Hook that runs once after the frame loop has finished.

        :meth:`run` calls it last, so results can be finalised here.
        """
        pass

    def run(self):
        """Run `_prepare`, the per-frame loop and `_conclude`; return self."""
        logger.info("Starting preparation")
        self._prepare()
        frames = self._trajectory[self.start:self.stop:self.step]
        for index, timestep in enumerate(frames):
            # Expose loop state to _single_frame through the instance.
            self._frame_index = index
            self._ts = timestep
            self._single_frame()
            self._pm.echo(self._frame_index)
        logger.info("Finishing up")
        self._conclude()
        return self
Exemple #35
0
def rms_fit_trj(traj,
                reference,
                select='all',
                filename=None,
                rmsdfile=None,
                prefix='rmsfit_',
                mass_weighted=False,
                tol_mass=0.1,
                strict=False,
                force=True,
                quiet=False,
                **kwargs):
    """RMS-fit trajectory to a reference structure using a selection.

    Both *reference* and trajectory *traj* must be
    :class:`MDAnalysis.Universe` instances. If they contain a
    trajectory then it is used. The output file format is determined
    by the file extension of *filename*. One can also use the same
    universe if one wants to fit to the current frame.

    :Arguments:
      *traj*
         trajectory, :class:`MDAnalysis.Universe` object
      *reference*
         reference coordinates; :class:`MDAnalysis.Universe` object
         (uses the current time step of the object)
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that produces identical
            selections in *mobile* and *reference*; or
         2. a dictionary ``{'mobile':sel1, 'reference':sel2}`` (the
            :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3. a tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can also each be
         a list of selection strings (to generate a AtomGroup with defined atom order as
         described under :ref:`ordered-selections-label`).
      *filename*
         file name for the RMS-fitted trajectory or pdb; defaults to the
         original trajectory filename (from *traj*) with *prefix* prepended
      *rmsdfile*
         file name for writing the RMSD timeseries [``None``]
      *prefix*
         prefix for autogenerating the new output filename
      *mass_weighted*
         do a mass-weighted RMSD fit
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* [0.1]
      *strict*
         Default: ``False``
         - ``True``: Will raise :exc:`SelectionError` if a single atom does not
           match between the two selections.
         - ``False``: Will try to prepare a matching selection by dropping
           residues with non-matching atoms. See :func:`get_matching_atoms`
           for details.
      *force*
         - ``True``: Overwrite an existing output trajectory (default)
         - ``False``: simply return if the file already exists
      *quiet*
         - ``True``: suppress progress and logging for levels WARNING and below.
         - ``False``: show all status messages and do not change the logging
           level (default)

         .. Note:: With *quiet* = ``True`` the suppression is done through
            :func:`logging.disable`, which affects every logger in the
            process while the function runs. The override is reset to
            ``logging.NOTSET`` on every exit path (normal return, early
            return because the output exists, or an exception).

      *kwargs*
         All other keyword arguments are passed on the trajectory
         :class:`~MDAnalysis.coordinates.base.Writer`; this allows manipulating/fixing
         trajectories on the fly (e.g. change the output format by changing the extension of *filename*
         and setting different parameters as described for the corresponding writer).

    :Returns: *filename* (either provided or auto-generated)

    .. _ClustalW: http://www.clustal.org/
    .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

    .. versionchanged:: 0.8
       Added *kwargs* to be passed to the trajectory :class:`~MDAnalysis.coordinates.base.Writer` and
       *filename* is returned.

    .. versionchanged:: 0.10.0
       Uses :func:`get_matching_atoms` to work with incomplete selections
       and new *strict* keyword. The new default is to be lenient whereas
       the old behavior was the equivalent of *strict* = ``True``.

    """
    frames = traj.trajectory
    if quiet:
        # logging.disable() is a process-wide override; the matching reset
        # sits in the ``finally`` below so that no exit path can leak it.
        logging.disable(logging.WARNING)

    try:
        kwargs.setdefault('remarks', 'RMS fitted trajectory to reference')
        if filename is None:
            path, fn = os.path.split(frames.filename)
            filename = os.path.join(path, prefix + fn)
            _Writer = frames.Writer
        else:
            _Writer = frames.OtherWriter
        if os.path.exists(filename) and not force:
            logger.warning(
                "%s already exists and will NOT be overwritten; "
                "use force=True if you want this", filename)
            return filename
        writer = _Writer(filename, **kwargs)
        del _Writer

        try:
            select = rms._process_selection(select)
            ref_atoms = reference.select_atoms(*select['reference'])
            traj_atoms = traj.select_atoms(*select['mobile'])

            ref_atoms, traj_atoms = get_matching_atoms(ref_atoms,
                                                       traj_atoms,
                                                       tol_mass=tol_mass,
                                                       strict=strict)
            # Count the atoms only after matching: with strict=False the
            # matcher may drop atoms, and the coordinate arrays handed to
            # qcp below are built from the matched groups.
            natoms = len(traj_atoms)

            logger.info("RMS-fitting on %d atoms.", len(ref_atoms))
            if mass_weighted:
                # if performing a mass-weighted alignment/rmsd calculation
                weight = ref_atoms.masses / ref_atoms.masses.mean()
            else:
                weight = None

            # reference centre of mass system
            ref_com = ref_atoms.center_of_mass()
            ref_coordinates = ref_atoms.coordinates() - ref_com

            # allocate the array for selection atom coords
            traj_coordinates = traj_atoms.coordinates().copy()

            # RMSD timeseries
            nframes = len(frames)
            rmsd = np.zeros((nframes, ))

            # R: rotation matrix that aligns r-r_com, x~-x~com
            #    (x~: selected coordinates, x: all coordinates)
            # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
            rot = np.zeros(9, dtype=np.float64)  # allocate space for calculation
            R = np.matrix(rot.reshape(3, 3))

            percentage = ProgressMeter(
                nframes,
                interval=10,
                quiet=quiet,
                format="Fitted frame %(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

            for k, ts in enumerate(frames):
                # shift coordinates for rotation fitting
                # selection is updated with the time frame
                x_com = traj_atoms.center_of_mass().astype(np.float32)
                traj_coordinates[:] = traj_atoms.coordinates() - x_com

                # Need to transpose coordinates such that the coordinate array is
                # 3xN instead of Nx3. Also qcp requires that the dtype be float64
                # (I think we swapped the position of ref and traj in CalcRMSDRotationalMatrix
                # so that R acts **to the left** and can be broadcasted; we're saving
                # one transpose. [orbeckst])
                rmsd[k] = qcp.CalcRMSDRotationalMatrix(
                    ref_coordinates.T.astype(np.float64),
                    traj_coordinates.T.astype(np.float64), natoms, rot, weight)
                R[:, :] = rot.reshape(3, 3)

                # Transform each atom in the trajectory (use inplace ops to avoid copying arrays)
                # (Marginally (~3%) faster than "ts.positions[:] = (ts.positions - x_com) * R + ref_com".)
                ts.positions -= x_com
                ts.positions[:] = ts.positions * R  # R acts to the left & is broadcasted N times.
                ts.positions += ref_com

                writer.write(traj.atoms)  # write whole input trajectory system
                percentage.echo(ts.frame)
        finally:
            # Release the output file even if fitting fails part-way.
            # NOTE(review): assumes the writer exposes close(), as the
            # MDAnalysis coordinate writers do -- confirm for custom writers.
            writer.close()

        logger.info("Wrote %d RMS-fitted coordinate frames to file %r",
                    frames.n_frames, filename)
        if rmsdfile is not None:
            np.savetxt(rmsdfile, rmsd)
            logger.info("Wrote RMSD timeseries  to file %r", rmsdfile)
    finally:
        if quiet:
            logging.disable(logging.NOTSET)

    return filename
Exemple #36
0
class AnalysisBase(object):
    """Skeleton for multi-frame trajectory analyses.

    `AnalysisBase` takes care of the bookkeeping every frame-by-frame
    analysis needs: it keeps the trajectory reader, resolves which frames
    are visited, and creates a progress meter for the loop.

    To write an analysis, derive from `AnalysisBase` and implement
    `_single_frame`. Override `_prepare` and/or `_conclude` when set-up or
    post-processing is required, as in the example below.

    .. code-block:: python

       class NewAnalysis(AnalysisBase):
           def __init__(self, atomgroup, parameter, **kwargs):
               super(NewAnalysis, self).__init__(atomgroup.universe.trajectory,
                                                 **kwargs)
               self._parameter = parameter
               self._ag = atomgroup

           def _prepare(self):
               # OPTIONAL
               # Called before iteration on the trajectory has begun.
               # Data structures can be set up at this time
               self.result = []

           def _single_frame(self):
               # REQUIRED
               # Called after the trajectory is moved onto each new frame.
               # store result of `some_function` for a single frame
               self.result.append(some_function(self._ag, self._parameter))

           def _conclude(self):
               # OPTIONAL
               # Called once iteration on the trajectory is finished.
               # Apply normalisation and averaging to results here.
               self.result = np.asarray(self.result) / np.sum(self.result)

    Because :meth:`run` returns the instance, construction and execution
    can be chained:

    .. code-block:: python

       na = NewAnalysis(u.select_atoms('name CA'), 35).run()
       print(na.result)

    """
    def __init__(self,
                 trajectory,
                 start=None,
                 stop=None,
                 step=None,
                 verbose=None,
                 quiet=None):
        """
        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        verbose : bool, optional
            Turn on verbosity
        quiet : bool, optional
            Combined with `verbose` by ``_set_verbose`` to decide the
            effective verbosity (default: not verbose).
        """
        be_verbose = _set_verbose(verbose, quiet, default=False)
        self._verbose = be_verbose
        self._quiet = not be_verbose
        self._setup_frames(trajectory, start, stop, step)

    def _setup_frames(self, trajectory, start=None, stop=None, step=None):
        """
        Store the Reader and work out which frames :meth:`run` will visit.

        Parameters
        ----------
        trajectory : mda.Reader
            A trajectory Reader
        start : int, optional
            start frame of analysis
        stop : int, optional
            stop frame of analysis
        step : int, optional
            number of frames to skip between each analysed frame
        """
        self._trajectory = trajectory
        # Keep the indices as returned by the reader's own check, not the
        # raw arguments.
        (self.start,
         self.stop,
         self.step) = trajectory.check_slice_indices(start, stop, step)
        self.n_frames = len(range(self.start, self.stop, self.step))

        # One progress update per percent of the run, never less than
        # one update per frame.
        interval = int(self.n_frames // 100)
        if not interval:
            interval = 1

        # Pre 0.16.0 style subclasses may bypass __init__, so _verbose can
        # be missing at this point; fill it in to keep them working.
        needs_verbose = not hasattr(self, '_verbose')
        if needs_verbose and hasattr(self, '_quiet'):
            # Odd case: the child class defined self._quiet itself without
            # going through AnalysisBase.__init__.
            warnings.warn(
                "The *_quiet* attribute of analyses is "
                "deprecated (from 0.16)use *_verbose* instead.",
                DeprecationWarning)
            self._verbose = not self._quiet
        elif needs_verbose:
            self._verbose = True
            self._quiet = not self._verbose

        # An empty frame range still gets a one-step meter.
        total_steps = self.n_frames if self.n_frames else 1
        self._pm = ProgressMeter(total_steps,
                                 interval=interval,
                                 verbose=self._verbose)

    def _single_frame(self):
        """Compute whatever is needed from the current frame only.

        Subclasses must provide this; leave normalisation to `_conclude`.
        """
        raise NotImplementedError("Only implemented in child classes")

    def _prepare(self):
        """Optional set-up step executed before the first frame."""
        pass

    def _conclude(self):
        """Optional clean-up step executed after the last frame.

        It is the final call made by :meth:`run`.
        """
        pass

    def run(self):
        """Execute the analysis over the selected frames and return self."""
        logger.info("Starting preparation")
        self._prepare()
        selected = self._trajectory[self.start:self.stop:self.step]
        for position, ts in enumerate(selected):
            # _single_frame reads the loop state from these attributes.
            self._frame_index, self._ts = position, ts
            self._single_frame()
            self._pm.echo(self._frame_index)
        logger.info("Finishing up")
        self._conclude()
        return self