Exemple #1
0
def rms_fit_trj(traj,
                reference,
                select='all',
                filename=None,
                rmsdfile=None,
                prefix='rmsfit_',
                mass_weighted=False,
                tol_mass=0.1,
                force=True,
                quiet=False,
                **kwargs):
    """RMS-fit trajectory to a reference structure using a selection.

    Both *reference* and trajectory *traj* must be
    :class:`MDAnalysis.Universe` instances. If they contain a
    trajectory then it is used. The output file format is determined
    by the file extension of *filename*. One can also use the same
    universe if one wants to fit to the current frame.

    :Arguments:
      *traj*
         trajectory, :class:`MDAnalysis.Universe` object
      *reference*
         reference coordinates; :class:`MDAnalysis.Universe` object
         (uses the current time step of the object)
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.selectAtoms` that produces identical
            selections in *traj* and *reference*; or
         2. a dictionary ``{'mobile':sel1, 'reference':sel2}`` (the
            :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3. a tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can also each be
         a list of selection strings (to generate a AtomGroup with defined atom order as
         described under :ref:`ordered-selections-label`).
      *filename*
         file name for the RMS-fitted trajectory or pdb; defaults to the
         original trajectory filename (from *traj*) with *prefix* prepended
      *rmsdfile*
         file name for writing the RMSD timeseries [``None``]
      *prefix*
         prefix for autogenerating the new output filename
      *mass_weighted*
         do a mass-weighted RMSD fit
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* [0.1]
      *force*
         - ``True``: Overwrite an existing output trajectory (default)
         - ``False``: simply return if the file already exists
      *quiet*
         - ``True``: suppress the progress meter and all logging at level
           WARNING and below for the duration of the call.
         - ``False``: show all status messages and do not change the logging
           level (default)

         .. Note:: Suppression uses :func:`logging.disable`, which acts on
            the whole process; logging is re-enabled (reset to ``NOTSET``)
            when the function returns or raises.
      *kwargs*
         All other keyword arguments are passed on the trajectory
         :class:`~MDAnalysis.coordinates.base.Writer`; this allows manipulating/fixing
         trajectories on the fly (e.g. change the output format by changing the extension of *filename*
         and setting different parameters as described for the corresponding writer).

    :Returns: *filename* (either provided or auto-generated)

    .. _ClustalW: http://www.clustal.org/
    .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

    .. versionchanged:: 0.8
       Added *kwargs* to be passed to the trajectory :class:`~MDAnalysis.coordinates.base.Writer` and
       *filename* is returned.
    """
    frames = traj.trajectory
    if quiet:
        logging.disable(logging.WARN)

    # Everything below runs inside try/finally so that the global logging
    # state is restored on every exit path (early return and exceptions).
    try:
        kwargs.setdefault('remarks', 'RMS fitted trajectory to reference')
        if filename is None:
            path, fn = os.path.split(frames.filename)
            filename = os.path.join(path, prefix + fn)
            _Writer = frames.Writer
        else:
            _Writer = frames.OtherWriter
        if os.path.exists(filename) and not force:
            logger.warning(
                "{0} already exists and will NOT be overwritten; use force=True if you want this"
                .format(filename))
            return filename

        writer = _Writer(filename, **kwargs)
        try:
            select = _process_selection(select)
            ref_atoms = reference.selectAtoms(*select['reference'])
            traj_atoms = traj.selectAtoms(*select['mobile'])
            natoms = traj_atoms.numberOfAtoms()

            check_same_atoms(ref_atoms, traj_atoms, tol_mass=tol_mass)

            logger.info("RMS-fitting on %d atoms.", len(ref_atoms))
            if mass_weighted:
                # relative weights for a mass-weighted alignment/rmsd calculation
                masses = ref_atoms.masses()
                weight = masses / masses.mean()
            else:
                weight = None

            # reference centre of mass system
            # (compatibility with pre 1.0 numpy: explicitly cast coords to float32)
            ref_com = ref_atoms.centerOfMass().astype(numpy.float32)
            ref_coordinates = ref_atoms.coordinates() - ref_com

            # allocate the array for selection atom coords
            traj_coordinates = traj_atoms.coordinates().copy()

            # RMSD timeseries
            nframes = len(frames)
            rmsd = numpy.zeros((nframes, ))

            # R: rotation matrix that aligns r-r_com, x~-x~com
            #    (x~: selected coordinates, x: all coordinates)
            # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
            rot = numpy.zeros(9, dtype=numpy.float64)  # filled in place by qcp
            R = numpy.matrix(rot.reshape(3, 3))

            percentage = ProgressMeter(
                nframes,
                interval=10,
                quiet=quiet,
                format="Fitted frame %(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

            for k, ts in enumerate(frames):
                # shift coordinates for rotation fitting
                # selection is updated with the time frame
                x_com = traj_atoms.centerOfMass().astype(numpy.float32)
                traj_coordinates[:] = traj_atoms.coordinates() - x_com

                # Need to transpose coordinates such that the coordinate array is
                # 3xN instead of Nx3. Also qcp requires that the dtype be float64
                # (I think we swapped the position of ref and traj in CalcRMSDRotationalMatrix
                # so that R acts **to the left** and can be broadcasted; we're saving
                # one transpose. [orbeckst])
                rmsd[k] = qcp.CalcRMSDRotationalMatrix(
                    ref_coordinates.T.astype(numpy.float64),
                    traj_coordinates.T.astype(numpy.float64), natoms, rot, weight)
                R[:, :] = rot.reshape(3, 3)

                # Transform each atom in the trajectory (use inplace ops to avoid copying arrays)
                # (Marginally (~3%) faster than "ts._pos[:] = (ts._pos - x_com) * R + ref_com".)
                ts._pos -= x_com
                ts._pos[:] = ts._pos * R  # R acts to the left & is broadcasted N times.
                ts._pos += ref_com

                writer.write(traj.atoms)  # write whole input trajectory system
                percentage.echo(ts.frame)
        finally:
            # Release the output file even if fitting or writing failed.
            # NOTE(review): assumes the Writer exposes close() as described
            # in the trajectory API -- confirm for custom writers.
            writer.close()

        logger.info("Wrote %d RMS-fitted coordinate frames to file %r",
                    frames.numframes, filename)
        if rmsdfile is not None:
            numpy.savetxt(rmsdfile, rmsd)
            logger.info("Wrote RMSD timeseries  to file %r", rmsdfile)

        return filename
    finally:
        if quiet:
            logging.disable(logging.NOTSET)
def rms_fit_trj(traj, reference, select='all', filename=None, rmsdfile=None, prefix='rmsfit_',
                mass_weighted=False, tol_mass=0.1, strict=False, force=True, quiet=False, **kwargs):
    """RMS-fit trajectory to a reference structure using a selection.

    Both *reference* and trajectory *traj* must be
    :class:`MDAnalysis.Universe` instances. If they contain a
    trajectory then it is used. The output file format is determined
    by the file extension of *filename*. One can also use the same
    universe if one wants to fit to the current frame.

    :Arguments:
      *traj*
         trajectory, :class:`MDAnalysis.Universe` object
      *reference*
         reference coordinates; :class:`MDAnalysis.Universe` object
         (uses the current time step of the object)
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that produces identical
            selections in *traj* and *reference*; or
         2. a dictionary ``{'mobile':sel1, 'reference':sel2}`` (the
            :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3. a tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can also each be
         a list of selection strings (to generate a AtomGroup with defined atom order as
         described under :ref:`ordered-selections-label`).
      *filename*
         file name for the RMS-fitted trajectory or pdb; defaults to the
         original trajectory filename (from *traj*) with *prefix* prepended
      *rmsdfile*
         file name for writing the RMSD timeseries [``None``]
      *prefix*
         prefix for autogenerating the new output filename
      *mass_weighted*
         do a mass-weighted RMSD fit
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* [0.1]
      *strict*
         Default: ``False``

         - ``True``: Will raise :exc:`SelectionError` if a single atom does not
           match between the two selections.
         - ``False``: Will try to prepare a matching selection by dropping
           residues with non-matching atoms. See :func:`get_matching_atoms`
           for details.
      *force*
         - ``True``: Overwrite an existing output trajectory (default)
         - ``False``: simply return if the file already exists
      *quiet*
         - ``True``: suppress the progress meter and all logging at level
           WARNING and below for the duration of the call.
         - ``False``: show all status messages and do not change the logging
           level (default)

         .. Note:: Suppression uses :func:`logging.disable`, which acts on
            the whole process; logging is re-enabled (reset to ``NOTSET``)
            when the function returns or raises.
      *kwargs*
         All other keyword arguments are passed on the trajectory
         :class:`~MDAnalysis.coordinates.base.Writer`; this allows manipulating/fixing
         trajectories on the fly (e.g. change the output format by changing the extension of *filename*
         and setting different parameters as described for the corresponding writer).

    :Returns: *filename* (either provided or auto-generated)

    .. _ClustalW: http://www.clustal.org/
    .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

    .. versionchanged:: 0.8
       Added *kwargs* to be passed to the trajectory :class:`~MDAnalysis.coordinates.base.Writer` and
       *filename* is returned.

    .. versionchanged:: 0.10.0
       Uses :func:`get_matching_atoms` to work with incomplete selections
       and new *strict* keyword. The new default is to be lenient whereas
       the old behavior was the equivalent of *strict* = ``True``.

    """
    frames = traj.trajectory
    if quiet:
        logging.disable(logging.WARN)

    # Everything below runs inside try/finally so that the global logging
    # state is restored on every exit path (early return and exceptions).
    try:
        kwargs.setdefault('remarks', 'RMS fitted trajectory to reference')
        if filename is None:
            path, fn = os.path.split(frames.filename)
            filename = os.path.join(path, prefix + fn)
            _Writer = frames.Writer
        else:
            _Writer = frames.OtherWriter
        if os.path.exists(filename) and not force:
            logger.warning(
                "{0} already exists and will NOT be overwritten; use force=True if you want this".format(filename))
            return filename

        writer = _Writer(filename, **kwargs)
        try:
            select = rms._process_selection(select)
            ref_atoms = reference.select_atoms(*select['reference'])
            traj_atoms = traj.select_atoms(*select['mobile'])

            ref_atoms, traj_atoms = get_matching_atoms(ref_atoms, traj_atoms,
                                                         tol_mass=tol_mass, strict=strict)
            # Count the atoms only AFTER matching: with strict=False the
            # matching step may drop atoms, and qcp must be given the size
            # of the coordinate arrays that are actually fitted.
            natoms = traj_atoms.n_atoms

            logger.info("RMS-fitting on %d atoms.", len(ref_atoms))
            if mass_weighted:
                # relative weights for a mass-weighted alignment/rmsd calculation
                weight = ref_atoms.masses / ref_atoms.masses.mean()
            else:
                weight = None

            # reference centre of mass system
            ref_com = ref_atoms.center_of_mass()
            ref_coordinates = ref_atoms.positions - ref_com

            # allocate the array for selection atom coords
            traj_coordinates = traj_atoms.positions.copy()

            # RMSD timeseries
            nframes = len(frames)
            rmsd = np.zeros((nframes,))

            # R: rotation matrix that aligns r-r_com, x~-x~com
            #    (x~: selected coordinates, x: all coordinates)
            # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com
            rot = np.zeros(9, dtype=np.float64)  # filled in place by qcp
            R = np.matrix(rot.reshape(3, 3))

            percentage = ProgressMeter(nframes, interval=10, quiet=quiet,
                                       format="Fitted frame %(step)5d/%(numsteps)d  [%(percentage)5.1f%%]\r")

            for k, ts in enumerate(frames):
                # shift coordinates for rotation fitting
                # selection is updated with the time frame
                x_com = traj_atoms.center_of_mass().astype(np.float32)
                traj_coordinates[:] = traj_atoms.positions - x_com

                # Need to transpose coordinates such that the coordinate array is
                # 3xN instead of Nx3. Also qcp requires that the dtype be float64
                # (I think we swapped the position of ref and traj in CalcRMSDRotationalMatrix
                # so that R acts **to the left** and can be broadcasted; we're saving
                # one transpose. [orbeckst])
                rmsd[k] = qcp.CalcRMSDRotationalMatrix(ref_coordinates.T.astype(np.float64),
                                                       traj_coordinates.T.astype(np.float64),
                                                       natoms, rot, weight)
                R[:, :] = rot.reshape(3, 3)

                # Transform each atom in the trajectory (use inplace ops to avoid copying arrays)
                # (Marginally (~3%) faster than "ts.positions[:] = (ts.positions - x_com) * R + ref_com".)
                ts.positions -= x_com
                ts.positions[:] = ts.positions * R  # R acts to the left & is broadcasted N times.
                ts.positions += ref_com

                writer.write(traj.atoms)  # write whole input trajectory system
                percentage.echo(ts.frame)
        finally:
            # Release the output file even if fitting or writing failed.
            # NOTE(review): assumes the Writer exposes close() as described
            # in the trajectory API -- confirm for custom writers.
            writer.close()

        logger.info("Wrote %d RMS-fitted coordinate frames to file %r",
                    frames.n_frames, filename)
        if rmsdfile is not None:
            np.savetxt(rmsdfile, rmsd)
            logger.info("Wrote RMSD timeseries  to file %r", rmsdfile)

        return filename
    finally:
        if quiet:
            logging.disable(logging.NOTSET)
Exemple #3
0
def alignto(mobile,
            reference,
            select="all",
            mass_weighted=False,
            subselection=None,
            tol_mass=0.1):
    """Spatially align *mobile* to *reference* by doing a RMSD fit on *select* atoms.

    The superposition is done in the following way:

    1. A rotation matrix is computed that minimizes the RMSD between
       the coordinates of `mobile.selectAtoms(sel1)` and
       `reference.selectAtoms(sel2)`; before the rotation, *mobile* is
       translated so that its center of geometry (or center of mass)
       coincides with the one of *reference*. (See below for explanation of
       how *sel1* and *sel2* are derived from *select*.)

    2. All atoms in :class:`~MDAnalysis.core.AtomGroup.Universe` that
       contains *mobile* are shifted and rotated. (See below for how
       to change this behavior through the *subselection* keyword.)

    The *mobile* and *reference* atom groups can be constructed so that they
    already match atom by atom. In this case, *select* should be set to "all"
    (or ``None``) so that no further selections are applied to *mobile* and
    *reference*, therefore preserving the exact atom ordering (see
    :ref:`ordered-selections-label`).

    .. Warning:: The atom order for *mobile* and *reference* is *only*
       preserved when *select* is either "all" or ``None``. In any other case,
       a new selection will be made that will sort the resulting AtomGroup by
       index and therefore destroy the correspondence between the two groups. **It
       is safest not to mix ordered AtomGroups with selection strings.**

    :Arguments:
      *mobile*
         structure to be aligned, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup`
         or a whole :class:`~MDAnalysis.core.AtomGroup.Universe`
      *reference*
         reference structure, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup`
         or a whole :class:`~MDAnalysis.core.AtomGroup.Universe`
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.selectAtoms` that produces identical
            selections in *mobile* and *reference*; or
         2. dictionary ``{'mobile':sel1, 'reference':sel2}``.
            (the :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3.  tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can also each be
         a list of selection strings (to generate a AtomGroup with defined atom order as
         described under :ref:`ordered-selections-label`).
      *mass_weighted* : boolean
         ``True`` uses the masses :meth:`reference.masses` as weights for the
         RMSD fit and superimposes centers of mass; ``False`` [default]
         uses no weights and superimposes centers of geometry.
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* [0.1]
      *subselection*
         Apply the transformation only to this selection.

         ``None`` [default]
             Apply to `mobile.universe.atoms` (i.e. all atoms in the
             context of the selection from *mobile* such as the rest of a
             protein, ligands and the surrounding water)
         *selection-string*
             Apply to `mobile.selectAtoms(selection-string)`
         :class:`~MDAnalysis.core.AtomGroup.AtomGroup`
             Apply to the arbitrary group of atoms

    :Returns: RMSD before and after spatial alignment, as the tuple
              ``(old_rmsd, new_rmsd)``.

    :Raises: :exc:`TypeError` if *subselection* is neither ``None``, a
             string, nor an object with an ``atoms`` attribute.

    .. SeeAlso:: For RMSD-fitting trajectories it is more efficient to
                 use :func:`rms_fit_trj`.

    .. versionchanged:: 0.8
       Added check that the two groups describe the same atoms including
       the new *tol_mass* keyword.
    """
    if select in ('all', None):
        # keep the EXACT order in the input AtomGroups; selectAtoms('all')
        # orders them by index, which can lead to wrong results if the user
        # has crafted mobile and reference to match atom by atom
        mobile_atoms = mobile.atoms
        ref_atoms = reference.atoms
    else:
        select = _process_selection(select)
        mobile_atoms = mobile.selectAtoms(*select['mobile'])
        ref_atoms = reference.selectAtoms(*select['reference'])

    check_same_atoms(ref_atoms, mobile_atoms, tol_mass=tol_mass)

    if mass_weighted:
        ref_masses = ref_atoms.masses()
        weights = ref_masses / numpy.mean(ref_masses)
        ref_com = ref_atoms.centerOfMass()
        mobile_com = mobile_atoms.centerOfMass()
    else:
        weights = None
        ref_com = ref_atoms.centerOfGeometry()
        mobile_com = mobile_atoms.centerOfGeometry()

    # the rotation is fitted on coordinates centred on their own origin
    ref_coordinates = ref_atoms.coordinates() - ref_com
    mobile_coordinates = mobile_atoms.coordinates() - mobile_com

    # RMSD of the structures as they are on input (no centring, no rotation)
    old_rmsd = rmsd(mobile_atoms.coordinates(), ref_atoms.coordinates())

    R, new_rmsd = rotation_matrix(mobile_coordinates,
                                  ref_coordinates,
                                  weights=weights)

    if subselection is None:
        atoms = mobile.universe.atoms
    elif isinstance(subselection, str):
        # isinstance (not an exact type comparison) so that str subclasses
        # are also treated as selection strings
        atoms = mobile.selectAtoms(subselection)
    else:
        try:
            atoms = subselection.atoms
        except AttributeError:
            raise TypeError(
                "subselection must be a selection string, a AtomGroup or Universe or None"
            )

    # move to the origin, rotate, then move onto the reference centre
    atoms.translate(-mobile_com)
    atoms.rotate(R)
    atoms.translate(ref_com)

    return old_rmsd, new_rmsd
def alignto(mobile, reference, select="all", mass_weighted=False,
            subselection=None, tol_mass=0.1, strict=False):
    """Spatially align *mobile* to *reference* by doing a RMSD fit on *select* atoms.

    The superposition is done in the following way:

    1. A rotation matrix is computed that minimizes the RMSD between
       the coordinates of `mobile.select_atoms(sel1)` and
       `reference.select_atoms(sel2)`; before the rotation, *mobile* is
       translated so that its center of geometry (or center of mass)
       coincides with the one of *reference*. (See below for explanation of
       how *sel1* and *sel2* are derived from *select*.)

    2. All atoms in :class:`~MDAnalysis.core.AtomGroup.Universe` that
       contains *mobile* are shifted and rotated. (See below for how
       to change this behavior through the *subselection* keyword.)

    The *mobile* and *reference* atom groups can be constructed so that they
    already match atom by atom. In this case, *select* should be set to "all"
    (or ``None``) so that no further selections are applied to *mobile* and
    *reference*, therefore preserving the exact atom ordering (see
    :ref:`ordered-selections-label`).

    .. Warning:: The atom order for *mobile* and *reference* is *only*
       preserved when *select* is either "all" or ``None``. In any other case,
       a new selection will be made that will sort the resulting AtomGroup by
       index and therefore destroy the correspondence between the two groups. **It
       is safest not to mix ordered AtomGroups with selection strings.**

    :Arguments:
      *mobile*
         structure to be aligned, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup`
         or a whole :class:`~MDAnalysis.core.AtomGroup.Universe`
      *reference*
         reference structure, a :class:`~MDAnalysis.core.AtomGroup.AtomGroup`
         or a whole :class:`~MDAnalysis.core.AtomGroup.Universe`
      *select*
         1. any valid selection string for
            :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` that produces identical
            selections in *mobile* and *reference*; or
         2. dictionary ``{'mobile':sel1, 'reference':sel2}``.
            (the :func:`fasta2select` function returns such a
            dictionary based on a ClustalW_ or STAMP_ sequence alignment); or
         3.  tuple ``(sel1, sel2)``

         When using 2. or 3. with *sel1* and *sel2* then these selections can also each be
         a list of selection strings (to generate a AtomGroup with defined atom order as
         described under :ref:`ordered-selections-label`).
      *mass_weighted* : boolean
         ``True`` uses the masses :meth:`reference.masses` as weights for the
         RMSD fit and superimposes centers of mass; ``False`` [default]
         uses no weights and superimposes centers of geometry.
      *tol_mass*
         Reject match if the atomic masses for matched atoms differ by more than
         *tol_mass* [0.1]
      *strict*
         ``True``
             Will raise :exc:`SelectionError` if a single atom does not
             match between the two selections.
         ``False`` [default]
             Will try to prepare a matching selection by dropping
             residues with non-matching atoms. See :func:`get_matching_atoms`
             for details.
      *subselection*
         Apply the transformation only to this selection.

         ``None`` [default]
             Apply to `mobile.universe.atoms` (i.e. all atoms in the
             context of the selection from *mobile* such as the rest of a
             protein, ligands and the surrounding water)
         *selection-string*
             Apply to `mobile.select_atoms(selection-string)`
         :class:`~MDAnalysis.core.AtomGroup.AtomGroup`
             Apply to the arbitrary group of atoms

    :Returns: RMSD before and after spatial alignment, as the tuple
              ``(old_rmsd, new_rmsd)``. *old_rmsd* is computed on the
              centred (translated but not yet rotated) coordinates.

    :Raises: :exc:`TypeError` if *subselection* is neither ``None``, a
             string, nor an object with an ``atoms`` attribute.

    .. SeeAlso:: For RMSD-fitting trajectories it is more efficient to
                 use :func:`rms_fit_trj`.

    .. versionchanged:: 0.8
       Added check that the two groups describe the same atoms including
       the new *tol_mass* keyword.

    .. versionchanged:: 0.10.0
       Uses :func:`get_matching_atoms` to work with incomplete selections
       and new *strict* keyword. The new default is to be lenient whereas
       the old behavior was the equivalent of *strict* = ``True``.
    """
    if select in ('all', None):
        # keep the EXACT order in the input AtomGroups; select_atoms('all')
        # orders them by index, which can lead to wrong results if the user
        # has crafted mobile and reference to match atom by atom
        mobile_atoms = mobile.atoms
        ref_atoms = reference.atoms
    else:
        select = rms._process_selection(select)
        mobile_atoms = mobile.select_atoms(*select['mobile'])
        ref_atoms = reference.select_atoms(*select['reference'])

    # may return reduced groups when strict=False; everything below uses
    # the matched groups only
    ref_atoms, mobile_atoms = get_matching_atoms(ref_atoms, mobile_atoms,
                                                 tol_mass=tol_mass, strict=strict)

    if mass_weighted:
        weights = ref_atoms.masses / np.mean(ref_atoms.masses)
        ref_com = ref_atoms.center_of_mass()
        mobile_com = mobile_atoms.center_of_mass()
    else:
        weights = None
        ref_com = ref_atoms.center_of_geometry()
        mobile_com = mobile_atoms.center_of_geometry()

    # the rotation is fitted on coordinates centred on their own origin
    ref_coordinates = ref_atoms.positions - ref_com
    mobile_coordinates = mobile_atoms.positions - mobile_com

    old_rmsd = rms.rmsd(mobile_coordinates, ref_coordinates)

    R, new_rmsd = rotation_matrix(mobile_coordinates, ref_coordinates, weights=weights)

    if subselection is None:
        atoms = mobile.universe.atoms
    elif isinstance(subselection, str):
        # isinstance (not an exact type comparison) so that str subclasses
        # are also treated as selection strings
        atoms = mobile.select_atoms(subselection)
    else:
        try:
            atoms = subselection.atoms
        except AttributeError:
            raise TypeError("subselection must be a selection string, a AtomGroup or Universe or None")

    # move to the origin, rotate, then move onto the reference centre
    atoms.translate(-mobile_com)
    atoms.rotate(R)
    atoms.translate(ref_com)

    return old_rmsd, new_rmsd