Example #1
0
def compute_dihedrals(traj, indices, periodic=True, opt=True):
    """Compute the dihedral angles between the supplied quartets of atoms in each frame in a trajectory.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        Each row gives the indices of four atoms which together make a
        dihedral angle. The angle is between the planes spanned by the first
        three atoms and the last three atoms, a torsion around the bond
        between the middle two atoms.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will treat dihedrals that cross periodic images
        using the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        The output array gives, in each frame from the trajectory, each of the
        `n_dihedrals` torsion angles. The angles are measured in **radians**.

    """
    xyz = ensure_type(traj.xyz,
                      dtype=np.float32,
                      ndim=3,
                      name='traj.xyz',
                      shape=(None, None, 3),
                      warn_on_cast=False)
    quartets = ensure_type(np.asarray(indices),
                           dtype=np.int32,
                           ndim=2,
                           name='indices',
                           shape=(None, 4),
                           warn_on_cast=False)
    if not np.all(np.logical_and(quartets < traj.n_atoms, quartets >= 0)):
        raise ValueError('indices must be between 0 and %d' % traj.n_atoms)

    out = np.zeros((xyz.shape[0], quartets.shape[0]), dtype=np.float32)
    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors,
                          dtype=np.float32,
                          ndim=3,
                          name='unitcell_vectors',
                          shape=(len(xyz), 3, 3))
        if opt and _geometry._processor_supports_sse41():
            _geometry._dihedral_mic(xyz, quartets, box, out)
            return out
        else:
            _dihedral(traj, quartets, periodic, out)
            return out

    if opt and _geometry._processor_supports_sse41():
        _geometry._dihedral(xyz, quartets, out)
    else:
        _dihedral(traj, quartets, periodic, out)
    return out
Example #2
0
def compute_distances(traj, atom_pairs, periodic=True, opt=True):
    """Compute the distances between pairs of atoms in each frame.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    atom_pairs : np.ndarray, shape=(num_pairs, 2), dtype=int
        Each row gives the indices of two atoms involved in the interaction.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to calculate distances. Our optimized
        SSE minimum image convention calculation implementation is over 1000x
        faster than the naive numpy implementation.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, num_pairs), dtype=float
        The distance, in each frame, between each pair of atoms.
    """
    xyz = ensure_type(traj.xyz,
                      dtype=np.float32,
                      ndim=3,
                      name='traj.xyz',
                      shape=(None, None, 3),
                      warn_on_cast=False)
    pairs = ensure_type(np.asarray(atom_pairs),
                        dtype=np.int32,
                        ndim=2,
                        name='atom_pairs',
                        shape=(None, 2),
                        warn_on_cast=False)
    if not np.all(np.logical_and(pairs < traj.n_atoms, pairs >= 0)):
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors,
                          dtype=np.float32,
                          ndim=3,
                          name='unitcell_vectors',
                          shape=(len(xyz), 3, 3))
        if opt and _geometry._processor_supports_sse41():
            out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
            _geometry._dist_mic(xyz, pairs, box, out)
            return out
        else:
            return _distance_mic(xyz, pairs, box)

    # either there are no unitcell vectors or they dont want to use them
    if opt and _geometry._processor_supports_sse41():
        out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
        _geometry._dist(xyz, pairs, out)
        return out
    else:
        return _distance(xyz, pairs)
Example #3
0
def compute_dihedrals(traj, indices, periodic=True, opt=True):
    """Compute the dihedral angles between the supplied quartets of atoms in each frame in a trajectory.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        Each row gives the indices of four atoms which together make a
        dihedral angle. The angle is between the planes spanned by the first
        three atoms and the last three atoms, a torsion around the bond
        between the middle two atoms.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will treat dihedrals that cross periodic images
        using the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        The output array gives, in each frame from the trajectory, each of the
        `n_dihedrals` torsion angles. The angles are measured in **radians**.

    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz', shape=(None, None, 3), warn_on_cast=False)
    quartets = ensure_type(indices, dtype=np.int32, ndim=2, name='indices', shape=(None, 4), warn_on_cast=False)
    if not np.all(np.logical_and(quartets < traj.n_atoms, quartets >= 0)):
        raise ValueError('indices must be between 0 and %d' % traj.n_atoms)

    if len(quartets) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    out = np.zeros((xyz.shape[0], quartets.shape[0]), dtype=np.float32)
    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3, name='unitcell_vectors', shape=(len(xyz), 3, 3))
        if opt and _geometry._processor_supports_sse41():
            _geometry._dihedral_mic(xyz, quartets, box, out)
            return out
        else:
            _dihedral(traj, quartets, periodic, out)
            return out

    if opt and _geometry._processor_supports_sse41():
        _geometry._dihedral(xyz, quartets, out)
    else:
        _dihedral(traj, quartets, periodic, out)
    return out
Example #4
0
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960):
    """Compute the solvent accessible surface area of each atom in each simulation frame.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_pts : int, optional
        The number of points representing the surface of each atom, higher
        values leads to more accuracy.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_atoms)
        The accessible surface area of each atom in every frame

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the Golden
    Section Spiral algorithm to generate the sphere points. The basic idea
    is to great a mesh of points representing the surface of each atom
    (at a distance of the van der waals radius plus the probe
    radius from the nuclei), and then count the number of such mesh points
    that are on the molecular surface -- i.e. not within the radius of another
    atom. Assuming that the points are evenly distributed, the number of points
    is directly proportional to the accessible surface area (its just 4*pi*r^2
    time the fraction of the points that are accessible).

    There are a number of different ways to generate the points on the sphere --
    possibly the best way would be to do a little "molecular dyanmics" : put the
    points on the sphere, and then run MD where all the points repel one another
    and wait for them to get to an energy minimum. But that sounds expensive.

    This code uses the golden section spiral algorithm
    (picture at http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make this spiral that traces out the unit sphere and then put points
    down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for generating
    points on the sphere, which is based on an icosahedral tesselation.
    roughly, the icosahedral tesselation works something like this
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    if not _geometry._processor_supports_sse41():
        raise RuntimeError('This CPU does not support the required instruction set (SSE4.1)')

    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz', shape=(None, None, 3), warn_on_cast=False)
    out = np.zeros((xyz.shape[0], xyz.shape[1]), dtype=np.float32)
    atom_radii = [_ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms]
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), out)

    return out
Example #5
0
def compute_distances(traj, atom_pairs, periodic=True, opt=True):
    """Compute the distances between pairs of atoms in each frame.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    atom_pairs : np.ndarray, shape=(num_pairs, 2), dtype=int
        Each row gives the indices of two atoms involved in the interaction.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to calculate distances. Our optimized
        SSE minimum image convention calculation implementation is over 1000x
        faster than the naive numpy implementation.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, num_pairs), dtype=float
        The distance, in each frame, between each pair of atoms.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz', shape=(None, None, 3), warn_on_cast=False)
    pairs = ensure_type(atom_pairs, dtype=np.int32, ndim=2, name='atom_pairs', shape=(None, 2), warn_on_cast=False)
    if not np.all(np.logical_and(pairs < traj.n_atoms, pairs >= 0)):
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    if len(pairs) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3, name='unitcell_vectors', shape=(len(xyz), 3, 3))
        if opt and _geometry._processor_supports_sse41():
            out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
            _geometry._dist_mic(xyz, pairs, box, out)
            return out
        else:
            return _distance_mic(xyz, pairs, box)

    # either there are no unitcell vectors or they dont want to use them
    if opt and _geometry._processor_supports_sse41():
        out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
        _geometry._dist(xyz, pairs, out)
        return out
    else:
        return _distance(xyz, pairs)
Example #6
0
def compute_angles(traj, angle_indices, periodic=True, opt=True):
    """Compute the bond angles between the supplied triplets of indices in each frame of a trajectory.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    angle_indices : np.ndarray, shape=(num_pairs, 2), dtype=int
       Each row gives the indices of three atoms which together make an angle.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will treat angles that cross periodic images using
        the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate distances. Our optimized
        SSE angle calculation implementation is 10-20x faster than the
        (itself optimized) numpy implementation.

    Returns
    -------
    angles : np.ndarray, shape=[n_frames, n_angles], dtype=float
        The angles are in radians
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz', shape=(None, None, 3), warn_on_cast=False)
    triplets = ensure_type(angle_indices, dtype=np.int32, ndim=2, name='angle_indices', shape=(None, 3), warn_on_cast=False)
    if not np.all(np.logical_and(triplets < traj.n_atoms, triplets >= 0)):
        raise ValueError('angle_indices must be between 0 and %d' % traj.n_atoms)

    if len(triplets) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    out = np.zeros((xyz.shape[0], triplets.shape[0]), dtype=np.float32)
    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3, name='unitcell_vectors', shape=(len(xyz), 3, 3))
        if opt and _geometry._processor_supports_sse41():
            _geometry._angle_mic(xyz, triplets, box, out)
            return out
        else:
            _angle(traj, triplets, periodic, out)
            return out

    if opt and _geometry._processor_supports_sse41():
        _geometry._angle(xyz, triplets, out)
    else:
        _angle(traj, triplets, periodic, out)
    return out
Example #7
0
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960, mode='atom'):
    """Compute the solvent accessible surface area of each atom or residue in each simulation frame.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_pts : int, optional
        The number of points representing the surface of each atom, higher
        values leads to more accuracy.
    mode : {'atom', 'residue'}
        In mode == 'atom', the extracted areas are resolved per-atom
        In mode == 'residue', this is consolidated down to the
        per-residue SASA by summing over the atoms in each residue.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_features)
        The accessible surface area of each atom or residue in every frame.
        If mode == 'atom', the second dimension will index the atoms in
        the trajectory, whereas if mode == 'residue', the second
        dimension will index the residues.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the Golden
    Section Spiral algorithm to generate the sphere points. The basic idea
    is to great a mesh of points representing the surface of each atom
    (at a distance of the van der waals radius plus the probe
    radius from the nuclei), and then count the number of such mesh points
    that are on the molecular surface -- i.e. not within the radius of another
    atom. Assuming that the points are evenly distributed, the number of points
    is directly proportional to the accessible surface area (its just 4*pi*r^2
    time the fraction of the points that are accessible).

    There are a number of different ways to generate the points on the sphere --
    possibly the best way would be to do a little "molecular dyanmics" : put the
    points on the sphere, and then run MD where all the points repel one another
    and wait for them to get to an energy minimum. But that sounds expensive.

    This code uses the golden section spiral algorithm
    (picture at http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make this spiral that traces out the unit sphere and then put points
    down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for generating
    points on the sphere, which is based on an icosahedral tesselation.
    roughly, the icosahedral tesselation works something like this
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz = ensure_type(traj.xyz,
                      dtype=np.float32,
                      ndim=3,
                      name='traj.xyz',
                      shape=(None, None, 3),
                      warn_on_cast=False)
    if mode == 'atom':
        dim1 = xyz.shape[1]
        atom_mapping = np.arange(dim1, dtype=np.int32)
    elif mode == 'residue':
        dim1 = traj.n_residues
        atom_mapping = np.array([a.residue.index for a in traj.top.atoms],
                                dtype=np.int32)
        if not np.all(
                np.unique(atom_mapping) == np.arange(1 +
                                                     np.max(atom_mapping))):
            raise ValueError('residues must have contiguous integer indices '
                             'starting from zero')
    else:
        raise ValueError(
            'mode must be one of "residue", "atom". "%s" supplied' % mode)

    out = np.zeros((xyz.shape[0], dim1), dtype=np.float32)
    atom_radii = [
        _ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms
    ]
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)

    return out
Example #8
0
def kabsch_sander(traj):
    """Compute the Kabsch-Sander hydrogen bond energy between each pair
    of residues in every frame.

    Hydrogen bonds are defined using an electrostatic definition, assuming
    partial charges of -0.42 e and +0.20 e to the carbonyl oxygen and amide
    hydrogen respectively, their opposites assigned to the carbonyl carbon
    and amide nitrogen. A hydrogen bond is identified if E in the following
    equation is less than -0.5 kcal/mol:

    .. math::

        E = 0.42 \cdot 0.2 \cdot 33.2 kcal/(mol \cdot nm) * \\
            (1/r_{ON} + 1/r_{CH} - 1/r_{OH} - 1/r_{CN})

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.

    Returns
    -------
    matrices : list of scipy.sparse.csr_matrix
        The return value is a list of length equal to the number of frames
        in the trajectory. Each element is an n_residues x n_residues sparse
        matrix, where the existence of an entry at row `i`, column `j` with value
        `x` means that there exists a hydrogen bond between a backbone CO
        group at residue `i` with a backbone NH group at residue `j` whose
        Kabsch-Sander energy is less than -0.5 kcal/mol (the threshold for
        existence of the "bond"). The exact value of the energy is given by the
        value `x`.

    See Also
    --------
    wernet_nilsson, baker_hubbard

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features". Biopolymers 22 (12): 2577-637. dio:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    import scipy.sparse
    xyz, nco_indices, ca_indices, proline_indices = _prep_kabsch_sander_arrays(
        traj)
    n_residues = len(ca_indices)

    hbonds = np.empty((xyz.shape[0], n_residues, 2), np.int32)
    henergies = np.empty((xyz.shape[0], n_residues, 2), np.float32)
    hbonds.fill(-1)
    henergies.fill(np.nan)

    _geometry._kabsch_sander(xyz, nco_indices, ca_indices, proline_indices,
                             hbonds, henergies)

    # The C code returns its info in a pretty inconvenient format.
    # Let's change it to a list of scipy CSR matrices.

    matrices = []
    hbonds_mask = (hbonds != -1)
    for i in range(xyz.shape[0]):
        # appologies for this cryptic code -- we need to deal with the low
        # level aspects of the csr matrix format.
        hbonds_frame = hbonds[i]
        mask = hbonds_mask[i]
        henergies_frame = henergies[i]

        indptr = np.zeros(n_residues + 1, np.int32)
        indptr[1:] = np.cumsum(mask.sum(axis=1))
        indices = hbonds_frame[mask].flatten()
        data = henergies_frame[mask].flatten()

        matrices.append(
            scipy.sparse.csr_matrix((data, indices, indptr),
                                    shape=(n_residues, n_residues)).T)

    return matrices
Example #9
0
from __future__ import print_function
import os
import shutil
import itertools
import tempfile
import subprocess
from distutils.spawn import find_executable
from mdtraj.geometry._geometry import _processor_supports_sse41


import numpy as np
import mdtraj as md
from mdtraj.testing import get_fn, eq, DocStringFormatTester, skipif

HAVE_DSSP = find_executable('mkdssp')
SUPPORT_SSE41 = _processor_supports_sse41()
DSSP_MSG =  "This tests required mkdssp to be installed, from http://swift.cmbi.ru.nl/gv/dssp/"
tmpdir = None
SSE41_MSG = "This CPU does not support the required instructions"


def setup():
    global tmpdir
    tmpdir = tempfile.mkdtemp()

def teardown():
    shutil.rmtree(tmpdir)

def call_dssp(traj, frame=0):
    inp = os.path.join(tmpdir, 'temp.pdb')
    out = os.path.join(tmpdir, 'temp.pdb.dssp')
import numpy as np
import mdtraj as md
from numpy.testing.decorators import skipif
from mdtraj.testing import get_fn, eq
from mdtraj.geometry._geometry import _processor_supports_sse41
from msmbuilder.featurizer import SASAFeaturizer
sse41 = _processor_supports_sse41()


def _test_sasa_featurizer(t, value):
    sasa = md.shrake_rupley(t)
    rids = np.array([a.residue.index for a in t.top.atoms])

    for i, rid in enumerate(np.unique(rids)):
        mask = (rids == rid)
        eq(value[:, i], np.sum(sasa[:, mask], axis=1))


@skipif(not sse41, 'processor does not support sse41')
def test_sasa_featurizer_1():
    t = md.load(get_fn('frame0.h5'))
    value = SASAFeaturizer(mode='residue').partial_transform(t)
    assert value.shape == (t.n_frames, t.n_residues)
    _test_sasa_featurizer(t, value)


@skipif(not sse41, 'processor does not support sse41')
def test_sasa_featurizer_2():
    t = md.load(get_fn('frame0.h5'))

    # scramle the order of the atoms, and which residue each is a
Example #11
0
def kabsch_sander(traj):
    """Compute the Kabsch-Sander hydrogen bond energy between each pair
    of residues in every frame.

    Hydrogen bonds are defined using an electrostatic definition, assuming
    partial charges of -0.42 e and +0.20 e to the carbonyl oxygen and amide
    hydrogen respectively, their opposites assigned to the carbonyl carbon
    and amide nitrogen. A hydrogen bond is identified if E in the following
    equation is less than -0.5 kcal/mol:

    .. math::

        E = 0.42 \cdot 0.2 \cdot 33.2 kcal/(mol \cdot nm) * \\
            (1/r_{ON} + 1/r_{CH} - 1/r_{OH} - 1/r_{CN})

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.

    Returns
    -------
    matrices : list of scipy.sparse.csr_matrix
        The return value is a list of length equal to the number of frames
        in the trajectory. Each element is an n_residues x n_residues sparse
        matrix, where the existence of an entry at row `i`, column `j` with value
        `x` means that there exists a hydrogen bond between a backbone CO
        group at residue `i` with a backbone NH group at residue `j` whose
        Kabsch-Sander energy is less than -0.5 kcal/mol (the threshold for
        existence of the "bond"). The exact value of the energy is given by the
        value `x`.

    See Also
    --------
    wernet_nilsson, baker_hubbard

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features". Biopolymers 22 (12): 2577-637. dio:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError('This CPU does not support the required instruction set (SSE4.1)')

    import scipy.sparse
    xyz, nco_indices, ca_indices, proline_indices = _prep_kabsch_sander_arrays(traj)
    n_residues = len(ca_indices)

    hbonds = np.empty((xyz.shape[0], n_residues, 2), np.int32)
    henergies = np.empty((xyz.shape[0], n_residues, 2), np.float32)
    hbonds.fill(-1)
    henergies.fill(np.nan)

    _geometry._kabsch_sander(xyz, nco_indices, ca_indices, proline_indices,
                             hbonds, henergies)

    # The C code returns its info in a pretty inconvenient format.
    # Let's change it to a list of scipy CSR matrices.

    matrices = []
    hbonds_mask = (hbonds != -1)
    for i in range(xyz.shape[0]):
        # appologies for this cryptic code -- we need to deal with the low
        # level aspects of the csr matrix format.
        hbonds_frame = hbonds[i]
        mask = hbonds_mask[i]
        henergies_frame = henergies[i]

        indptr = np.zeros(n_residues + 1, np.int32)
        indptr[1:] = np.cumsum(mask.sum(axis=1))
        indices = hbonds_frame[mask].flatten()
        data = henergies_frame[mask].flatten()

        matrices.append(scipy.sparse.csr_matrix((data, indices, indptr), shape=(n_residues, n_residues)).T)

    return matrices
Example #12
0
topology1 = md.Topology()
topology1.add_atom('H', element.hydrogen,
                   topology1.add_residue('res', topology1.add_chain()))

# set up a mock topology with two atoms
topology2 = md.Topology()
_res2 = topology2.add_residue('res', topology2.add_chain())
topology2.add_atom('H', element.hydrogen, _res2)
topology2.add_atom('H', element.hydrogen, _res2)

##############################################################################
# Tests
##############################################################################


@skipif(not _processor_supports_sse41(),
        "This CPU does not support the required instructions")
def test_sasa_0():
    # make one atom at the origin
    traj = md.Trajectory(xyz=np.zeros((1, 1, 3)), topology=topology1)

    probe_radius = 0.14
    calc_area = np.sum(
        md.geometry.shrake_rupley(traj, probe_radius=probe_radius))
    true_area = 4 * np.pi * (_ATOMIC_RADII['H'] + probe_radius)**2

    assert_approx_equal(calc_area, true_area)


@skipif(not _processor_supports_sse41(),
        "This CPU does not support the required instructions")
Example #13
0
# set up a mock topology with 1 atom
topology1 = md.Topology()
topology1.add_atom('H', element.hydrogen, topology1.add_residue('res', topology1.add_chain()))

# set up a mock topology with two atoms
topology2 = md.Topology()
_res2 = topology2.add_residue('res', topology2.add_chain())
topology2.add_atom('H', element.hydrogen, _res2)
topology2.add_atom('H', element.hydrogen, _res2)


##############################################################################
# Tests
##############################################################################

@skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions")
def test_sasa_0():
    # make one atom at the origin
    traj = md.Trajectory(xyz=np.zeros((1,1,3)), topology=topology1)

    probe_radius = 0.14
    calc_area = np.sum(md.geometry.shrake_rupley(traj, probe_radius=probe_radius))
    true_area = 4 * np.pi * (_ATOMIC_RADII['H'] + probe_radius)**2

    assert_approx_equal(calc_area, true_area)


@skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions")
def test_sasa_1():
    # two atoms
    traj = md.Trajectory(xyz=np.zeros((1,2,3)), topology=topology2)
Example #14
0
import numpy as np
import mdtraj as md
from numpy.testing.decorators import skipif
from mdtraj.testing import get_fn, eq
from mdtraj.geometry._geometry import _processor_supports_sse41
from msmbuilder.featurizer import SASAFeaturizer

sse41 = _processor_supports_sse41()


def _test_sasa_featurizer(t, value):
    sasa = md.shrake_rupley(t)
    rids = np.array([a.residue.index for a in t.top.atoms])

    for i, rid in enumerate(np.unique(rids)):
        mask = (rids == rid)
        eq(value[:, i], np.sum(sasa[:, mask], axis=1))


@skipif(not sse41, 'processor does not support sse41')
def test_sasa_featurizer_1():
    t = md.load(get_fn('frame0.h5'))
    value = SASAFeaturizer(mode='residue').partial_transform(t)
    assert value.shape == (t.n_frames, t.n_residues)
    _test_sasa_featurizer(t, value)


@skipif(not sse41, 'processor does not support sse41')
def test_sasa_featurizer_2():
    t = md.load(get_fn('frame0.h5'))
Example #15
0
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments
    
    Parameters
    ----------
    traj : md.Trajectory
        A trajectory

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues), dtype=S1
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame.
    simplified  : bool, default=True.
        Use the simplified 3-category assignment scheme. Otherwise the original
        8-category scheme is used.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    Our implementation is based on DSSP-2.2.0, written by Maarten L. Hekkelman
    and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. dio:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError('This CPU does not support the required instruction set (SSE4.1)')
    
    xyz, nco_indices, ca_indices, proline_indices = _prep_kabsch_sander_arrays(traj)
    chain_ids = np.array([r.chain.index for r in traj.top.residues], dtype=np.int32)

    value = _geometry._dssp(xyz, nco_indices, ca_indices, proline_indices, chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]
    if PY2:
        array = np.fromstring(value, dtype=np.dtype('S1'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U1'))
    return array.reshape(n_frames, n_residues)
Example #16
0
###############################################################################
# Globals
###############################################################################

HBondDocStringTester = DocStringFormatTester(md.geometry.hbond)
HAVE_DSSP = find_executable('mkdssp')
tmpdir = None

def setup():
    global tmpdir
    tmpdir = tempfile.mkdtemp()

def teardown():
    shutil.rmtree(tmpdir)

@skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions")
def test_hbonds():
    t = md.load(get_fn('2EQQ.pdb'))
    ours = md.geometry.hbond.kabsch_sander(t)

@skipif(not HAVE_DSSP, "This tests required mkdssp to be installed, from http://swift.cmbi.ru.nl/gv/dssp/")
def test_hbonds_against_dssp():
    t = md.load(get_fn('2EQQ.pdb'))[0]
    pdb = os.path.join(tmpdir, 'f.pdb')
    dssp = os.path.join(tmpdir, 'f.pdb.dssp')
    t.save(pdb)

    cmd = ['mkdssp', '-i', pdb, '-o', dssp]
    subprocess.check_output(' '.join(cmd), shell=True)
    energy = scipy.sparse.lil_matrix((t.n_residues, t.n_residues))
Example #17
0
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments
    
    Parameters
    ----------
    traj : md.Trajectory
        A trajectory

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues), dtype=S1
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame.
    simplified  : bool, default=True.
        Use the simplified 3-category assignment scheme. Otherwise the original
        8-category scheme is used.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    Our implementation is based on DSSP-2.2.0, written by Maarten L. Hekkelman
    and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. dio:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz, nco_indices, ca_indices, proline_indices = _prep_kabsch_sander_arrays(
        traj)
    chain_ids = np.array([r.chain.index for r in traj.top.residues],
                         dtype=np.int32)

    value = _geometry._dssp(xyz, nco_indices, ca_indices, proline_indices,
                            chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]
    if PY2:
        array = np.fromstring(value, dtype=np.dtype('S1'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U1'))
    return array.reshape(n_frames, n_residues)