def compute_dihedrals(traj, indices, periodic=True, opt=True):
    """Compute the dihedral angles between the supplied quartets of atoms in
    each frame in a trajectory.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        Each row gives the indices of four atoms which together make a
        dihedral angle. The angle is between the planes spanned by the first
        three atoms and the last three atoms, a torsion around the bond
        between the middle two atoms.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will treat dihedrals that cross periodic images
        using the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        The output array gives, in each frame from the trajectory, each of
        the `n_dihedrals` torsion angles. The angles are measured in
        **radians**. When `indices` has no rows an empty
        ``(n_frames, 0)`` array is returned.

    Raises
    ------
    ValueError
        If any entry of `indices` is negative or not smaller than
        ``traj.n_atoms``.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)
    quartets = ensure_type(np.asarray(indices), dtype=np.int32, ndim=2,
                           name='indices', shape=(None, 4),
                           warn_on_cast=False)
    if not np.all(np.logical_and(quartets < traj.n_atoms, quartets >= 0)):
        raise ValueError('indices must be between 0 and %d' % traj.n_atoms)

    # Nothing to compute: return an empty result rather than handing
    # zero-length arrays to the kernels.
    if len(quartets) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    out = np.zeros((xyz.shape[0], quartets.shape[0]), dtype=np.float32)

    # The minimum image convention is only applied when explicitly requested
    # (`periodic is True`) and the trajectory carries unitcell data.
    use_mic = periodic is True and traj._have_unitcell
    if use_mic:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors', shape=(len(xyz), 3, 3))

    if opt and _geometry._processor_supports_sse41():
        if use_mic:
            _geometry._dihedral_mic(xyz, quartets, box, out)
        else:
            _geometry._dihedral(xyz, quartets, out)
    else:
        # Python-level fallback; it receives `periodic` and fills `out`.
        _dihedral(traj, quartets, periodic, out)
    return out
def compute_distances(traj, atom_pairs, periodic=True, opt=True):
    """Compute the distances between pairs of atoms in each frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    atom_pairs : np.ndarray, shape=(num_pairs, 2), dtype=int
        Each row gives the indices of two atoms involved in the interaction.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to calculate distances. Our optimized
        SSE minimum image convention calculation implementation is over 1000x
        faster than the naive numpy implementation.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, num_pairs), dtype=float
        The distance, in each frame, between each pair of atoms. When
        `atom_pairs` has no rows an empty ``(n_frames, 0)`` array is
        returned.

    Raises
    ------
    ValueError
        If any entry of `atom_pairs` is negative or not smaller than
        ``traj.n_atoms``.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)
    pairs = ensure_type(np.asarray(atom_pairs), dtype=np.int32, ndim=2,
                        name='atom_pairs', shape=(None, 2),
                        warn_on_cast=False)
    if not np.all(np.logical_and(pairs < traj.n_atoms, pairs >= 0)):
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    # Nothing to compute: return an empty result rather than handing
    # zero-length arrays to the kernels.
    if len(pairs) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors', shape=(len(xyz), 3, 3))
        if opt and _geometry._processor_supports_sse41():
            out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
            _geometry._dist_mic(xyz, pairs, box, out)
            return out
        return _distance_mic(xyz, pairs, box)

    # Either there are no unitcell vectors or the caller does not want to
    # use them.
    if opt and _geometry._processor_supports_sse41():
        out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
        _geometry._dist(xyz, pairs, out)
        return out
    return _distance(xyz, pairs)
def compute_dihedrals(traj, indices, periodic=True, opt=True):
    """Evaluate torsion (dihedral) angles for atom quartets in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        One quartet of atom indices per row. The dihedral is the angle
        between the plane of the first three atoms and the plane of the last
        three, i.e. the torsion about the bond joining the two middle atoms.
    periodic : bool, default=True
        When this is exactly True and the trajectory has unitcell
        information, quartets that cross periodic images are handled with
        the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        Torsion angles, in **radians**, for each frame and each quartet.
        An empty ``(n_frames, 0)`` array is returned when `indices` has no
        rows.

    Raises
    ------
    ValueError
        If an index is negative or not smaller than ``traj.n_atoms``.
    """
    xyz = ensure_type(
        traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
        shape=(None, None, 3), warn_on_cast=False)
    quartets = ensure_type(
        indices, dtype=np.int32, ndim=2, name='indices',
        shape=(None, 4), warn_on_cast=False)

    out_of_range = np.logical_or(quartets >= traj.n_atoms, quartets < 0)
    if np.any(out_of_range):
        raise ValueError('indices must be between 0 and %d' % traj.n_atoms)

    if len(quartets) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    out = np.zeros((xyz.shape[0], quartets.shape[0]), dtype=np.float32)

    # Minimum-image handling needs both an explicit request and a unitcell.
    wants_mic = periodic is True and traj._have_unitcell
    box = None
    if wants_mic:
        box = ensure_type(
            traj.unitcell_vectors, dtype=np.float32, ndim=3,
            name='unitcell_vectors', shape=(len(xyz), 3, 3))

    if not (opt and _geometry._processor_supports_sse41()):
        # Python-level fallback; it is handed `periodic` and fills `out`.
        _dihedral(traj, quartets, periodic, out)
    elif wants_mic:
        _geometry._dihedral_mic(xyz, quartets, box, out)
    else:
        _geometry._dihedral(xyz, quartets, out)
    return out
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960):
    """Compute the solvent accessible surface area of each atom in each
    simulation frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_atoms)
        The accessible surface area of each atom in every frame.

    Raises
    ------
    RuntimeError
        If the processor does not report SSE4.1 support.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the Golden
    Section Spiral algorithm to generate the sphere points. The basic idea
    is to create a mesh of points representing the surface of each atom (at
    a distance of the van der Waals radius plus the probe radius from the
    nuclei), and then count the number of such mesh points that are on the
    molecular surface -- i.e. not within the radius of another atom.
    Assuming that the points are evenly distributed, the number of points is
    directly proportional to the accessible surface area (it is just
    4*pi*r^2 times the fraction of the points that are accessible).

    There are a number of different ways to generate the points on the
    sphere -- possibly the best way would be to do a little "molecular
    dynamics": put the points on the sphere, and then run MD where all the
    points repel one another and wait for them to get to an energy minimum.
    But that sounds expensive.

    This code uses the golden section spiral algorithm (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make this spiral that traces out the unit sphere and then put
    points down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for
    generating points on the sphere, which is based on an icosahedral
    tessellation. Roughly, the icosahedral tessellation works something like
    this:
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz = ensure_type(
        traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
        shape=(None, None, 3), warn_on_cast=False)
    n_frames, n_atoms = xyz.shape[0], xyz.shape[1]
    out = np.zeros((n_frames, n_atoms), dtype=np.float32)

    # Per-atom radius looked up by element symbol, enlarged by the probe.
    atom_radii = []
    for atom in traj.topology.atoms:
        atom_radii.append(_ATOMIC_RADII[atom.element.symbol])
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), out)
    return out
def compute_distances(traj, atom_pairs, periodic=True, opt=True):
    """Measure the distance between each listed atom pair in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    atom_pairs : np.ndarray, shape=(num_pairs, 2), dtype=int
        Each row gives the indices of two atoms involved in the interaction.
    periodic : bool, default=True
        When this is exactly True and the trajectory has unitcell
        information, distances are computed under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to calculate distances. Our optimized
        SSE minimum image convention calculation implementation is over 1000x
        faster than the naive numpy implementation.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, num_pairs), dtype=float
        The distance, in each frame, between each pair of atoms. An empty
        ``(n_frames, 0)`` array is returned when `atom_pairs` has no rows.

    Raises
    ------
    ValueError
        If an index is negative or not smaller than ``traj.n_atoms``.
    """
    xyz = ensure_type(
        traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
        shape=(None, None, 3), warn_on_cast=False)
    pairs = ensure_type(
        atom_pairs, dtype=np.int32, ndim=2, name='atom_pairs',
        shape=(None, 2), warn_on_cast=False)

    out_of_range = np.logical_or(pairs >= traj.n_atoms, pairs < 0)
    if np.any(out_of_range):
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    if len(pairs) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    # Minimum-image handling needs both an explicit request and a unitcell.
    wants_mic = periodic is True and traj._have_unitcell
    box = None
    if wants_mic:
        box = ensure_type(
            traj.unitcell_vectors, dtype=np.float32, ndim=3,
            name='unitcell_vectors', shape=(len(xyz), 3, 3))

    if not (opt and _geometry._processor_supports_sse41()):
        # The Python-level fallbacks return their own result array.
        if wants_mic:
            return _distance_mic(xyz, pairs, box)
        return _distance(xyz, pairs)

    # The native kernels write into a caller-supplied buffer.
    out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
    if wants_mic:
        _geometry._dist_mic(xyz, pairs, box, out)
    else:
        _geometry._dist(xyz, pairs, out)
    return out
def compute_angles(traj, angle_indices, periodic=True, opt=True):
    """Compute the bond angles between the supplied triplets of indices in
    each frame of a trajectory.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    angle_indices : np.ndarray, shape=(n_angles, 3), dtype=int
        Each row gives the indices of three atoms which together make an
        angle.
    periodic : bool, default=True
        When this is exactly True and the trajectory has unitcell
        information, angles that cross periodic images are treated using
        the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles. Our optimized
        SSE angle calculation implementation is 10-20x faster than the
        (itself optimized) numpy implementation.

    Returns
    -------
    angles : np.ndarray, shape=[n_frames, n_angles], dtype=float
        The angles are in radians. An empty ``(n_frames, 0)`` array is
        returned when `angle_indices` has no rows.

    Raises
    ------
    ValueError
        If an index is negative or not smaller than ``traj.n_atoms``.
    """
    xyz = ensure_type(
        traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
        shape=(None, None, 3), warn_on_cast=False)
    triplets = ensure_type(
        angle_indices, dtype=np.int32, ndim=2, name='angle_indices',
        shape=(None, 3), warn_on_cast=False)

    out_of_range = np.logical_or(triplets >= traj.n_atoms, triplets < 0)
    if np.any(out_of_range):
        raise ValueError(
            'angle_indices must be between 0 and %d' % traj.n_atoms)

    if len(triplets) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    out = np.zeros((xyz.shape[0], triplets.shape[0]), dtype=np.float32)

    # Minimum-image handling needs both an explicit request and a unitcell.
    wants_mic = periodic is True and traj._have_unitcell
    box = None
    if wants_mic:
        box = ensure_type(
            traj.unitcell_vectors, dtype=np.float32, ndim=3,
            name='unitcell_vectors', shape=(len(xyz), 3, 3))

    if not (opt and _geometry._processor_supports_sse41()):
        # Python-level fallback; it is handed `periodic` and fills `out`.
        _angle(traj, triplets, periodic, out)
    elif wants_mic:
        _geometry._angle_mic(xyz, triplets, box, out)
    else:
        _geometry._angle(xyz, triplets, out)
    return out
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960, mode='atom'):
    """Compute the solvent accessible surface area of each atom or residue in
    each simulation frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.
    mode : {'atom', 'residue'}
        In mode == 'atom', the extracted areas are resolved per-atom.
        In mode == 'residue', this is consolidated down to the
        per-residue SASA by summing over the atoms in each residue.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_features)
        The accessible surface area of each atom or residue in every frame.
        If mode == 'atom', the second dimension will index the atoms in
        the trajectory, whereas if mode == 'residue', the second
        dimension will index the residues.

    Raises
    ------
    RuntimeError
        If the processor does not report SSE4.1 support.
    ValueError
        If `mode` is not 'atom' or 'residue', or if in 'residue' mode the
        residue indices of the atoms are not the contiguous integers
        0, 1, ..., max.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the Golden
    Section Spiral algorithm to generate the sphere points. The basic idea
    is to create a mesh of points representing the surface of each atom (at
    a distance of the van der Waals radius plus the probe radius from the
    nuclei), and then count the number of such mesh points that are on the
    molecular surface -- i.e. not within the radius of another atom.
    Assuming that the points are evenly distributed, the number of points is
    directly proportional to the accessible surface area (it is just
    4*pi*r^2 times the fraction of the points that are accessible).

    There are a number of different ways to generate the points on the
    sphere -- possibly the best way would be to do a little "molecular
    dynamics": put the points on the sphere, and then run MD where all the
    points repel one another and wait for them to get to an energy minimum.
    But that sounds expensive.

    This code uses the golden section spiral algorithm (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make this spiral that traces out the unit sphere and then put
    points down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for
    generating points on the sphere, which is based on an icosahedral
    tessellation. Roughly, the icosahedral tessellation works something like
    this:
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    # `atom_mapping[i]` is the output column that atom i contributes to.
    if mode == 'atom':
        dim1 = xyz.shape[1]
        atom_mapping = np.arange(dim1, dtype=np.int32)
    elif mode == 'residue':
        dim1 = traj.n_residues
        atom_mapping = np.array(
            [a.residue.index for a in traj.top.atoms], dtype=np.int32)
        # np.array_equal tolerates a length mismatch (returns False), whereas
        # an elementwise `==` between arrays of different length does not
        # yield a usable boolean array and would mask the intended message.
        expected = np.arange(1 + np.max(atom_mapping))
        if not np.array_equal(np.unique(atom_mapping), expected):
            raise ValueError('residues must have contiguous integer indices '
                             'starting from zero')
    else:
        raise ValueError(
            'mode must be one of "residue", "atom". "%s" supplied' % mode)

    out = np.zeros((xyz.shape[0], dim1), dtype=np.float32)
    # Per-atom radius looked up by element symbol, enlarged by the probe.
    atom_radii = [
        _ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms
    ]
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)
    return out
def kabsch_sander(traj):
    r"""Compute the Kabsch-Sander hydrogen bond energy between each pair
    of residues in every frame.

    Hydrogen bonds are defined using an electrostatic definition, assuming
    partial charges of -0.42 e and +0.20 e to the carbonyl oxygen and amide
    hydrogen respectively, their opposites assigned to the carbonyl carbon
    and amide nitrogen. A hydrogen bond is identified if E in the following
    equation is less than -0.5 kcal/mol:

    .. math::

        E = 0.42 \cdot 0.2 \cdot 33.2 kcal/(mol \cdot nm) *
            (1/r_{ON} + 1/r_{CH} - 1/r_{OH} - 1/r_{CN})

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.

    Returns
    -------
    matrices : list of scipy.sparse matrices
        The return value is a list of length equal to the number of frames
        in the trajectory. Each element is an n_residues x n_residues sparse
        matrix, where the existence of an entry at row `i`, column `j` with
        value `x` means that there exists a hydrogen bond between a backbone
        CO group at residue `i` with a backbone NH group at residue `j`
        whose Kabsch-Sander energy is less than -0.5 kcal/mol (the threshold
        for existence of the "bond"). The exact value of the energy is given
        by the value `x`.
        NOTE(review): each element is the transpose (``.T``) of a
        ``csr_matrix``; confirm the resulting sparse format if callers
        depend on it.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.
    RuntimeError
        If the processor does not report SSE4.1 support.

    See Also
    --------
    wernet_nilsson, baker_hubbard

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    import scipy.sparse

    prepared = _prep_kabsch_sander_arrays(traj)
    xyz, nco_indices, ca_indices, proline_indices = prepared
    n_frames = xyz.shape[0]
    n_residues = len(ca_indices)

    # Output buffers for the native routine: two slots per residue per
    # frame, pre-filled with -1 / NaN so that unused slots can be detected.
    hbonds = np.empty((n_frames, n_residues, 2), np.int32)
    hbonds.fill(-1)
    henergies = np.empty((n_frames, n_residues, 2), np.float32)
    henergies.fill(np.nan)

    _geometry._kabsch_sander(xyz, nco_indices, ca_indices, proline_indices,
                             hbonds, henergies)

    # Repackage the per-frame slot arrays as sparse matrices by assembling
    # the raw CSR triplet (data, indices, indptr) by hand.
    matrices = []
    for frame_hbonds, frame_energies in zip(hbonds, henergies):
        filled = frame_hbonds != -1
        # Row pointer: running count of filled slots per residue row.
        row_ptr = np.zeros(n_residues + 1, np.int32)
        row_ptr[1:] = np.cumsum(filled.sum(axis=1))
        columns = frame_hbonds[filled].flatten()
        values = frame_energies[filled].flatten()
        frame_matrix = scipy.sparse.csr_matrix(
            (values, columns, row_ptr), shape=(n_residues, n_residues))
        matrices.append(frame_matrix.T)

    return matrices
from __future__ import print_function import os import shutil import itertools import tempfile import subprocess from distutils.spawn import find_executable from mdtraj.geometry._geometry import _processor_supports_sse41 import numpy as np import mdtraj as md from mdtraj.testing import get_fn, eq, DocStringFormatTester, skipif HAVE_DSSP = find_executable('mkdssp') SUPPORT_SSE41 = _processor_supports_sse41() DSSP_MSG = "This tests required mkdssp to be installed, from http://swift.cmbi.ru.nl/gv/dssp/" tmpdir = None SSE41_MSG = "This CPU does not support the required instructions" def setup(): global tmpdir tmpdir = tempfile.mkdtemp() def teardown(): shutil.rmtree(tmpdir) def call_dssp(traj, frame=0): inp = os.path.join(tmpdir, 'temp.pdb') out = os.path.join(tmpdir, 'temp.pdb.dssp')
import numpy as np import mdtraj as md from numpy.testing.decorators import skipif from mdtraj.testing import get_fn, eq from mdtraj.geometry._geometry import _processor_supports_sse41 from msmbuilder.featurizer import SASAFeaturizer sse41 = _processor_supports_sse41() def _test_sasa_featurizer(t, value): sasa = md.shrake_rupley(t) rids = np.array([a.residue.index for a in t.top.atoms]) for i, rid in enumerate(np.unique(rids)): mask = (rids == rid) eq(value[:, i], np.sum(sasa[:, mask], axis=1)) @skipif(not sse41, 'processor does not support sse41') def test_sasa_featurizer_1(): t = md.load(get_fn('frame0.h5')) value = SASAFeaturizer(mode='residue').partial_transform(t) assert value.shape == (t.n_frames, t.n_residues) _test_sasa_featurizer(t, value) @skipif(not sse41, 'processor does not support sse41') def test_sasa_featurizer_2(): t = md.load(get_fn('frame0.h5')) # scramle the order of the atoms, and which residue each is a
def kabsch_sander(traj):
    r"""Compute the Kabsch-Sander hydrogen bond energy between each pair
    of residues in every frame.

    Hydrogen bonds are defined using an electrostatic definition, assuming
    partial charges of -0.42 e and +0.20 e to the carbonyl oxygen and amide
    hydrogen respectively, their opposites assigned to the carbonyl carbon
    and amide nitrogen. A hydrogen bond is identified if E in the following
    equation is less than -0.5 kcal/mol:

    .. math::

        E = 0.42 \cdot 0.2 \cdot 33.2 kcal/(mol \cdot nm) *
            (1/r_{ON} + 1/r_{CH} - 1/r_{OH} - 1/r_{CN})

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.

    Returns
    -------
    matrices : list of scipy.sparse matrices
        One n_residues x n_residues sparse matrix per frame. An entry at
        row `i`, column `j` with value `x` means that there exists a
        hydrogen bond between a backbone CO group at residue `i` with a
        backbone NH group at residue `j` whose Kabsch-Sander energy is less
        than -0.5 kcal/mol (the threshold for existence of the "bond"). The
        exact value of the energy is given by the value `x`.
        NOTE(review): each element is the transpose (``.T``) of a
        ``csr_matrix``; confirm the resulting sparse format if callers
        depend on it.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.
    RuntimeError
        If the processor does not report SSE4.1 support.

    See Also
    --------
    wernet_nilsson, baker_hubbard

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    import scipy.sparse

    xyz, nco_indices, ca_indices, proline_indices = \
        _prep_kabsch_sander_arrays(traj)
    n_residues = len(ca_indices)
    slot_shape = (xyz.shape[0], n_residues, 2)

    # Buffers the native routine writes into; -1 / NaN mark unused slots.
    hbonds = np.empty(slot_shape, np.int32)
    henergies = np.empty(slot_shape, np.float32)
    hbonds.fill(-1)
    henergies.fill(np.nan)
    _geometry._kabsch_sander(xyz, nco_indices, ca_indices, proline_indices,
                             hbonds, henergies)

    def _frame_to_sparse(frame_hbonds, frame_energies):
        # Assemble the raw CSR triplet for one frame: `indptr` is the running
        # count of filled slots per residue row, `indices`/`data` are the
        # filled partner indices and their energies.
        filled = frame_hbonds != -1
        indptr = np.zeros(n_residues + 1, np.int32)
        indptr[1:] = np.cumsum(filled.sum(axis=1))
        indices = frame_hbonds[filled].flatten()
        data = frame_energies[filled].flatten()
        csr = scipy.sparse.csr_matrix(
            (data, indices, indptr), shape=(n_residues, n_residues))
        return csr.T

    return [_frame_to_sparse(frame_hbonds, frame_energies)
            for frame_hbonds, frame_energies in zip(hbonds, henergies)]
topology1 = md.Topology() topology1.add_atom('H', element.hydrogen, topology1.add_residue('res', topology1.add_chain())) # set up a mock topology with two atoms topology2 = md.Topology() _res2 = topology2.add_residue('res', topology2.add_chain()) topology2.add_atom('H', element.hydrogen, _res2) topology2.add_atom('H', element.hydrogen, _res2) ############################################################################## # Tests ############################################################################## @skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions") def test_sasa_0(): # make one atom at the origin traj = md.Trajectory(xyz=np.zeros((1, 1, 3)), topology=topology1) probe_radius = 0.14 calc_area = np.sum( md.geometry.shrake_rupley(traj, probe_radius=probe_radius)) true_area = 4 * np.pi * (_ATOMIC_RADII['H'] + probe_radius)**2 assert_approx_equal(calc_area, true_area) @skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions")
# set up a mock topology with 1 atom topology1 = md.Topology() topology1.add_atom('H', element.hydrogen, topology1.add_residue('res', topology1.add_chain())) # set up a mock topology with two atoms topology2 = md.Topology() _res2 = topology2.add_residue('res', topology2.add_chain()) topology2.add_atom('H', element.hydrogen, _res2) topology2.add_atom('H', element.hydrogen, _res2) ############################################################################## # Tests ############################################################################## @skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions") def test_sasa_0(): # make one atom at the origin traj = md.Trajectory(xyz=np.zeros((1,1,3)), topology=topology1) probe_radius = 0.14 calc_area = np.sum(md.geometry.shrake_rupley(traj, probe_radius=probe_radius)) true_area = 4 * np.pi * (_ATOMIC_RADII['H'] + probe_radius)**2 assert_approx_equal(calc_area, true_area) @skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions") def test_sasa_1(): # two atoms traj = md.Trajectory(xyz=np.zeros((1,2,3)), topology=topology2)
import numpy as np import mdtraj as md from numpy.testing.decorators import skipif from mdtraj.testing import get_fn, eq from mdtraj.geometry._geometry import _processor_supports_sse41 from msmbuilder.featurizer import SASAFeaturizer sse41 = _processor_supports_sse41() def _test_sasa_featurizer(t, value): sasa = md.shrake_rupley(t) rids = np.array([a.residue.index for a in t.top.atoms]) for i, rid in enumerate(np.unique(rids)): mask = (rids == rid) eq(value[:, i], np.sum(sasa[:, mask], axis=1)) @skipif(not sse41, 'processor does not support sse41') def test_sasa_featurizer_1(): t = md.load(get_fn('frame0.h5')) value = SASAFeaturizer(mode='residue').partial_transform(t) assert value.shape == (t.n_frames, t.n_residues) _test_sasa_featurizer(t, value) @skipif(not sse41, 'processor does not support sse41') def test_sasa_featurizer_2(): t = md.load(get_fn('frame0.h5'))
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary
    structure assignments

    Parameters
    ----------
    traj : md.Trajectory
        A trajectory
    simplified : bool, default=True
        Use the simplified 3-category assignment scheme. Otherwise the
        original 8-category scheme is used.

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues)
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame. The dtype is
        'S1' when the module-level `PY2` flag is set and 'U1' otherwise.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.
    RuntimeError
        If the processor does not report SSE4.1 support.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    Our implementation is based on DSSP-2.2.0, written by Maarten L.
    Hekkelman and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        # Name this function (not kabsch_sander) so the caller can tell
        # which API rejected the trajectory.
        raise ValueError('compute_dssp requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz, nco_indices, ca_indices, proline_indices = \
        _prep_kabsch_sander_arrays(traj)
    chain_ids = np.array([r.chain.index for r in traj.top.residues],
                         dtype=np.int32)

    # The native routine hands back one flat string of assignment codes.
    value = _geometry._dssp(xyz, nco_indices, ca_indices, proline_indices,
                            chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]

    # Split the flat string into single characters, then fold it into a
    # (frame, residue) grid.
    if PY2:
        array = np.fromstring(value, dtype=np.dtype('S1'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U1'))
    return array.reshape(n_frames, n_residues)
############################################################################### # Globals ############################################################################### HBondDocStringTester = DocStringFormatTester(md.geometry.hbond) HAVE_DSSP = find_executable('mkdssp') tmpdir = None def setup(): global tmpdir tmpdir = tempfile.mkdtemp() def teardown(): shutil.rmtree(tmpdir) @skipif(not _processor_supports_sse41(), "This CPU does not support the required instructions") def test_hbonds(): t = md.load(get_fn('2EQQ.pdb')) ours = md.geometry.hbond.kabsch_sander(t) @skipif(not HAVE_DSSP, "This tests required mkdssp to be installed, from http://swift.cmbi.ru.nl/gv/dssp/") def test_hbonds_against_dssp(): t = md.load(get_fn('2EQQ.pdb'))[0] pdb = os.path.join(tmpdir, 'f.pdb') dssp = os.path.join(tmpdir, 'f.pdb.dssp') t.save(pdb) cmd = ['mkdssp', '-i', pdb, '-o', dssp] subprocess.check_output(' '.join(cmd), shell=True) energy = scipy.sparse.lil_matrix((t.n_residues, t.n_residues))
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary
    structure assignments

    Parameters
    ----------
    traj : md.Trajectory
        A trajectory
    simplified : bool, default=True
        Use the simplified 3-category assignment scheme. Otherwise the
        original 8-category scheme is used.

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues)
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame. The dtype is
        'S1' when the module-level `PY2` flag is set and 'U1' otherwise.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.
    RuntimeError
        If the processor does not report SSE4.1 support.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    Our implementation is based on DSSP-2.2.0, written by Maarten L.
    Hekkelman and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        # Report the function that was actually called; the previous text
        # named kabsch_sander.
        raise ValueError('compute_dssp requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz, nco_indices, ca_indices, proline_indices = (
        _prep_kabsch_sander_arrays(traj))
    chain_ids = np.array(
        [r.chain.index for r in traj.top.residues], dtype=np.int32)

    # One flat string of assignment codes comes back from the native call.
    value = _geometry._dssp(
        xyz, nco_indices, ca_indices, proline_indices, chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]

    # One array element per character, reshaped to (frame, residue).
    if PY2:
        array = np.fromstring(value, dtype=np.dtype('S1'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U1'))
    return array.reshape(n_frames, n_residues)