Esempio n. 1
0
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments

    Parameters
    ----------
    traj : md.Trajectory
        A trajectory
    simplified : bool, default=True
        Use the simplified 3-category assignment scheme. Otherwise the original
        8-category scheme is used.

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues), dtype=S1
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    A special 'NA' code will be assigned to each 'residue' in the topology which
    isn't actually a protein residue (does not contain atoms with the names
    'CA', 'N', 'C', 'O'), such as water molecules that are listed as 'residue's
    in the topology.

    Our implementation is based on DSSP-2.2.0, written by Maarten L. Hekkelman
    and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. dio:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')

    xyz, nco_indices, ca_indices, proline_indices, protein_indices \
        = _prep_kabsch_sander_arrays(traj)
    chain_ids = np.array([r.chain.index for r in traj.top.residues],
                         dtype=np.int32)

    value = _geometry._dssp(xyz, nco_indices, ca_indices, proline_indices,
                            chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]
    if PY2:
        array = np.fromiter(value, dtype=np.dtype('S2'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U2'))

    array = array.reshape(n_frames, n_residues)
    array[:, np.logical_not(protein_indices)] = 'NA'
    return array
Esempio n. 2
0
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments
    
    Parameters
    ----------
    traj : md.Trajectory
        A trajectory

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues), dtype=S1
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame.
    simplified  : bool, default=True.
        Use the simplified 3-category assignment scheme. Otherwise the original
        8-category scheme is used.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    Our implementation is based on DSSP-2.2.0, written by Maarten L. Hekkelman
    and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. dio:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')
    if not _geometry._processor_supports_sse41():
        raise RuntimeError('This CPU does not support the required instruction set (SSE4.1)')
    
    xyz, nco_indices, ca_indices, proline_indices = _prep_kabsch_sander_arrays(traj)
    chain_ids = np.array([r.chain.index for r in traj.top.residues], dtype=np.int32)

    value = _geometry._dssp(xyz, nco_indices, ca_indices, proline_indices, chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]
    if PY2:
        array = np.fromstring(value, dtype=np.dtype('S1'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U1'))
    return array.reshape(n_frames, n_residues)
Esempio n. 3
0
def compute_dssp(traj, simplified=True):
    """Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments

    Parameters
    ----------
    traj : md.Trajectory
        A trajectory
    simplified : bool, default=True
        Use the simplified 3-category assignment scheme. Otherwise the original
        8-category scheme is used.

    Returns
    -------
    assignments : np.ndarray, shape=(n_frames, n_residues), dtype=S1
        The assignments is a 2D array of character codes (see below), giving
        the secondary structure of each residue in each frame.

    Notes
    -----
    The DSSP assignment codes are:

       - 'H' : Alpha helix
       - 'B' : Residue in isolated beta-bridge
       - 'E' : Extended strand, participates in beta ladder
       - 'G' : 3-helix (3/10 helix)
       - 'I' : 5 helix (pi helix)
       - 'T' : hydrogen bonded turn
       - 'S' : bend
       - ' ' : Loops and irregular elements

    The simplified DSSP codes are:

       - 'H' : Helix. Either of the 'H', 'G', or 'I' codes.
       - 'E' : Strand. Either of the 'E', or 'B' codes.
       - 'C' : Coil. Either of the 'T', 'S' or ' ' codes.

    A special 'NA' code will be assigned to each 'residue' in the topology which
    isn't actually a protein residue (does not contain atoms with the names
    'CA', 'N', 'C', 'O'), such as water molecules that are listed as 'residue's
    in the topology.

    Our implementation is based on DSSP-2.2.0, written by Maarten L. Hekkelman
    and distributed under the Boost Software license.

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637. doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')

    xyz, nco_indices, ca_indices, proline_indices, protein_indices \
        = _prep_kabsch_sander_arrays(traj)
    chain_ids = np.array([r.chain.index for r in traj.top.residues], dtype=np.int32)

    value = _geometry._dssp(xyz, nco_indices, ca_indices, proline_indices, chain_ids)
    if simplified:
        value = value.translate(SIMPLIFIED_CODE_TRANSLATION)

    n_frames = xyz.shape[0]
    n_residues = nco_indices.shape[0]
    if PY2:
        array = np.fromiter(value, dtype=np.dtype('S2'))
    else:
        array = np.fromiter(value, dtype=np.dtype('U2'))

    array = array.reshape(n_frames, n_residues)
    array[:, np.logical_not(protein_indices)] = 'NA'
    return array