Example #1
0
def _compute_bounded_geometry(traj,
                              triplets,
                              distance_cutoff,
                              distance_indices,
                              angle_indices,
                              freq=0.0,
                              periodic=True):
    """Filter atom triplets by a distance criterion and measure their angles.

    Parameters
    ----------
    traj
        Trajectory object; handed unchanged to ``compute_distances``.
    triplets : np.ndarray
        2-D integer array with one triplet of atom indices per row.
    distance_cutoff : float
        A triplet "meets" the criterion in a frame when its selected distance
        is strictly below this value.
    distance_indices : sequence of int
        The two columns of ``triplets`` whose separation is tested against
        ``distance_cutoff``.
    angle_indices : sequence of int
        Three columns of ``triplets`` defining the angle; the angle is
        measured at the atom in column ``angle_indices[1]``. Any sequence
        (list, tuple, 1-D array) is accepted.
    freq : float, default=0.0
        Keep only triplets that meet the distance criterion in a fraction of
        frames strictly greater than this.
    periodic : bool, default=True
        Forwarded to ``compute_distances``.

    Returns
    -------
    mask : np.ndarray of bool
        One entry per row of the *input* ``triplets``; True for rows kept.
    distances : np.ndarray
        The tested distances, restricted to the kept triplets (axis 1).
    angles : np.ndarray
        Angles in radians for the kept triplets, same layout as ``distances``.
    """
    # First we calculate the requested distances
    distances = compute_distances(traj,
                                  triplets[:, distance_indices],
                                  periodic=periodic)

    # Now we discover which triplets meet the distance cutoff often enough
    prevalence = np.mean(distances < distance_cutoff, axis=0)
    mask = prevalence > freq

    # Update data structures to ignore anything that isn't possible anymore
    triplets = triplets.compress(mask, axis=0)
    distances = distances.compress(mask, axis=1)

    # Calculate angles using the law of cosines.
    # Pair every angle index with its cyclic successor, e.g. for
    # angle_indices = [0, 1, 2] the pairs are (0, 1), (1, 2), (2, 0).
    # list() makes the "+" below a concatenation for any input sequence type.
    angle_indices = list(angle_indices)
    abc_pairs = zip(angle_indices, angle_indices[1:] + angle_indices[:1])
    abc_distances = []

    # Calculate distances (if necessary)
    tested_pair = set(distance_indices)
    for abc_pair in abc_pairs:
        if set(abc_pair) == tested_pair:
            # Same two atoms as the cutoff test: reuse the filtered result.
            abc_distances.append(distances)
        else:
            abc_distances.append(
                compute_distances(traj,
                                  triplets[:, list(abc_pair)],
                                  periodic=periodic))

    # Law of cosines calculation: a and b are the sides meeting at the middle
    # atom, c is the side opposite to it.
    a, b, c = abc_distances
    cosines = (a**2 + b**2 - c**2) / (2 * a * b)
    # Round-off can push values slightly outside [-1, 1], which would make
    # arccos return NaN.
    np.clip(cosines, -1, 1, out=cosines)
    angles = np.arccos(cosines)

    return mask, distances, angles
Example #2
0
File: hbond.py Project: cing/mdtraj
def _compute_bounded_geometry(traj, triplets, distance_cutoff, distance_indices,
                              angle_indices, freq=0.0, periodic=True):
    """
    Select the triplets whose ``distance_indices`` separation is below
    ``distance_cutoff`` in more than ``freq`` of the frames, then measure the
    angle described by ``angle_indices`` for the survivors.

    Returns ``(mask, distances, angles)``: the boolean mask over the input
    triplets, the tested distances restricted to the kept triplets, and the
    angles (radians) at the atom in column ``angle_indices[1]``.
    """
    # Distance that the cutoff applies to, for every candidate triplet.
    all_distances = compute_distances(traj, triplets[:, distance_indices], periodic=periodic)

    # Fraction of frames (axis 0) in which each triplet is inside the cutoff.
    within_cutoff = all_distances < distance_cutoff
    mask = np.mean(within_cutoff, axis=0) > freq

    # Drop every triplet that can no longer qualify.
    kept_triplets = triplets.compress(mask, axis=0)
    distances = all_distances.compress(mask, axis=1)

    # The three sides of the triangle: each angle index paired with its
    # cyclic successor.
    successors = angle_indices[1:] + angle_indices[:1]
    tested_pair = set(distance_indices)
    sides = []
    for side in zip(angle_indices, successors):
        if set(side) == tested_pair:
            # Already computed above; reuse the filtered array.
            sides.append(distances)
            continue
        sides.append(compute_distances(traj, kept_triplets[:, side], periodic=periodic))

    # Law of cosines, with the result clamped so arccos never sees a value
    # outside [-1, 1] caused by round-off.
    a, b, c = sides
    cosines = (a ** 2 + b ** 2 - c ** 2) / (2 * a * b)
    np.clip(cosines, -1, 1, out=cosines)

    return mask, distances, np.arccos(cosines)
Example #3
0
def optimized_baker_hubbard(f,
                            top,
                            angle_triplets,
                            distance_pairs,
                            beg_ind,
                            traj,
                            freq=0.1,
                            exclude_water=True,
                            periodic=True):
    """Apply distance/angle hydrogen-bond cutoffs to precomputed index sets.

    The caller supplies the candidate ``angle_triplets`` and matching
    ``distance_pairs``; this function evaluates them on ``traj``, builds the
    per-frame boolean mask and hands it to ``computeAndWriteHBondAllFrames``.

    Parameters
    ----------
    f
        Passed unchanged to ``computeAndWriteHBondAllFrames`` (presumably the
        output destination -- confirm against that helper).
    top
        Topology; must not be None. Passed to the writer helper.
    angle_triplets, distance_pairs
        Index arrays forwarded to ``compute_angles`` / ``compute_distances``.
    beg_ind
        Passed to the writer helper and used in the timing message.
    traj
        Trajectory chunk to analyse.
    freq, exclude_water
        Unused here; kept so the signature stays compatible with callers.
    periodic : bool, default=True
        Forwarded to ``compute_angles`` and ``compute_distances``.

    Returns
    -------
    tuple
        ``(nChunkFrames, chunkComputingTime)``: the writer helper's return
        value and the elapsed time in seconds.

    Raises
    ------
    ValueError
        If ``top`` is None.
    """
    print("Start optimized_baker_hubbard()")
    # Cutoff criteria: these could be exposed as function arguments, or
    # modified if there are better definitions than this one based only
    # on distances and angles. Looser than the 0.25 nm / 120 degree pair
    # used by the other variants.
    distance_cutoff = 0.30  # nanometers
    angle_cutoff = 11.0 * np.pi / 18.0  # radians (110 degrees)

    if top is None:
        raise ValueError('baker_hubbard requires that traj contain topology '
                         'information')

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement intended for measuring elapsed intervals.
    tic = time.perf_counter()
    angles = compute_angles(traj, angle_triplets, periodic=periodic)
    print("angles", angles, "with nFrames: ", len(angles), " for numAngles ",
          len(angles[0]))
    distances = compute_distances(traj, distance_pairs, periodic=periodic)
    print("distances", distances, "with nFrames: ", len(distances),
          " for numDistances ", len(distances[0]))

    mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff)
    # Release the large per-frame arrays before the writer runs.
    del angles
    del distances
    nChunkFrames = computeAndWriteHBondAllFrames(f, beg_ind, top, mask,
                                                 angle_triplets)
    toc = time.perf_counter()
    chunkComputingTime = toc - tic
    print("Time to extract and write all hbFramePairs for beg_ind " +
          str(beg_ind) + " is: " + str(chunkComputingTime))
    return nChunkFrames, chunkComputingTime
Example #4
0
def wernet_nilsson(traj, exclude_water=True, periodic=True):
    """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor
    distance and angle according to the criterion outlined in [1].
    As opposed to Baker-Hubbard, this is a "cone" criterion where the
    distance cutoff depends on the angle.

    The criterion employed is :math:`r_\\text{DA} < 3.3 A - 0.00044*\\delta_{HDA}*\\delta_{HDA}`,
    where :math:`r_\\text{DA}` is the distance between donor and acceptor heavy atoms,
    and :math:`\\delta_{HDA}` is the angle made by the hydrogen atom, donor, and acceptor atoms,
    measured in degrees (zero in the case of a perfectly straight bond: D-H ... A).

    When the donor is 'O' and the acceptor is 'O', this corresponds to
    the definition established in [1]_. The donors considered by this method
    are NH and OH, and the acceptors considered are O and N. In the paper the only
    donor considered is OH.

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.
    exclude_water : bool, default=True
        Exclude solvent molecules from consideration.
    periodic : bool, default=True
        Set to True to calculate displacements and angles across periodic box boundaries.

    Returns
    -------
    hbonds : list, len=n_frames
        A list containing the atom indices involved in each of the identified
        hydrogen bonds at each frame. Each element in the list is an array
        where each row contains three integer indices, `(d_i, h_i, a_i)`,
        such that `d_i` is the index of the donor atom, `h_i` the index
        of the hydrogen atom, and `a_i` the index of the acceptor atom involved
        in a hydrogen bond which occurs in that frame. When there are no
        candidate triplets at all, every element is an empty ``(0, 3)`` array.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.

    Notes
    -----
    Each hydrogen bond is distinguished for the purpose of this function by the
    indices of the donor, hydrogen, and acceptor atoms. This means that, for
    example, when an ARG sidechain makes a hydrogen bond with its NH2 group,
    you might see what appear like double counting of the h-bonds, since the
    hydrogen bond formed via the H_1 and H_2 are counted separately, despite
    their "chemical indistinguishably"

    Examples
    --------
    Output below is illustrative.

    >>> hbonds = md.wernet_nilsson(t)
    >>> hbonds[0]
    array([[  0,  10,   8],
           [  0,  11,   7],
           [ 69,  73,  54],
           [ 76,  82,  65],
           [119, 131,  89],
           [140, 148, 265],
           [166, 177, 122],
           [181, 188, 231]])
    >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2]))
    >>> for hbond in hbonds[0]:
    ...     print(label(hbond))
    GLU1-N -- GLU1-OE2
    GLU1-N -- GLU1-OE1
    GLY6-N -- SER4-O
    CYS7-N -- GLY5-O
    TYR11-N -- VAL8-O
    MET12-N -- LYS20-O

    See Also
    --------
    baker_hubbard, kabsch_sander

    References
    ----------
    .. [1] Wernet, Ph., L.G.M. Pettersson, and A. Nilsson, et al.
       "The Structure of the First Coordination Shell in Liquid Water." (2004)
       Science 304, 995-999.
    """

    # The docstring quotes these in Angstrom; the values here are ten times
    # smaller, i.e. presumably nanometers -- confirm against compute_distances.
    distance_cutoff = 0.33
    angle_const = 0.000044
    angle_cutoff = 45  # degrees

    if traj.topology is None:
        raise ValueError('wernet_nilsson requires that traj contain topology '
                         'information')

    def no_hbonds():
        # One empty (0, 3) index array per frame.
        return [np.zeros((0, 3), dtype=int) for _ in range(traj.n_frames)]

    def get_donors(e0, e1):
        # Collect (index of e0 atom, index of e1 atom) for every e0-e1 bond.
        elems = set((e0, e1))
        indices = []
        for bond in traj.topology.bonds:
            a0, a1 = bond[0], bond[1]
            if set((a0.element.symbol, a1.element.symbol)) != elems:
                continue
            if exclude_water and (a0.residue.name == 'HOH'
                                  or a1.residue.name == 'HOH'):
                continue
            # make sure to get the pair in the right order, so that the index
            # for e0 comes before e1
            if a0.element.symbol == e1:
                indices.append((a1.index, a0.index))
            else:
                indices.append((a0.index, a1.index))
        return indices

    xh_donors = get_donors('N', 'H') + get_donors('O', 'H')

    if not xh_donors:
        # if there are no hydrogens or protein in the trajectory, we get
        # no possible pairs and return nothing
        return no_hbonds()

    # Every N is an acceptor; O is an acceptor unless it is a water oxygen
    # and water is being excluded.
    acceptors = [
        a.index for a in traj.topology.atoms
        if a.element.symbol == 'N' or
        (a.element.symbol == 'O'
         and not (exclude_water and a.residue.name == 'HOH'))
    ]

    # Rows are (hydrogen, donor, acceptor) so that the donor sits at the
    # vertex of the H-D-A angle. A donor is never paired with itself.
    triplet_rows = [(h, d, a)
                    for (d, h), a in product(xh_donors, acceptors)
                    if d != a]
    if not triplet_rows:
        # e.g. a single donor group with no other acceptor present; an empty
        # array could not be column-indexed below.
        return no_hbonds()

    angle_triplets = np.array(triplet_rows)
    # Donor..acceptor heavy-atom pairs (columns 1 and 2), matching r_DA in
    # the criterion; column 0 is the hydrogen.
    distance_pairs = angle_triplets[:, [1, 2]]

    angles = compute_angles(traj, angle_triplets,
                            periodic=periodic) * 180.0 / np.pi  # degrees
    distances = compute_distances(traj,
                                  distance_pairs,
                                  periodic=periodic,
                                  opt=True)
    # Angle-dependent distance cutoff: the "cone".
    cutoffs = distance_cutoff - angle_const * angles**2

    mask = np.logical_and(distances < cutoffs, angles < angle_cutoff)

    # The triplets that are returned are (donor, hydrogen, acceptor),
    # different from the order used to compute the angles.
    angle_triplets2 = angle_triplets[:, [1, 0, 2]]
    # Each row of the mask selects the triplets present in one frame.
    return [angle_triplets2[i] for i in mask]
Example #5
0
def baker_hubbard(traj, freq=0.1, exclude_water=True, periodic=True):
    """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor
    distance and angle.

    The criterion employed is :math:`\\theta > 120` and
    :math:`r_\\text{H...Acceptor} < 2.5 A`.

    When the donor is 'N' and the acceptor is 'O', this corresponds to
    the definition established in [1]_. The donors considered by this method
    are NH and OH, and the acceptors considered are O and N.

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.
    freq : float, default=0.1
        Return only hydrogen bonds that occur in greater than this fraction
        of the frames in the trajectory.
    exclude_water : bool, default=True
        Exclude solvent molecules from consideration
    periodic : bool, default=True
        Set to True to calculate displacements and angles across periodic box boundaries.

    Returns
    -------
    hbonds : np.array, shape=[n_hbonds, 3], dtype=int
        An array containing the indices atoms involved in each of the identified
        hydrogen bonds. Each row contains three integer indices, `(d_i, h_i,
        a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index
        of the hydrogen atom, and `a_i` the index of the acceptor atom involved
        in a hydrogen bond which occurs (according to the definition above) in
        proportion greater than `freq` of the trajectory.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.

    Notes
    -----
    Each hydrogen bond is distinguished for the purpose of this function by the
    indices of the donor, hydrogen, and acceptor atoms. This means that, for
    example, when an ARG sidechain makes a hydrogen bond with its NH2 group,
    you might see what appear like double counting of the h-bonds, since the
    hydrogen bond formed via the H_1 and H_2 are counted separately, despite
    their "chemical indistinguishably"

    Examples
    --------
    Output below is illustrative.

    >>> hbonds = md.baker_hubbard(t)
    >>> hbonds
    array([[  0,  10,   8],
           [  0,  11,   7],
           [ 69,  73,  54],
           [ 76,  82,  65],
           [119, 131,  89],
           [140, 148, 265],
           [166, 177, 122],
           [181, 188, 231]])
    >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2]))
    >>> for hbond in hbonds:
    ...     print(label(hbond))
    GLU1-N -- GLU1-OE2
    GLU1-N -- GLU1-OE1
    GLY6-N -- SER4-O
    CYS7-N -- GLY5-O
    TYR11-N -- VAL8-O
    MET12-N -- LYS20-O

    See Also
    --------
    kabsch_sander

    References
    ----------
    .. [1] Baker, E. N., and R. E. Hubbard. "Hydrogen bonding in globular
        proteins." Progress in Biophysics and Molecular Biology
        44.2 (1984): 97-179.
    """
    # Cutoff criteria: these could be exposed as function arguments, or
    # modified if there are better definitions than this one based only
    # on distances and angles
    distance_cutoff = 0.25  # nanometers
    angle_cutoff = 2.0 * np.pi / 3.0  # radians

    if traj.topology is None:
        raise ValueError('baker_hubbard requires that traj contain topology '
                         'information')

    def get_donors(e0, e1):
        # Collect (index of e0 atom, index of e1 atom) for every e0-e1 bond.
        elems = set((e0, e1))
        indices = []
        for bond in traj.topology.bonds:
            a0, a1 = bond[0], bond[1]
            if set((a0.element.symbol, a1.element.symbol)) != elems:
                continue
            if exclude_water and (a0.residue.name == 'HOH'
                                  or a1.residue.name == 'HOH'):
                continue
            # make sure to get the pair in the right order, so that the index
            # for e0 comes before e1
            if a0.element.symbol == e1:
                indices.append((a1.index, a0.index))
            else:
                indices.append((a0.index, a1.index))
        return indices

    # Plain list concatenation: np.concatenate rejects the mix of an empty
    # (1-D) and a non-empty (2-D) donor list, which happens whenever only
    # one of the two donor kinds is present.
    xh_donors = get_donors('N', 'H') + get_donors('O', 'H')

    if not xh_donors:
        # if there are no hydrogens or protein in the trajectory, we get
        # no possible pairs and return nothing
        return np.zeros((0, 3), dtype=int)

    # Every N is an acceptor; O is an acceptor unless it is a water oxygen
    # and water is being excluded.
    acceptors = [
        a.index for a in traj.topology.atoms
        if a.element.symbol == 'N' or
        (a.element.symbol == 'O'
         and not (exclude_water and a.residue.name == 'HOH'))
    ]

    # Rows are (donor, hydrogen, acceptor): the hydrogen is the angle vertex.
    angle_triplets = np.array([(d, h, a)
                               for (d, h), a in product(xh_donors, acceptors)])
    distance_pairs = angle_triplets[:, [1, 2]]  # possible H..acceptor pairs

    angles = compute_angles(traj, angle_triplets, periodic=periodic)
    distances = compute_distances(traj, distance_pairs, periodic=periodic)

    mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff)
    # frequency of occurrence of each hydrogen bond in the trajectory
    occurrence = np.sum(mask, axis=0).astype(np.double) / traj.n_frames

    return angle_triplets[occurrence > freq]
Example #6
0
def baker_hubbard(traj, freq=0.1, exclude_water=True):
    """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor
    distance and angle.

    The criterion employed is :math:`\\theta > 120` and
    :math:`r_\\text{H...Acceptor} < 2.5 A`.

    When the donor is 'N' and the acceptor is 'O', this corresponds to
    the definition established in [1]_. The donors considered by this method
    are NH and OH, and the acceptors considered are O.

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.
    freq : float, default=0.1
        Return only hydrogen bonds that occur in greater than this fraction
        of the frames in the trajectory.
    exclude_water : bool, default=True
        Exclude solvent molecules from consideration

    Returns
    -------
    hbonds : np.array, shape=[n_hbonds, 3], dtype=int
        An array containing the indices atoms involved in each of the identified
        hydrogen bonds. Each row contains three integer indices, `(d_i, h_i,
        a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index
        of the hydrogen atom, and `a_i` the index of the acceptor atom involved
        in a hydrogen bond which occurs (according to the definition above) in
        proportion greater than `freq` of the trajectory. An empty ``(0, 3)``
        array is returned when the topology has no NH/OH donors.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.

    Examples
    --------
    >>> hbonds = md.baker_hubbard(t)                            # doctest: +SKIP
    >>> hbonds                                                  # doctest: +SKIP
    array([[  0,  10,   8],
           [  0,  11,   7],
           [ 69,  73,  54],
           [ 76,  82,  65],
           [119, 131,  89],
           [140, 148, 265],
           [166, 177, 122],
           [181, 188, 231]])
    >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2]))  # doctest: +SKIP
    >>> for hbond in hbonds:                                    # doctest: +SKIP
    ...     print(label(hbond))                                 # doctest: +SKIP
    GLU1-N -- GLU1-OE2
    GLU1-N -- GLU1-OE1
    GLY6-N -- SER4-O
    CYS7-N -- GLY5-O
    TYR11-N -- VAL8-O
    MET12-N -- LYS20-O

    See Also
    --------
    kabsch_sander

    References
    ----------
    .. [1] Baker, E. N., and R. E. Hubbard. "Hydrogen bonding in globular proteins." Progress in Biophysics and Molecular Biology 44.2 (1984): 97-179.
    """
    # Cutoff criteria: these could be exposed as function arguments, or
    # modified if there are better definitions than this one based only
    # on distances and angles
    distance_cutoff = 0.25            # nanometers
    angle_cutoff = 2.0 * np.pi / 3.0  # radians

    if traj.topology is None:
        raise ValueError('baker_hubbard requires that traj contain topology '
                         'information')

    def get_donors(e0, e1):
        # Collect (index of e0 atom, index of e1 atom) for every e0-e1 bond.
        elems = set((e0, e1))
        indices = []
        for bond in traj.topology.bonds:
            a0, a1 = bond[0], bond[1]
            if set((a0.element.symbol, a1.element.symbol)) != elems:
                continue
            if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'):
                continue
            # make sure to get the pair in the right order, so that the index
            # for e0 comes before e1
            if a0.element.symbol == e1:
                indices.append((a1.index, a0.index))
            else:
                indices.append((a0.index, a1.index))
        return indices

    # Plain list concatenation: np.concatenate rejects the mix of an empty
    # (1-D) and a non-empty (2-D) donor list, which happens whenever only
    # one of the two donor kinds is present.
    xh_donors = get_donors('N', 'H') + get_donors('O', 'H')

    if not xh_donors:
        # No donors means no candidate triplets; an empty array could not be
        # column-indexed below, so return the empty result directly.
        return np.zeros((0, 3), dtype=int)

    # Oxygen atoms only; water oxygens are skipped when water is excluded.
    acceptors = [a.index for a in traj.topology.atoms
                 if a.element.symbol == 'O'
                 and not (exclude_water and a.residue.name == 'HOH')]

    # Rows are (donor, hydrogen, acceptor): the hydrogen is the angle vertex.
    angle_triplets = np.array([(d, h, a) for (d, h), a in product(xh_donors, acceptors)])
    distance_pairs = angle_triplets[:, [1, 2]]  # possible H..acceptor pairs

    # NOTE(review): angles use compute_angles' default periodic setting while
    # distances are explicitly non-periodic -- confirm this is intended.
    angles = compute_angles(traj, angle_triplets)
    distances = compute_distances(traj, distance_pairs, periodic=False)

    mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff)
    # frequency of occurrence of each hydrogen bond in the trajectory
    occurrence = np.sum(mask, axis=0).astype(np.double) / traj.n_frames

    return angle_triplets[occurrence > freq]
Example #7
0
def wernet_nilsson(traj, exclude_water=True, periodic=True):
    """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor
    distance and angle according to the criterion outlined in [1].
    As opposed to Baker-Hubbard, this is a "cone" criterion where the
    distance cutoff depends on the angle.

    The criterion employed is :math:`r_\\text{DA} < 3.3 A - 0.00044*\\delta_{HDA}*\\delta_{HDA}`,
    where :math:`r_\\text{DA}` is the distance between donor and acceptor heavy atoms,
    and :math:`\\delta_{HDA}` is the angle made by the hydrogen atom, donor, and acceptor atoms,
    measured in degrees (zero in the case of a perfectly straight bond: D-H ... A).

    When the donor is 'O' and the acceptor is 'O', this corresponds to
    the definition established in [1]_. The donors considered by this method
    are NH and OH, and the acceptors considered are O and N. In the paper the only
    donor considered is OH.

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.
    exclude_water : bool, default=True
        Exclude solvent molecules from consideration.
    periodic : bool, default=True
        Set to True to calculate displacements and angles across periodic box boundaries.

    Returns
    -------
    hbonds : list, len=n_frames
        A list containing the atom indices involved in each of the identified
        hydrogen bonds at each frame. Each element in the list is an array
        where each row contains three integer indices, `(d_i, h_i, a_i)`,
        such that `d_i` is the index of the donor atom, `h_i` the index
        of the hydrogen atom, and `a_i` the index of the acceptor atom involved
        in a hydrogen bond which occurs in that frame. When there are no
        candidate triplets at all, every element is an empty ``(0, 3)`` array.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.

    Notes
    -----
    Each hydrogen bond is distinguished for the purpose of this function by the
    indices of the donor, hydrogen, and acceptor atoms. This means that, for
    example, when an ARG sidechain makes a hydrogen bond with its NH2 group,
    you might see what appear like double counting of the h-bonds, since the
    hydrogen bond formed via the H_1 and H_2 are counted separately, despite
    their "chemical indistinguishably"

    Examples
    --------
    Output below is illustrative.

    >>> hbonds = md.wernet_nilsson(t)
    >>> hbonds[0]
    array([[  0,  10,   8],
           [  0,  11,   7],
           [ 69,  73,  54],
           [ 76,  82,  65],
           [119, 131,  89],
           [140, 148, 265],
           [166, 177, 122],
           [181, 188, 231]])
    >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2]))
    >>> for hbond in hbonds[0]:
    ...     print(label(hbond))
    GLU1-N -- GLU1-OE2
    GLU1-N -- GLU1-OE1
    GLY6-N -- SER4-O
    CYS7-N -- GLY5-O
    TYR11-N -- VAL8-O
    MET12-N -- LYS20-O

    See Also
    --------
    baker_hubbard, kabsch_sander

    References
    ----------
    .. [1] Wernet, Ph., L.G.M. Pettersson, and A. Nilsson, et al.
       "The Structure of the First Coordination Shell in Liquid Water." (2004)
       Science 304, 995-999.
    """

    # The docstring quotes these in Angstrom; the values here are ten times
    # smaller, i.e. presumably nanometers -- confirm against compute_distances.
    distance_cutoff = 0.33
    angle_const = 0.000044
    angle_cutoff = 45  # degrees

    if traj.topology is None:
        raise ValueError('wernet_nilsson requires that traj contain topology '
                         'information')

    def no_hbonds():
        # One empty (0, 3) index array per frame.
        return [np.zeros((0, 3), dtype=int) for _ in range(traj.n_frames)]

    def get_donors(e0, e1):
        # Collect (index of e0 atom, index of e1 atom) for every e0-e1 bond.
        elems = set((e0, e1))
        indices = []
        for bond in traj.topology.bonds:
            a0, a1 = bond[0], bond[1]
            if set((a0.element.symbol, a1.element.symbol)) != elems:
                continue
            if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'):
                continue
            # make sure to get the pair in the right order, so that the index
            # for e0 comes before e1
            if a0.element.symbol == e1:
                indices.append((a1.index, a0.index))
            else:
                indices.append((a0.index, a1.index))
        return indices

    xh_donors = get_donors('N', 'H') + get_donors('O', 'H')

    if not xh_donors:
        # if there are no hydrogens or protein in the trajectory, we get
        # no possible pairs and return nothing
        return no_hbonds()

    # Every N is an acceptor; O is an acceptor unless it is a water oxygen
    # and water is being excluded.
    acceptors = [a.index for a in traj.topology.atoms
                 if a.element.symbol == 'N'
                 or (a.element.symbol == 'O' and not (exclude_water and a.residue.name == 'HOH'))]

    # Rows are (hydrogen, donor, acceptor) so that the donor sits at the
    # vertex of the H-D-A angle. A donor is never paired with itself.
    triplet_rows = [(h, d, a) for (d, h), a in product(xh_donors, acceptors) if d != a]
    if not triplet_rows:
        # e.g. a single donor group with no other acceptor present; an empty
        # array could not be column-indexed below.
        return no_hbonds()

    angle_triplets = np.array(triplet_rows)
    # Donor..acceptor heavy-atom pairs (columns 1 and 2), matching r_DA in
    # the criterion; column 0 is the hydrogen.
    distance_pairs = angle_triplets[:, [1, 2]]

    angles = compute_angles(traj, angle_triplets, periodic=periodic) * 180.0 / np.pi  # degrees
    distances = compute_distances(traj, distance_pairs, periodic=periodic, opt=True)
    # Angle-dependent distance cutoff: the "cone".
    cutoffs = distance_cutoff - angle_const * angles ** 2

    mask = np.logical_and(distances < cutoffs, angles < angle_cutoff)

    # The triplets that are returned are (donor, hydrogen, acceptor),
    # different from the order used to compute the angles.
    angle_triplets2 = angle_triplets[:, [1, 0, 2]]
    # Each row of the mask selects the triplets present in one frame.
    return [angle_triplets2[i] for i in mask]
Example #8
0
def baker_hubbard(traj, freq=0.1, exclude_water=True, periodic=True, get_list=False):
    """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor
    distance and angle.

    The criterion employed is :math:`\\theta > 120` and
    :math:`r_\\text{H...Acceptor} < 2.5 A`.

    When the donor is 'N' and the acceptor is 'O', this corresponds to
    the definition established in [1]_. The donors considered by this method
    are NH and OH, and the acceptors considered are O and N.

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.
    freq : float, default=0.1
        Return only hydrogen bonds that occur in greater than this fraction
        of the frames in the trajectory.
    exclude_water : bool, default=True
        Exclude solvent molecules from consideration
    periodic : bool, default=True
        Set to True to calculate displacements and angles across periodic box boundaries.
    get_list : bool, default=False
        Set to True to obtain a list of hydrogen bonds that appear in each frame
        (overrides the freq parameter).

    Returns
    -------
    hbonds : np.array, shape=[n_hbonds, 3], dtype=int
        (The default behavior if get_list == False)
        An array containing the indices atoms involved in each of the identified
        hydrogen bonds. Each row contains three integer indices, `(d_i, h_i,
        a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index
        of the hydrogen atom, and `a_i` the index of the acceptor atom involved
        in a hydrogen bond which occurs (according to the definition above) in
        proportion greater than `freq` of the trajectory.

    hbonds : list, len=n_frames
        (The behavior if get_list = True)
        A list containing the atom indices involved in each of the identified
        hydrogen bonds at each frame. Each element in the list is an array
        where each row contains three integer indices, `(d_i, h_i, a_i)`,
        such that `d_i` is the index of the donor atom, `h_i` the index
        of the hydrogen atom, and `a_i` the index of the acceptor atom involved
        in a hydrogen bond which occurs in that frame. When the topology has
        no donors, every element is an empty ``(0, 3)`` array.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.

    Notes
    -----
    Each hydrogen bond is distinguished for the purpose of this function by the
    indices of the donor, hydrogen, and acceptor atoms. This means that, for
    example, when an ARG sidechain makes a hydrogen bond with its NH2 group,
    you might see what appear like double counting of the h-bonds, since the
    hydrogen bond formed via the H_1 and H_2 are counted separately, despite
    their "chemical indistinguishably"

    Examples
    --------
    Output below is illustrative.

    >>> hbonds = md.baker_hubbard(t)
    >>> hbonds
    array([[  0,  10,   8],
           [  0,  11,   7],
           [ 69,  73,  54],
           [ 76,  82,  65],
           [119, 131,  89],
           [140, 148, 265],
           [166, 177, 122],
           [181, 188, 231]])
    >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2]))
    >>> for hbond in hbonds:
    ...     print(label(hbond))
    GLU1-N -- GLU1-OE2
    GLU1-N -- GLU1-OE1
    GLY6-N -- SER4-O
    CYS7-N -- GLY5-O
    TYR11-N -- VAL8-O
    MET12-N -- LYS20-O

    See Also
    --------
    kabsch_sander

    References
    ----------
    .. [1] Baker, E. N., and R. E. Hubbard. "Hydrogen bonding in globular
        proteins." Progress in Biophysics and Molecular Biology
        44.2 (1984): 97-179.
    """
    # Cutoff criteria: these could be exposed as function arguments, or
    # modified if there are better definitions than this one based only
    # on distances and angles
    distance_cutoff = 0.25            # nanometers
    angle_cutoff = 2.0 * np.pi / 3.0  # radians

    if traj.topology is None:
        raise ValueError('baker_hubbard requires that traj contain topology '
                         'information')

    def get_donors(e0, e1):
        # Collect (index of e0 atom, index of e1 atom) for every e0-e1 bond.
        elems = set((e0, e1))
        indices = []
        for bond in traj.topology.bonds:
            a0, a1 = bond[0], bond[1]
            if set((a0.element.symbol, a1.element.symbol)) != elems:
                continue
            if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'):
                continue
            # make sure to get the pair in the right order, so that the index
            # for e0 comes before e1
            if a0.element.symbol == e1:
                indices.append((a1.index, a0.index))
            else:
                indices.append((a0.index, a1.index))
        return indices

    xh_donors = get_donors('N', 'H') + get_donors('O', 'H')

    if not xh_donors:
        # if there are no hydrogens or protein in the trajectory, we get
        # no possible pairs and return nothing -- in the shape that matches
        # the requested return mode.
        if get_list:
            return [np.zeros((0, 3), dtype=int) for _ in range(traj.n_frames)]
        return np.zeros((0, 3), dtype=int)

    # Every N is an acceptor; O is an acceptor unless it is a water oxygen
    # and water is being excluded.
    acceptors = [a.index for a in traj.topology.atoms
                 if a.element.symbol == 'N'
                 or (a.element.symbol == 'O' and not (exclude_water and a.residue.name == 'HOH'))]

    # Rows are (donor, hydrogen, acceptor): the hydrogen is the angle vertex.
    angle_triplets = np.array([(d, h, a) for (d, h), a in product(xh_donors, acceptors)])
    distance_pairs = angle_triplets[:, [1, 2]]  # possible H..acceptor pairs

    angles = compute_angles(traj, angle_triplets, periodic=periodic)
    distances = compute_distances(traj, distance_pairs, periodic=periodic)

    mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff)

    if get_list:
        # Each row of the mask selects the triplets present in one frame.
        return [angle_triplets[i] for i in mask]

    # frequency of occurrence of each hydrogen bond in the trajectory
    occurrence = np.sum(mask, axis=0).astype(np.double) / traj.n_frames
    return angle_triplets[occurrence > freq]