def optimized_baker_hubbard(f, top, angle_triplets, distance_pairs, beg_ind, traj, freq=0.1, exclude_water=True, periodic=True): print("Start optimized_baker_hubbard()") # Cutoff criteria: these could be exposed as function arguments, or # modified if there are better definitions than the this one based only # on distances and angles # distance_cutoff = 0.25 # nanometers # angle_cutoff = 2.0*np.pi/3.0 # radians distance_cutoff = 0.30 # nanometers angle_cutoff = 11.0 * np.pi / 18.0 # radians if top is None: raise ValueError('baker_hubbard requires that traj contain topology ' 'information') tic = time.clock() angles = compute_angles(traj, angle_triplets, periodic=periodic) print("angles", angles, "with nFrames: ", len(angles), " for numAngles ", len(angles[0])) distances = compute_distances(traj, distance_pairs, periodic=periodic) print("distances", distances, "with nFrames: ", len(distances), " for numDistances ", len(distances[0])) mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff) del angles del distances nChunkFrames = computeAndWriteHBondAllFrames(f, beg_ind, top, mask, angle_triplets) toc = time.clock() chunkComputingTime = toc - tic print("Time to extract and write all hbFramePairs for beg_ind " + str(beg_ind) + " is: " + str(chunkComputingTime)) return nChunkFrames, chunkComputingTime
def wernet_nilsson(traj, exclude_water=True, periodic=True): """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor distance and angle according to the criterion outlined in [1]. As opposed to Baker-Hubbard, this is a "cone" criterion where the distance cutoff depends on the angle. The criterion employed is :math:`r_\\text{DA} < 3.3 A - 0.00044*\\delta_{HDA}*\\delta_{HDA}`, where :math:`r_\\text{DA}` is the distance between donor and acceptor heavy atoms, and :math:`\\delta_{HDA}` is the angle made by the hydrogen atom, donor, and acceptor atoms, measured in degrees (zero in the case of a perfectly straight bond: D-H ... A). When donor the donor is 'O' and the acceptor is 'O', this corresponds to the definition established in [1]_. The donors considered by this method are NH and OH, and the acceptors considered are O and N. In the paper the only donor considered is OH. Parameters ---------- traj : md.Trajectory An mdtraj trajectory. It must contain topology information. exclude_water : bool, default=True Exclude solvent molecules from consideration. periodic : bool, default=True Set to True to calculate displacements and angles across periodic box boundaries. Returns ------- hbonds : list, len=n_frames A list containing the atom indices involved in each of the identified hydrogen bonds at each frame. Each element in the list is an array where each row contains three integer indices, `(d_i, h_i, a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index of the hydrogen atom, and `a_i` the index of the acceptor atom involved in a hydrogen bond which occurs in that frame. Notes ----- Each hydrogen bond is distinguished for the purpose of this function by the indices of the donor, hydrogen, and acceptor atoms. This means that, for example, when an ARG sidechain makes a hydrogen bond with its NH2 group, you might see what appear like double counting of the h-bonds, since the hydrogen bond formed via the H_1 and H_2 are counted separately, despite their "chemical indistinguishably" Examples -------- >>> md.wernet_nilsson(t) array([[ 0, 10, 8], [ 0, 11, 7], [ 69, 73, 54], [ 76, 82, 65], [119, 131, 89], [140, 148, 265], [166, 177, 122], [181, 188, 231]]) >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2])) >>> for hbond in hbonds: >>> print label(hbond) GLU1-N -- GLU1-OE2 GLU1-N -- GLU1-OE1 GLY6-N -- SER4-O CYS7-N -- GLY5-O TYR11-N -- VAL8-O MET12-N -- LYS20-O See Also -------- baker_hubbard, kabsch_sander References ---------- .. [1] Wernet, Ph., L.G.M. Pettersson, and A. Nilsson, et al. "The Structure of the First Coordination Shell in Liquid Water." (2004) Science 304, 995-999. """ distance_cutoff = 0.33 angle_const = 0.000044 angle_cutoff = 45 if traj.topology is None: raise ValueError('wernet_nilsson requires that traj contain topology ' 'information') def get_donors(e0, e1): elems = set((e0, e1)) bonditer = traj.topology.bonds atoms = [(b[0], b[1]) for b in bonditer if set((b[0].element.symbol, b[1].element.symbol)) == elems] indices = [] for a0, a1 in atoms: if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'): continue pair = (a0.index, a1.index) # make sure to get the pair in the right order, so that the index # for e0 comes before e1 if a0.element.symbol == e1: pair = pair[::-1] indices.append(pair) return indices nh_donors = get_donors('N', 'H') oh_donors = get_donors('O', 'H') xh_donors = np.array(nh_donors + oh_donors) if len(xh_donors) == 0: # if there are no hydrogens or protein in the trajectory, we get # no possible pairs and return nothing return [np.zeros((0, 3), dtype=int) for _ in range(traj.n_frames)] if not exclude_water: acceptors = [ a.index for a in traj.topology.atoms if a.element.symbol == 'O' or a.element.symbol == 'N' ] else: acceptors = [ a.index for a in traj.topology.atoms if (a.element.symbol == 'O' and a.residue.name != 'HOH') or a.element.symbol == 'N' ] # This is used to compute the angles angle_triplets = np.array([(e[0][1], e[0][0], e[1]) for e in product(xh_donors, acceptors) if e[0][0] != e[1]]) distance_pairs = angle_triplets[:, [0, 2]] # possible O..acceptor pairs angles = compute_angles(traj, angle_triplets, periodic=periodic) * 180.0 / np.pi # degrees distances = compute_distances(traj, distance_pairs, periodic=periodic, opt=True) cutoffs = distance_cutoff - angle_const * angles**2 mask = np.logical_and(distances < cutoffs, angles < angle_cutoff) # The triplets that are returned are O-H ... O, different # from what's used to compute the angles. angle_triplets2 = angle_triplets[:, [1, 0, 2]] return [angle_triplets2[i] for i in mask]
def baker_hubbard(traj, freq=0.1, exclude_water=True, periodic=True): """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor distance and angle. The criterion employed is :math:`\\theta > 120` and :math:`r_\\text{H...Acceptor} < 2.5 A`. When donor the donor is 'N' and the acceptor is 'O', this corresponds to the definition established in [1]_. The donors considered by this method are NH and OH, and the acceptors considered are O and N. Parameters ---------- traj : md.Trajectory An mdtraj trajectory. It must contain topology information. freq : float, default=0.1 Return only hydrogen bonds that occur in greater this fraction of the frames in the trajectory. exclude_water : bool, default=True Exclude solvent molecules from consideration periodic : bool, default=True Set to True to calculate displacements and angles across periodic box boundaries. Returns ------- hbonds : np.array, shape=[n_hbonds, 3], dtype=int An array containing the indices atoms involved in each of the identified hydrogen bonds. Each row contains three integer indices, `(d_i, h_i, a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index of the hydrogen atom, and `a_i` the index of the acceptor atom involved in a hydrogen bond which occurs (according to the definition above) in proportion greater than `freq` of the trajectory. Notes ----- Each hydrogen bond is distinguished for the purpose of this function by the indices of the donor, hydrogen, and acceptor atoms. This means that, for example, when an ARG sidechain makes a hydrogen bond with its NH2 group, you might see what appear like double counting of the h-bonds, since the hydrogen bond formed via the H_1 and H_2 are counted separately, despite their "chemical indistinguishably" Examples -------- >>> md.baker_hubbard(t) array([[ 0, 10, 8], [ 0, 11, 7], [ 69, 73, 54], [ 76, 82, 65], [119, 131, 89], [140, 148, 265], [166, 177, 122], [181, 188, 231]]) >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2])) >>> for hbond in hbonds: >>> print label(hbond) GLU1-N -- GLU1-OE2 GLU1-N -- GLU1-OE1 GLY6-N -- SER4-O CYS7-N -- GLY5-O TYR11-N -- VAL8-O MET12-N -- LYS20-O See Also -------- kabsch_sander References ---------- .. [1] Baker, E. N., and R. E. Hubbard. "Hydrogen bonding in globular proteins." Progress in Biophysics and Molecular Biology 44.2 (1984): 97-179. """ # Cutoff criteria: these could be exposed as function arguments, or # modified if there are better definitions than the this one based only # on distances and angles distance_cutoff = 0.25 # nanometers angle_cutoff = 2.0 * np.pi / 3.0 # radians if traj.topology is None: raise ValueError('baker_hubbard requires that traj contain topology ' 'information') def get_donors(e0, e1): elems = set((e0, e1)) bonditer = traj.topology.bonds atoms = [(b[0], b[1]) for b in bonditer if set((b[0].element.symbol, b[1].element.symbol)) == elems] indices = [] for a0, a1 in atoms: if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'): continue pair = (a0.index, a1.index) # make sure to get the pair in the right order, so that the index # for e0 comes before e1 if a0.element.symbol == e1: pair = pair[::-1] indices.append(pair) return indices nh_donors = get_donors('N', 'H') oh_donors = get_donors('O', 'H') xh_donors = np.concatenate((nh_donors, oh_donors)) if len(xh_donors) == 0: # if there are no hydrogens or protein in the trajectory, we get # no possible pairs and return nothing return np.zeros((0, 3), dtype=int) if not exclude_water: acceptors = [ a.index for a in traj.topology.atoms if a.element.symbol == 'O' or a.element.symbol == 'N' ] else: acceptors = [ a.index for a in traj.topology.atoms if (a.element.symbol == 'O' and a.residue.name != 'HOH') or a.element.symbol == 'N' ] angle_triplets = np.array([(e[0][0], e[0][1], e[1]) for e in product(xh_donors, acceptors)]) distance_pairs = angle_triplets[:, [1, 2]] # possible H..acceptor pairs angles = compute_angles(traj, angle_triplets, periodic=periodic) distances = compute_distances(traj, distance_pairs, periodic=periodic) mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff) # frequency of occurance of each hydrogen bond in the trajectory occurance = np.sum(mask, axis=0).astype(np.double) / traj.n_frames return angle_triplets[occurance > freq]
def baker_hubbard(traj, freq=0.1, exclude_water=True): """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor distance and angle. The criterion employed is :math:`\\theta > 120` and :math:`r_\\text{H...Acceptor} < 2.5 A`. When donor the donor is 'N' and the acceptor is 'O', this corresponds to the definition established in [1]_. The donors considered by this method are NH and OH, and the acceptors considered are O. Parameters ---------- traj : md.Trajectory An mdtraj trajectory. It must contain topology information. freq : float, default=0.1 Return only hydrogen bonds that occur in greater this fraction of the frames in the trajectory. exclude_water : bool, default=True Exclude solvent molecules from consideration Notes ----- .. [1] Baker, E. N., and R. E. Hubbard. "Hydrogen bonding in globular proteins." Progress in Biophysics and Molecular Biology 44.2 (1984): 97-179. Examples -------- >>> md.baker_hubbard(t) # doctest: +SKIP array([[ 0, 10, 8], [ 0, 11, 7], [ 69, 73, 54], [ 76, 82, 65], [119, 131, 89], [140, 148, 265], [166, 177, 122], [181, 188, 231]]) >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2])) # doctest: +SKIP >>> for hbond in hbonds: # doctest: +SKIP >>> print label(hbond) # doctest: +SKIP GLU1-N -- GLU1-OE2 GLU1-N -- GLU1-OE1 GLY6-N -- SER4-O CYS7-N -- GLY5-O TYR11-N -- VAL8-O MET12-N -- LYS20-O See Also -------- kabsch_sander Returns ------- hbonds : np.array, shape=[n_hbonds, 3], dtype=int An array containing the indices atoms involved in each of the identified hydrogen bonds. Each row contains three integer indices, `(d_i, h_i, a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index of the hydrogen atom, and `a_i` the index of the acceptor atom involved in a hydrogen bond which occurs (according to the definition above) in proportion greater than `freq` of the trajectory. """ # Cutoff criteria: these could be exposed as function arguments, or # modified if there are better definitions than the this one based only # on distances and angles distance_cutoff = 0.25 # nanometers angle_cutoff = 2.0 * np.pi / 3.0 # radians if traj.topology is None: raise ValueError('baker_hubbard requires that traj contain topology ' 'information') def get_donors(e0, e1): elems = set((e0, e1)) bonditer = traj.topology.bonds atoms = [(b[0], b[1]) for b in bonditer if set((b[0].element.symbol, b[1].element.symbol)) == elems] indices = [] for a0, a1 in atoms: if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'): continue pair = (a0.index, a1.index) # make sure to get the pair in the right order, so that the index # for e0 comes before e1 if a0.element.symbol == e1: pair = pair[::-1] indices.append(pair) return indices nh_donors = get_donors('N', 'H') oh_donors = get_donors('O', 'H') xh_donors = np.concatenate((nh_donors, oh_donors)) if not exclude_water: acceptors = [a.index for a in traj.topology.atoms if a.element.symbol == 'O'] else: acceptors = [a.index for a in traj.topology.atoms if a.element.symbol == 'O' and a.residue.name != 'HOH'] angle_triplets = np.array([(e[0][0], e[0][1], e[1]) for e in product(xh_donors, acceptors)]) distance_pairs = angle_triplets[:, [1,2]] # possible H..acceptor pairs angles = compute_angles(traj, angle_triplets) distances = compute_distances(traj, distance_pairs, periodic=False) mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff) # frequency of occurance of each hydrogen bond in the trajectory occurance = np.sum(mask, axis=0).astype(np.double) / traj.n_frames return angle_triplets[occurance > freq]
def wernet_nilsson(traj, exclude_water=True, periodic=True): """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor distance and angle according to the criterion outlined in [1]. As opposed to Baker-Hubbard, this is a "cone" criterion where the distance cutoff depends on the angle. The criterion employed is :math:`r_\\text{DA} < 3.3 A - 0.00044*\\delta_{HDA}*\\delta_{HDA}`, where :math:`r_\\text{DA}` is the distance between donor and acceptor heavy atoms, and :math:`\\delta_{HDA}` is the angle made by the hydrogen atom, donor, and acceptor atoms, measured in degrees (zero in the case of a perfectly straight bond: D-H ... A). When donor the donor is 'O' and the acceptor is 'O', this corresponds to the definition established in [1]_. The donors considered by this method are NH and OH, and the acceptors considered are O and N. In the paper the only donor considered is OH. Parameters ---------- traj : md.Trajectory An mdtraj trajectory. It must contain topology information. exclude_water : bool, default=True Exclude solvent molecules from consideration. periodic : bool, default=True Set to True to calculate displacements and angles across periodic box boundaries. Returns ------- hbonds : list, len=n_frames A list containing the atom indices involved in each of the identified hydrogen bonds at each frame. Each element in the list is an array where each row contains three integer indices, `(d_i, h_i, a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index of the hydrogen atom, and `a_i` the index of the acceptor atom involved in a hydrogen bond which occurs in that frame. Notes ----- Each hydrogen bond is distinguished for the purpose of this function by the indices of the donor, hydrogen, and acceptor atoms. This means that, for example, when an ARG sidechain makes a hydrogen bond with its NH2 group, you might see what appear like double counting of the h-bonds, since the hydrogen bond formed via the H_1 and H_2 are counted separately, despite their "chemical indistinguishably" Examples -------- >>> md.wernet_nilsson(t) array([[ 0, 10, 8], [ 0, 11, 7], [ 69, 73, 54], [ 76, 82, 65], [119, 131, 89], [140, 148, 265], [166, 177, 122], [181, 188, 231]]) >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2])) >>> for hbond in hbonds: >>> print label(hbond) GLU1-N -- GLU1-OE2 GLU1-N -- GLU1-OE1 GLY6-N -- SER4-O CYS7-N -- GLY5-O TYR11-N -- VAL8-O MET12-N -- LYS20-O See Also -------- baker_hubbard, kabsch_sander References ---------- .. [1] Wernet, Ph., L.G.M. Pettersson, and A. Nilsson, et al. "The Structure of the First Coordination Shell in Liquid Water." (2004) Science 304, 995-999. """ distance_cutoff = 0.33 angle_const = 0.000044 angle_cutoff = 45 if traj.topology is None: raise ValueError('wernet_nilsson requires that traj contain topology ' 'information') def get_donors(e0, e1): elems = set((e0, e1)) bonditer = traj.topology.bonds atoms = [(b[0], b[1]) for b in bonditer if set((b[0].element.symbol, b[1].element.symbol)) == elems] indices = [] for a0, a1 in atoms: if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'): continue pair = (a0.index, a1.index) # make sure to get the pair in the right order, so that the index # for e0 comes before e1 if a0.element.symbol == e1: pair = pair[::-1] indices.append(pair) return indices nh_donors = get_donors('N', 'H') oh_donors = get_donors('O', 'H') xh_donors = np.array(nh_donors + oh_donors) if len(xh_donors) == 0: # if there are no hydrogens or protein in the trajectory, we get # no possible pairs and return nothing return [np.zeros((0, 3), dtype=int) for _ in range(traj.n_frames)] if not exclude_water: acceptors = [a.index for a in traj.topology.atoms if a.element.symbol == 'O' or a.element.symbol == 'N'] else: acceptors = [a.index for a in traj.topology.atoms if (a.element.symbol == 'O' and a.residue.name != 'HOH') or a.element.symbol == 'N'] # This is used to compute the angles angle_triplets = np.array([(e[0][1], e[0][0], e[1]) for e in product(xh_donors, acceptors) if e[0][0] != e[1]]) distance_pairs = angle_triplets[:, [0,2]] # possible O..acceptor pairs angles = compute_angles(traj, angle_triplets, periodic=periodic) * 180.0 / np.pi # degrees distances = compute_distances(traj, distance_pairs, periodic=periodic, opt=True) cutoffs = distance_cutoff - angle_const * angles ** 2 mask = np.logical_and(distances < cutoffs, angles < angle_cutoff) # The triplets that are returned are O-H ... O, different # from what's used to compute the angles. angle_triplets2 = angle_triplets[:, [1,0,2]] return [angle_triplets2[i] for i in mask]
def baker_hubbard(traj, freq=0.1, exclude_water=True, periodic=True, get_list=False): """Identify hydrogen bonds based on cutoffs for the Donor-H...Acceptor distance and angle. The criterion employed is :math:`\\theta > 120` and :math:`r_\\text{H...Acceptor} < 2.5 A`. When donor the donor is 'N' and the acceptor is 'O', this corresponds to the definition established in [1]_. The donors considered by this method are NH and OH, and the acceptors considered are O and N. Parameters ---------- traj : md.Trajectory An mdtraj trajectory. It must contain topology information. freq : float, default=0.1 Return only hydrogen bonds that occur in greater this fraction of the frames in the trajectory. exclude_water : bool, default=True Exclude solvent molecules from consideration periodic : bool, default=True Set to True to calculate displacements and angles across periodic box boundaries. get_list : bool, default=False Set to True to obtain a list of hydrogen bonds that appear in each frame (overrides the freq parameter). Returns ------- hbonds : np.array, shape=[n_hbonds, 3], dtype=int (The default behavior if get_list == False) An array containing the indices atoms involved in each of the identified hydrogen bonds. Each row contains three integer indices, `(d_i, h_i, a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index of the hydrogen atom, and `a_i` the index of the acceptor atom involved in a hydrogen bond which occurs (according to the definition above) in proportion greater than `freq` of the trajectory. hbonds : list, len=n_frames (The behavior if get_list = True) A list containing the atom indices involved in each of the identified hydrogen bonds at each frame. Each element in the list is an array where each row contains three integer indices, `(d_i, h_i, a_i)`, such that `d_i` is the index of the donor atom, `h_i` the index of the hydrogen atom, and `a_i` the index of the acceptor atom involved in a hydrogen bond which occurs in that frame. Notes ----- Each hydrogen bond is distinguished for the purpose of this function by the indices of the donor, hydrogen, and acceptor atoms. This means that, for example, when an ARG sidechain makes a hydrogen bond with its NH2 group, you might see what appear like double counting of the h-bonds, since the hydrogen bond formed via the H_1 and H_2 are counted separately, despite their "chemical indistinguishably" Examples -------- >>> md.baker_hubbard(t) array([[ 0, 10, 8], [ 0, 11, 7], [ 69, 73, 54], [ 76, 82, 65], [119, 131, 89], [140, 148, 265], [166, 177, 122], [181, 188, 231]]) >>> label = lambda hbond : '%s -- %s' % (t.topology.atom(hbond[0]), t.topology.atom(hbond[2])) >>> for hbond in hbonds: >>> print label(hbond) GLU1-N -- GLU1-OE2 GLU1-N -- GLU1-OE1 GLY6-N -- SER4-O CYS7-N -- GLY5-O TYR11-N -- VAL8-O MET12-N -- LYS20-O See Also -------- kabsch_sander References ---------- .. [1] Baker, E. N., and R. E. Hubbard. "Hydrogen bonding in globular proteins." Progress in Biophysics and Molecular Biology 44.2 (1984): 97-179. """ # Cutoff criteria: these could be exposed as function arguments, or # modified if there are better definitions than the this one based only # on distances and angles distance_cutoff = 0.25 # nanometers angle_cutoff = 2.0 * np.pi / 3.0 # radians if traj.topology is None: raise ValueError('baker_hubbard requires that traj contain topology ' 'information') def get_donors(e0, e1): elems = set((e0, e1)) bonditer = traj.topology.bonds atoms = [(b[0], b[1]) for b in bonditer if set((b[0].element.symbol, b[1].element.symbol)) == elems] indices = [] for a0, a1 in atoms: if exclude_water and (a0.residue.name == 'HOH' or a1.residue.name == 'HOH'): continue pair = (a0.index, a1.index) # make sure to get the pair in the right order, so that the index # for e0 comes before e1 if a0.element.symbol == e1: pair = pair[::-1] indices.append(pair) return indices nh_donors = get_donors('N', 'H') oh_donors = get_donors('O', 'H') xh_donors = np.array(nh_donors + oh_donors) if len(xh_donors) == 0: # if there are no hydrogens or protein in the trajectory, we get # no possible pairs and return nothing return np.zeros((0, 3), dtype=int) if not exclude_water: acceptors = [a.index for a in traj.topology.atoms if a.element.symbol == 'O' or a.element.symbol == 'N'] else: acceptors = [a.index for a in traj.topology.atoms if (a.element.symbol == 'O' and a.residue.name != 'HOH') or a.element.symbol == 'N'] angle_triplets = np.array([(e[0][0], e[0][1], e[1]) for e in product(xh_donors, acceptors)]) distance_pairs = angle_triplets[:, [1,2]] # possible H..acceptor pairs angles = compute_angles(traj, angle_triplets, periodic=periodic) distances = compute_distances(traj, distance_pairs, periodic=periodic) mask = np.logical_and(distances < distance_cutoff, angles > angle_cutoff) # frequency of occurance of each hydrogen bond in the trajectory occurance = np.sum(mask, axis=0).astype(np.double) / traj.n_frames if get_list: return [angle_triplets[i] for i in mask] else: return angle_triplets[occurance > freq]