def get_hbonds(self):
    """
    Detect hydrogen bonds in every frame and record the ligand/receptor ones.

    Bonds are found with the Wernet-Nilsson criterion [1] as implemented in
    mdtraj. According to the mdtraj docs the criterion employed is:

    "r_DA < 3.3 Angstrom − 0.00044∗δHDA∗δHDA, where r_DA is the distance
    between donor and acceptor heavy atoms, and δHDA the angle made by the
    hydrogen atom, donor, and acceptor atoms, measured in degrees (zero in
    the case of a perfectly straight bond: D-H ... A)."

    For each frame one array is appended to each of:

    * ``self.ligand_donor_hbonds`` - rows whose first column is in
      ``self.top.ligand_idxs`` and whose third column is in
      ``self.top.receptor_idxs``;
    * ``self.receptor_donor_hbonds`` - rows whose first column is in
      ``self.top.receptor_idxs`` and whose third column is in
      ``self.top.ligand_idxs``.

    [1] Wernet, Ph., et al. “The Structure of the First Coordination Shell in
    Liquid Water.” (2004) Science 304, 995-999.

    Returns
    -------
    None
        Results are stored on the instance.
    """
    topology = self.top

    for frame_hbonds in md.wernet_nilsson(self.traj):
        # TODO: Possibly need to filter multiple interactions for same atom
        first_column = frame_hbonds[:, 0]
        third_column = frame_hbonds[:, 2]

        ligand_is_donor = (
            np.isin(first_column, topology.ligand_idxs)
            & np.isin(third_column, topology.receptor_idxs)
        )
        self.ligand_donor_hbonds.append(frame_hbonds[ligand_is_donor])

        receptor_is_donor = (
            np.isin(first_column, topology.receptor_idxs)
            & np.isin(third_column, topology.ligand_idxs)
        )
        self.receptor_donor_hbonds.append(frame_hbonds[receptor_is_donor])
def test_wernet_nilsson_1():
    """Sanity-check the shape and chemistry of `md.wernet_nilsson` output.

    For each reference structure the result must be a list with one
    ``(n, 3)`` array per frame, every triplet must be (O/N, H, O/N), and the
    distance between the first and third atom of each triplet must stay
    below 0.5.
    """
    heavy_atom_symbols = ['O', 'N']

    # one of these files has PBCs and the other doesn't
    for pdb_name in ['2EQQ.pdb', '4K6Q.pdb']:
        traj = md.load(get_fn(pdb_name))
        per_frame = md.wernet_nilsson(traj)

        assert len(per_frame) == len(traj)
        assert isinstance(per_frame, list)
        assert all(isinstance(arr, np.ndarray) for arr in per_frame)
        assert all(arr.shape[1] == 3 for arr in per_frame)

        for frame_index, triplets in enumerate(per_frame):
            for donor, hydrogen, acceptor in triplets:
                assert traj.topology.atom(donor).element.symbol in heavy_atom_symbols
                assert traj.topology.atom(hydrogen).element.symbol == 'H'
                assert traj.topology.atom(acceptor).element.symbol in heavy_atom_symbols

            # assert that the donor-acceptor distance is less than 0.5 nm,
            # just to make sure the criterion isn't giving back totally
            # implausible stuff
            if len(triplets) > 0:
                donor_acceptor_pairs = triplets[:, [0, 2]]
                distances = md.compute_distances(traj[frame_index],
                                                 donor_acceptor_pairs)
                assert np.all(distances < 0.5)
def hydrogenbond(pdbfile):
    """
    Compute hydrogen-bond descriptors for a single PDB file.

    The number of hydrogen bonds [1,2] and the hydrogen bond energy [3] are
    obtained with three methods as implemented in MDtraj [4].

    The returned dictionary holds three floats:

    * ``'HB_BH'`` - number of rows returned by ``mdtraj.baker_hubbard``;
    * ``'HB_WN'`` - number of rows in the first-frame result of
      ``mdtraj.wernet_nilsson``;
    * ``'HB_KS'`` - sum of the first-frame result of
      ``mdtraj.kabsch_sander``.

    [1] E. N. Baker and R. E. Hubbard, "Hydrogen bonding in globular
        proteins", Progress in Biophysics and Molecular Biology, vol. 44,
        no. 2, pp. 97-179, 1984.
    [2] P. Wernet et al., "The Structure of the First Coordination Shell in
        Liquid Water", Science, vol. 304, no. 5673, pp. 995-999, 2004.
    [3] W. Kabsch and C. Sander, "Dictionary of protein secondary structure:
        Pattern recognition of hydrogen-bonded and geometrical features",
        Biopolymers, vol. 22, no. 12, pp. 2577-2637, 1983.
    [4] R. T. McGibbon et al., "MDTraj: A Modern Open Library for the
        Analysis of Molecular Dynamics Trajectories", Biophysical Journal,
        vol. 109, no. 8, pp. 1528-1532, 2015.
    """
    structure = mdtraj.load(pdbfile)

    baker_hubbard_bonds = mdtraj.baker_hubbard(structure)
    wernet_nilsson_first_frame = mdtraj.wernet_nilsson(structure)[0]
    kabsch_sander_first_frame = mdtraj.kabsch_sander(structure)[0]

    return {
        'HB_BH': float(baker_hubbard_bonds.shape[0]),
        'HB_WN': float(wernet_nilsson_first_frame.shape[0]),
        'HB_KS': float(kabsch_sander_first_frame.sum()),
    }
def test_wernet_nilsson_0():
    """A structure without hydrogens must yield empty per-frame results."""
    # no hydrogens in this file -> no hydrogen bonds
    traj = md.load(get_fn('1bpi.pdb'))

    # one result entry per frame
    assert len(md.wernet_nilsson(traj)) == len(traj)

    # ... and the first frame's entry is an empty (0, 3) integer array
    expected_empty = np.zeros((0, 3), dtype=int)
    first_frame_bonds = md.wernet_nilsson(traj)[0]
    eq(expected_empty, first_frame_bonds)
def count_hydrogen_bonds(traj_filename, top_filename, output_filename,
                         mol2_filename, ndx_filename, top_group, bottom_group):
    """Count the number of inter- or intra-monolayer hydrogen bonds

    The number of inter- and intra-monolayer hydrogen bonds is determined for
    each frame in a trajectory using the Wernet-Nilsson method implemented in
    MDTraj. The per-frame counts are written with ``np.savetxt`` as four
    columns: time, inter-monolayer, intra-top and intra-bottom.

    Parameters
    ----------
    traj_filename : str
        Name of trajectory file
    top_filename : str
        Name of topology file; its atoms supply the element assigned to each
        atom of the mol2 topology
    output_filename : str
        Name of output file
    mol2_filename : str
        Name of mol2 file used to read bond information
    ndx_filename : str
        Name of Gromacs .ndx file which specifies atom groups
    top_group : str
        Only atom indices from this group (read from a .ndx file) will be
        considered as part of the top monolayer.
    bottom_group : str
        Only atom indices from this group (read from a .ndx file) will be
        considered as part of the bottom monolayer.

    Notes
    -----
    A bond is "intra-top" ("intra-bottom") when all three of its atoms are in
    the top (bottom) group, and "inter" when all three atoms are in either
    group but the bond is neither intra-top nor intra-bottom.
    """
    top = md.load(top_filename).topology
    top_atoms = list(top.atoms)

    groups = read_ndx(ndx_filename)
    # Shift group indices down by one before comparing them with MDTraj atom
    # indices (presumably because .ndx entries are 1-based - verify against
    # read_ndx).
    top_monolayer = np.array(groups[top_group]) - 1
    bottom_monolayer = np.array(groups[bottom_group]) - 1
    monolayers = np.hstack((bottom_monolayer, top_monolayer))

    h_bonds_top = []
    h_bonds_bottom = []
    h_bonds_interface = []
    time_traj = []
    for traj_chunk in md.iterload(traj_filename, top=mol2_filename, chunk=10):
        # Copy elements from the topology file onto the mol2 topology.
        for i, atom in enumerate(traj_chunk.top.atoms):
            atom.element = top_atoms[i].element

        for frame in md.wernet_nilsson(traj_chunk):
            bonds = np.asarray(frame).reshape(-1, 3)

            # One boolean per bond: are all three atoms in the given group?
            # Vectorised membership replaces the per-atom array scans.
            all_in_top = np.isin(bonds, top_monolayer).all(axis=1)
            all_in_bottom = np.isin(bonds, bottom_monolayer).all(axis=1)
            all_in_either = np.isin(bonds, monolayers).all(axis=1)
            across_interface = all_in_either & ~all_in_top & ~all_in_bottom

            h_bonds_top.append(int(np.count_nonzero(all_in_top)))
            h_bonds_bottom.append(int(np.count_nonzero(all_in_bottom)))
            h_bonds_interface.append(int(np.count_nonzero(across_interface)))

        time_traj.append(traj_chunk.time)

    time_traj = np.concatenate(time_traj).ravel()
    h_bonds_top = np.asarray(h_bonds_top)
    h_bonds_bottom = np.asarray(h_bonds_bottom)
    h_bonds_interface = np.asarray(h_bonds_interface)
    np.savetxt(output_filename,
               np.column_stack((time_traj, h_bonds_interface,
                                h_bonds_top, h_bonds_bottom)),
               header='Time\tInter-\tIntra-top\tIntra-bottom')
def checkType(n1, n2):
    """Return True when the atom-name pair matches the selected `int_type`.

    `int_type` and `BB_names` are read from the enclosing scope.

    * ``"ALL"``   - every pair is accepted.
    * ``"BB-BB"`` - both names are in `BB_names`.
    * ``"BB-SC"`` - exactly one of the two names is in `BB_names`.
    * ``"SC-SC"`` - neither name is in `BB_names`.

    Any other `int_type` accepts every pair.
    """
    if int_type == "ALL":
        return True
    if int_type == "BB-BB":
        return n1 in BB_names and n2 in BB_names
    if int_type == "BB-SC":
        # Parenthesised on purpose: without the parentheses Python chains the
        # comparison (`a in X != b in X` == `a in X and X != b and b in X`),
        # which is True only when BOTH names are backbone names.
        return (n1 in BB_names) != (n2 in BB_names)
    if int_type == "SC-SC":
        return n1 not in BB_names and n2 not in BB_names
    return True


print("Reading md-trajectory ..")
t = md.load(trj_file, top=top_file)
frame_count = len(t)

print("Analyzing hbond network in %d frames .." % frame_count)
hbonds_allframes = md.wernet_nilsson(t)

# Maps a residue-index pair (low, high) to the set of frame numbers in which
# at least one accepted hydrogen bond links the two residues.
hbond_frames = defaultdict(set)

# NOTE(review): only the first 20 frames are scanned although hydrogen bonds
# are computed for (and the message above reports) all frames - confirm that
# this cap is intentional.
for f, frame in enumerate(t[:20]):
    # per-frame alternative: md.baker_hubbard(frame, periodic=True)
    hbonds = hbonds_allframes[f]
    print("Frame %d .. %d hbonds" % (f, hbonds.shape[0]))
    for hbond in hbonds:
        # Columns 0 and 2 of each row are the two atoms whose residues are
        # linked.
        a1 = t.topology.atom(hbond[0])
        a2 = t.topology.atom(hbond[2])
        if not checkType(a1.name, a2.name):
            continue
        resi1 = a1.residue.index
        resi2 = a2.residue.index
        # Bonds within a single residue are ignored.
        if resi1 != resi2:
            key = (min(resi1, resi2), max(resi1, resi2))
            hbond_frames[key].add(f)
def count_hydrogen_bonds(traj_filename, top_filename, ndx_filename,
                         mol2_filename, top_group, bottom_group,
                         output_filename):
    """Count the number of inter- or intra-monolayer hydrogen bonds

    The number of inter- and intra-monolayer hydrogen bonds are determined for
    each frame in a trajectory using the Wernet-Nilsson method implemented in
    MDTraj and are output to a file with a user-specified filename. The
    output has four columns: time, inter-monolayer, intra-top, intra-bottom.

    Parameters
    ----------
    traj_filename : str
        Name of trajectory file (typically XTC format)
    top_filename : str
        Name of topology file (typically GRO format); its atoms supply the
        element assigned to each atom of the mol2 topology
    ndx_filename : str
        Name of the GROMACS index file to read group information from
    mol2_filename : str
        Name of mol2 file used to read bond information
    top_group : str
        Tag for index group (read from NDX file) for indices considered
        as part of the top monolayer
    bottom_group : str
        Tag for index group (read from NDX file) for indices considered
        as part of the bottom monolayer
    output_filename : str
        Name of file to output results to

    Notes
    -----
    Each bond is assigned to the first category that applies: intra-top (all
    three atoms in the top group), intra-bottom (all three atoms in the
    bottom group), inter-monolayer (all three atoms in either group).
    """
    topo = md.load(top_filename).topology
    atoms = list(topo.atoms)

    groups = read_ndx(ndx_filename)
    # NOTE(review): group indices are compared with MDTraj atom indices
    # exactly as returned by read_ndx - confirm that read_ndx already yields
    # 0-based indices.
    bottom_monolayer = np.array(groups[bottom_group])
    top_monolayer = np.array(groups[top_group])
    monolayers = np.hstack((bottom_monolayer, top_monolayer))

    h_bonds_top = []
    h_bonds_bottom = []
    h_bonds_interface = []
    time_traj = []
    for traj_chunk in md.iterload(traj_filename, top=mol2_filename, chunk=10):
        # Copy elements from the topology file onto the mol2 topology.
        for i, atom in enumerate(traj_chunk.top.atoms):
            atom.element = atoms[i].element

        for frame in md.wernet_nilsson(traj_chunk):
            bonds = np.asarray(frame).reshape(-1, 3)

            # One boolean per bond: are all three atoms in the given group?
            # Vectorised membership replaces the per-atom array scans.
            all_in_top = np.isin(bonds, top_monolayer).all(axis=1)
            all_in_bottom = np.isin(bonds, bottom_monolayer).all(axis=1)
            all_in_either = np.isin(bonds, monolayers).all(axis=1)

            # Same precedence as an if/elif chain: top, then bottom, then
            # interface.
            is_top = all_in_top
            is_bottom = all_in_bottom & ~all_in_top
            is_interface = all_in_either & ~all_in_top & ~all_in_bottom

            h_bonds_top.append(int(np.count_nonzero(is_top)))
            h_bonds_bottom.append(int(np.count_nonzero(is_bottom)))
            h_bonds_interface.append(int(np.count_nonzero(is_interface)))

        time_traj.append(traj_chunk.time)

    time_traj = np.concatenate(time_traj).ravel()
    h_bonds_top = np.asarray(h_bonds_top)
    h_bonds_bottom = np.asarray(h_bonds_bottom)
    h_bonds_interface = np.asarray(h_bonds_interface)
    np.savetxt(output_filename,
               np.column_stack((time_traj, h_bonds_interface,
                                h_bonds_top, h_bonds_bottom)),
               header='Time\tInter-\tIntra-top\tIntra-bottom')
def create_json(self, isGPCR, trj_file, top_file, resi_to_group, resi_to_name,
                newpath, stride, seg_to_chain):
    """Analyse the hydrogen-bond network of a trajectory and write it as JSON.

    Hydrogen bonds are detected per frame with ``md.wernet_nilsson``. Every
    bond between two different protein residues is recorded under a residue
    pair key, together with the frame numbers in which it occurs. The result
    is written to `newpath` as a JSON document with ``edges``, ``trees``,
    ``tracks`` and ``defaults`` sections.

    Parameters
    ----------
    isGPCR : bool
        When true, a "Helices" track and a "Degree centrality" track are
        written; otherwise only a "Helices" track is written.
    trj_file : str
        Trajectory file, read in chunks with ``md.iterload``.
    top_file : str
        Topology passed to ``md.iterload``.
    resi_to_group : mapping
        Maps a residue key ``(str(resSeq), chain)`` to the prefix of its tree
        path.
    resi_to_name : mapping
        Maps a residue key to a dotted name; the second dot-separated field
        is used as node name. Residues that are missing or mapped to the
        string ``"None"`` are skipped.
    newpath : str
        Path of the JSON file to write.
    stride : int
        Frame stride passed to ``md.iterload`` and ``get_num_frames``.
    seg_to_chain : mapping
        Maps a residue ``segment_id`` to the chain used in residue keys.
    """
    def short_names(resi1, resi2):
        """Return the two node names for a residue pair, or None to skip it."""
        if resi1 not in resi_to_name or resi2 not in resi_to_name:
            return None
        resn1 = resi_to_name[resi1]
        resn2 = resi_to_name[resi2]
        if resn1 == "None" or resn2 == "None":
            return None
        return resn1.rsplit(".")[1], resn2.rsplit(".")[1]

    def residue_key(residue):
        """Build the (resSeq, chain) key used to identify a residue."""
        return (str(residue.resSeq), seg_to_chain[residue.segment_id])

    # Raw string: "\w" is not a valid escape in an ordinary string literal.
    out_file = re.search(r"(\w*)(\.\w*)$", newpath).group()

    self.stdout.write(self.style.NOTICE("Reading MD trajectory..."))
    num_frames = get_num_frames(trj_file, stride)
    # The chunk size must be an integer; "/" yields a float on Python 3.
    it = md.iterload(filename=trj_file, chunk=int(50 / stride), top=top_file,
                     stride=stride)

    f = 0  # running frame number across all chunks
    self.stdout.write(self.style.NOTICE("Analyzing Hbond network. It may take a while..."))
    hbond_frames = defaultdict(set)
    for t in it:
        for hbonds in md.wernet_nilsson(t):
            for hbond in hbonds:
                # Columns 0 and 2 of each row are the two atoms whose
                # residues are linked.
                resi_1 = t.topology.atom(hbond[0]).residue
                resi_2 = t.topology.atom(hbond[2]).residue
                if resi_1 != resi_2 and resi_1.is_protein and resi_2.is_protein:
                    # Order the pair by residue index so that each pair maps
                    # to a single key.
                    if resi_1.index < resi_2.index:
                        key = (residue_key(resi_1), residue_key(resi_2))
                    else:
                        key = (residue_key(resi_2), residue_key(resi_1))
                    hbond_frames[key].add(f)
            f += 1
        self.stdout.write(self.style.NOTICE("%d%% completed" % ((f / (num_frames / stride)) * 100)))

    self.stdout.write(self.style.NOTICE("Analyzing network centrality .."))
    # Sum, per node, the fraction of frames in which each of its edges exists.
    centrality = defaultdict(int)
    for resi1, resi2 in hbond_frames:
        names = short_names(resi1, resi2)
        if names is None:
            continue
        resn1, resn2 = names
        interaction_count = len(hbond_frames[(resi1, resi2)])
        weight = interaction_count / num_frames
        centrality[resn1] += weight
        centrality[resn2] += weight

    # Normalize centrality to the range [0:1]. Nothing to do when there are
    # no nodes; when every node has the same value the range is degenerate
    # and all nodes are mapped to 0.
    if centrality:
        min_centrality = min(centrality.values())
        max_centrality = max(centrality.values())
        centrality_span = max_centrality - min_centrality
        for v in centrality:
            if centrality_span:
                centrality[v] = (centrality[v] - min_centrality) / centrality_span
            else:
                centrality[v] = 0.0

    self.stdout.write(self.style.NOTICE("Writing hbonds to %s .." % out_file))

    # Collect entries for edges and trees (grouping of nodes)
    edge_entries = []
    tree_paths = set()
    for resi1, resi2 in hbond_frames:
        names = short_names(resi1, resi2)
        if names is None:
            continue
        resn1, resn2 = names
        framelist = sorted(hbond_frames[(resi1, resi2)])
        helixinfo = get_cont_type(self, trj_file, resn1, resn2)
        if helixinfo:
            edge_entries.append(" {\"name1\":\"%s\", \"name2\":\"%s\", \"frames\":%s, \"helixpos\":\"%s\"}" % (resn1, resn2, str(framelist), helixinfo))
        else:
            edge_entries.append(" {\"name1\":\"%s\", \"name2\":\"%s\", \"frames\":%s}" % (resn1, resn2, str(framelist)))
        tree_paths.add(resi_to_group[resi1] + "." + resn1)
        tree_paths.add(resi_to_group[resi2] + "." + resn2)

    # Collect entries for the helix track (coloring of nodes)
    helix_track_entries = []
    helix_colors = {1: "#78C5D5", 12: "#5FB0BF", 2: "#459BA8", 23: "#5FAF88",
                    3: "#79C268", 34: "#9FCD58", 4: "#C5D747", 45: "#DDD742",
                    5: "#F5D63D", 56: "#F3B138", 6: "#F18C32", 67: "#ED7A6A",
                    7: "#E868A1", 78: "#D466A4", 8: "#BF63A6"}

    if isGPCR:
        for tp in tree_paths:
            # Paths whose helix number cannot be parsed or has no colour are
            # left out of the track.
            try:
                res_name = tp.split(".", 1)[1]
                res_helix = int(tp[tp.rfind(".") + 1:tp.find("x")])
                helix_track_entries.append(" { \"nodeName\": \"%s\", \"color\": \"%s\", \"size\":\"1.0\" }" % (res_name, helix_colors[res_helix]))
            except (ValueError, IndexError, KeyError):
                pass

        # Collect entries for the centrality track
        centrality_track_entries = []

        def ccol(val):
            """Blend white (val=0) towards (255,127,80) (val=1) as a hex colour."""
            col1 = (255, 127, 80)
            col2 = (255, 255, 255)
            rgb = tuple(int(c1 * val + c2 * (1 - val)) for c1, c2 in zip(col1, col2))
            return '#%02x%02x%02x' % rgb

        for tp in tree_paths:
            try:
                res_name = tp[tp.rfind(".") + 1:]
                # Parsed only as a filter: a ValueError here skips paths
                # without a numeric helix prefix.
                res_helix = int(tp[tp.rfind(".") + 1:tp.find("x")])
                cent_val = centrality[res_name]
                centrality_track_entries.append(" { \"nodeName\": \"%s\", \"color\": \"%s\", \"size\":\"%s\" }" % (res_name, ccol(cent_val), cent_val))
            except (ValueError, IndexError):
                pass

        tracks = [("Helices", helix_track_entries),
                  ("Degree centrality", centrality_track_entries)]
    else:
        for tp in tree_paths:
            try:
                res_name = tp[tp.rfind(".") + 1:]
                res_helix = int(tp[tp.rfind(".") + 1:tp.find("x")])
                # Entries go to helix_track_entries; the list previously
                # referenced in this branch was never defined.
                helix_track_entries.append(" { \"nodeName\": \"%s\", \"color\": \"%s\", \"size\":\"1.0\" }" % (res_name, helix_colors[res_helix]))
            except (ValueError, IndexError, KeyError):
                pass

        tracks = [("Helices", helix_track_entries)]

    # Write everything
    with open(newpath, "w") as of:
        of.write("{\n")
        of.write(" \"edges\": [\n")
        of.write(",\n".join(edge_entries))
        of.write("\n")
        of.write(" ],\n")
        of.write(" \"trees\": [\n")
        of.write(" {\n")
        of.write(" \"treeLabel\":\"Helices\",\n")
        of.write(" \"treePaths\": [\n")
        of.write(",\n".join([" \"" + tp + "\"" for tp in tree_paths]))
        of.write("\n")
        of.write(" ]\n")
        of.write(" }\n")
        of.write(" ],\n")
        of.write(" \"tracks\": [\n")
        last_track = len(tracks) - 1
        for position, (track_label, track_entries) in enumerate(tracks):
            of.write(" {\n")
            of.write(" \"trackLabel\": \"%s\",\n" % track_label)
            of.write(" \"trackProperties\": [\n")
            of.write(",\n".join(track_entries))
            of.write("\n")
            # Every track but the last is followed by a comma.
            of.write(" ]},\n" if position < last_track else " ]}\n")
        of.write(" ],\n")
        of.write(" \"defaults\":{\"edgeColor\":\"rgba(50,50,50,100)\", \"edgeWidth\":2 }\n")
        of.write("}\n")
formatter_class=argparse.RawDescriptionHelpFormatter) #parser.add_argument('-', "--", help="", default="") parser.add_argument("-v", "--verbose", action="store_true", help="be verbose") parser.add_argument("file", help="", default="", nargs='+') return parser if __name__ == '__main__': parser = get_parser() args = parser.parse_args() if list != type(args.file): args.file = [args.file] for f in args.file: import mdtraj as md print(f, '---------------------') t = md.load(f) print(md.kabsch_sander(t)) hb = md.wernet_nilsson(t) print(hb) print(len(hb[0])) hb = md.baker_hubbard(t) print(hb) print(len(hb))
default='nopbc.pdb', dest='pdb') parser.add_option('-o', action="store", type="string", default='hbonds.txt', dest='outfile') (options, args) = parser.parse_args() traj_pdb = mdtraj.load(options.trajectory, top=options.pdb) topol = traj_pdb.topology outfile = open(options.outfile, 'w') start = time.time() hbonds = mdtraj.wernet_nilsson(traj_pdb[0:-1:5], exclude_water=True, include_water_solute=True) end = time.time() duration = end - start outfile.write('{:<20s}: {}\n'.format('Trajectory', options.trajectory)) outfile.write('{:<20s}: {}\n'.format('Structure', options.pdb)) total_hbonds = [] # hbonds is just a list of lists, the outer list corresponds to each fframe # the inner list is a list of each hbonding triplet for hbond_frame in hbonds: total_hbonds.append(np.shape(hbond_frame)[0]) with_avg = np.mean(total_hbonds) with_std = np.std(total_hbonds) outfile.write("{:<20}{}({})\t{:>10s}{:>10}\n".format("yes water-solute:",