def is_pistack(sring, tring, pistack_dist_max=0.55, pistack_ang_dev=30.0, min_dist=0.05, pistack_offset_max=0.20):
    """
    Compute if a pi-stacking interaction could exist between two aromatic rings

    Rules for pi-stacking interactions:
    1) The distance between the geometric centers of both rings should be
       between `min_dist` and `pistack_dist_max`.
    2) Parallel stacking ('ps'): the angle between the two ring normals is
       smaller than `pistack_ang_dev` and the ring offset is smaller than
       `pistack_offset_max`.
    3) T-shaped stacking ('ts'): the angle between the two ring normals is
       within `pistack_ang_dev` of 90 degrees and the ring offset is smaller
       than `pistack_offset_max`.

    The ring offset is the smallest of the two distances obtained by
    projecting the center of one ring onto the plane of the other ring and
    measuring the distance to that ring's center.

    The function returns a boolean indicating if there is a pi-stacking
    interaction and a dictionary with the computed metrics. The dictionary
    always has a 'type' key (None, 'ps' or 'ts'). When rule 1 is satisfied it
    also holds: distance, angle, offset, sring_center, tring_center,
    sring_normal, tring_normal, sring and tring (atom serials of both rings).

    :param sring:              Atom selection of the source aromatic ring
    :type sring:               :interact:TopologyDataFrame
    :param tring:              Atom selection of the target aromatic ring
    :type tring:               :interact:TopologyDataFrame
    :param pistack_dist_max:   maximum distance between ring centers (nm)
    :type pistack_dist_max:    :py:float
    :param pistack_ang_dev:    maximum deviation (degrees) of the angle between
                               the ring normals from 0 (parallel) or 90
                               (T-shaped)
    :type pistack_ang_dev:     :py:float
    :param min_dist:           minimum distance between ring centers (nm)
    :type min_dist:            :py:float
    :param pistack_offset_max: maximum ring offset (nm)
    :type pistack_offset_max:  :py:float

    :return:                   pi-stacking interaction or not + data
    :rtype:                    :py:bool, :py:dict
    """

    pistack = False
    pistack_data = {'type': None}

    # Geometric center and plane normal of the source ring
    sring_center = sring.center()
    sring_normal = plane_fit(sring.coord, center=sring_center)

    # Geometric center and (inverted) plane normal of the target ring
    tring_center = tring.center()
    tring_normal = plane_fit(tring.coord, center=tring_center) * -1

    # Rule 1: distance between ring centers
    dist = distance(sring_center, tring_center)
    if not min_dist < dist < pistack_dist_max:
        return pistack, pistack_data

    # Ring offset: project each ring center into the plane of the other ring
    proj1 = projection(sring_normal, sring_center, tring_center)
    proj2 = projection(tring_normal, tring_center, sring_center)
    offset = min(distance(proj1, sring_center), distance(proj2, tring_center))

    # Angle between the normals. The direction of a fitted normal is arbitrary
    # so fold the angle onto the 0-90 degree range.
    a = vector_angle(sring_normal, tring_normal, deg=True)
    a = min(a, 180 - a) if 180 - a >= 0 else a

    pistack_data.update({'distance': dist, 'angle': a, 'offset': offset,
                         'sring_center': sring_center, 'tring_center': tring_center,
                         'sring_normal': sring_normal, 'tring_normal': tring_normal,
                         'sring': tuple(sring['serial']), 'tring': tuple(tring['serial'])})

    within_offset = offset < pistack_offset_max

    # Rule 2: pi-stacking. The lower bound is inclusive: perfectly parallel
    # rings have a folded angle of exactly 0.0 and are the ideal case.
    if 0 <= a < pistack_ang_dev and within_offset:
        pistack_data['type'] = 'ps'
        pistack = True

    # Rule 3: T-stacking
    elif 90 - pistack_ang_dev < a < 90 + pistack_ang_dev and within_offset:
        pistack_data['type'] = 'ts'
        pistack = True

    if pistack:
        logger.info('Pi-stacking type: {0} between {1}-{2} {3} and {4}-{5} {6}'.format(
            pistack_data['type'], set(sring['resSeq']), set(sring['resName']), tuple(sring['serial']),
            set(tring['resSeq']), set(tring['resName']), tuple(tring['serial'])))
        logger.info('Distance: {distance:.3f} nm, angle: {angle:.2f} deg, offset: {offset:.3f} nm'.format(
            **pistack_data))

    return pistack, pistack_data
def is_pication(cation, ring, min_dist=0.05, pication_dist_max=0.6, pication_offset_max=0.2,
                pication_amine_angle_dev=30.0):
    """
    Compute if a pi-cation interaction could exist between a cation and an
    aromatic ring

    Rules for pi-Cation interactions:
    1) Distance between cation and aromatic ring center should be between
       `min_dist` and `pication_dist_max`
    2) The distance offset between the ring center and the cation after
       projection onto the ring plane should not be more than
       `pication_offset_max`.
    3) If the cation is an amine (atom type N.3 or N.4) that is linked with
       the remainder of the residue through more than one covalent bond
       (fewer degrees of freedom), the ring center should be positioned above
       the amine valence electrons. This is computed as the angle between the
       normal of the ring and the normal defined by the heavy atom neighbours
       of the amine nitrogen, which should be smaller than
       `pication_amine_angle_dev`.

    The function returns a boolean indicating if there is a pi-cation
    interaction and a dictionary with the computed metrics, notably:
    ring_center, ring_normal, distance, offset, and cat_angle and cat_normal
    if rule 3 was evaluated.

    :param cation:                   Cation atom selection
    :type cation:                    :interact:TopologyDataFrame,
                                     :interact:TopologySeries
    :param ring:                     Atom selection of the aromatic ring
    :type ring:                      :interact:TopologyDataFrame
    :param min_dist:                 minimum interaction distance (nm)
    :type min_dist:                  :py:float
    :param pication_dist_max:        maximum distance for pi-cation
                                     interactions (nm)
    :type pication_dist_max:         :py:float
    :param pication_offset_max:      maximum offset between the ring center
                                     and the cation projected onto the ring
                                     plane (nm)
    :type pication_offset_max:       :py:float
    :param pication_amine_angle_dev: Maximum angle deviation between amine and
                                     ring normals (degrees).
    :type pication_amine_angle_dev:  :py:float

    :return:                         pi-cation interaction or not + data
    :rtype:                          :py:bool, :py:dict
    """

    ispicat = False
    data = {}

    # Distance between cation and ring center, and offset between the ring
    # center and the cation projected onto the ring plane
    ring_center = ring.center()
    ring_normal = plane_fit(ring.coord, center=ring_center)
    pcdist = distance(cation.coord, ring_center)
    pcoffset = distance(projection(ring_normal, ring_center, cation.coord), ring_center)

    data.update({'ring_center': ring_center, 'ring_normal': ring_normal, 'distance': pcdist, 'offset': pcoffset})

    if min_dist < pcdist < pication_dist_max and pcoffset < pication_offset_max:
        ispicat = True

        # If it concerns a tertiary or quaternary amine, check angles.
        # Otherwise, we might have a pi-cation interaction 'through' the ligand
        if cation.attype in ('N.3', 'N.4'):
            neigh = cation.neighbours(covalent=True)
            nonh = neigh[neigh['attype'] != 'H']

            # Count the covalent neighbours of every heavy atom neighbour,
            # minus one per heavy atom neighbour
            links = sum(len(n.neighbours(covalent=True)) - 1 for _, n in nonh.iterrows())

            if len(nonh) > 2 and links >= 2:

                # Normal to the plane defined by the covalent neighbours of
                # the cation and its angle to the ring normal. The angle is
                # requested in degrees explicitly because it is folded
                # against 180 and compared with a cutoff in degrees.
                cation_normal = plane_fit(nonh.coord, center=cation.coord)
                cation_angle = vector_angle(ring_normal, cation_normal, deg=True)
                cation_angle = min(cation_angle, 180 - cation_angle) if 180 - cation_angle >= 0 else cation_angle

                data['cat_angle'] = cation_angle
                data['cat_normal'] = cation_normal

                # Vector angle should not deviate more than pication_amine_angle_dev
                if not cation_angle < pication_amine_angle_dev:
                    ispicat = False

                # Use the module logger (as everywhere else in this module)
                # instead of the root logger.
                logger.debug('Cation likely an amine. Angle to ring normal: {0:.2f} deg.'.format(cation_angle))

    if ispicat:
        logger.info('Cation-pi interaction between {0}-{1} and ring {2}-{3}. Distance: {4:.3f} nm'
                    ' Offset:{5:.2f} nm'.format(cation.resName, cation.name, ring['resName'].values[0],
                                                ring['resSeq'].values[0], pcdist, pcoffset))

    return ispicat, data
def eval_saltbridge(contact_frame, topology, max_charge_dist=0.55):
    """
    Evaluate contacts between centers of positive and negative charge.
    Physiological relevant pH is assumed.

    Algorithm:
    1) Primary selection is between all source and target atoms that are
       max_charge_dist apart according to (Barlow and Thornton, 1983) + 0.15 nm
    2) Select all residues in the previous selection that have a formal
       positive or negative charge according to the sum of partial charges in
       the 'charge' column. The latter charges are Gasteiger partial charges
       by default.
    3) Select all atoms that are likely a part of the charged group in the
       residues from step 2 based on SYBYL atom types following:

       amino-acid                type    atom         charge
       --------------------------------------------------------------------
       Arginine - Arg - R        N.pl3   RNHC(NH2)2+  +
       Lysine - Lys - K          N.4     RNH3         +
       Histidine - His - H       N.ar    ND1, NE2     +
       Aspartic acid - Asp - D   O.co2   RCOO-        -
       Glutamic acid - Glu - E   O.co2   RCOO-        -

       Ligands                   type    atom         charge
       --------------------------------------------------------------------
       quaternary ammonium       N.4                  +
       tertiary amines           N.am                 +
       sulfonium groups          S.3                  +
       guanidine groups          C.cat                +
       phosphate                 O.co2   PO4          -
       sulfonate                 S.3     RSO2O-       -
       sulfonic acid             S.O2                 -
       carboxylate               O.co2                -

    4) Select neighbouring atoms not of element type 'C' or 'H' in the
       selection from step 3 to define all atoms part of the charged group
    5) Evaluate salt-bridges by the distance between the geometrical centers
       of two charged groups of opposite sign being smaller or equal to
       max_charge_dist.

    Steps 2 to 4 are delegated to the `find_charged_centers` method of the
    topology selection. A group with a charge of -1 or lower is treated as
    negative, every other returned group as positive.

    Although multiple atoms of both charged groups take part in the
    salt-bridge, only the pair with the shortest atom-atom distance is
    reported using the labels:
    - 'sb-pn': for a positive charged source to negative target contact.
    - 'sb-np': for a negative charged source to positive target contact.

    Because salt-bridges are composed of hydrogen-bonded and charged
    interactions, the reported atom pair often was reported before as taking
    part in a hydrogen-bonded interaction when the `eval_hbonds` function was
    used. The salt-bridge label will be added to the contact column
    maintaining the hydrogen bond label.

    :param contact_frame:   contact DataFrame
    :type contact_frame:    :py:DataFrame
    :param topology:        Pandas DataFrame representing the structure
    :type topology:         :interact:ToplogyDataFrame
    :param max_charge_dist: maximum distance cutoff between charge centers
    :type max_charge_dist:  :py:float

    :return:                Adds the labels 'sb-np' or 'sb-pn' to the 'contact'
                            column of the input contact frame.
    :rtype:                 :pandas:DataFrame
    """

    # Contacts at or below the charge distance cutoff
    within_cutoff = contact_frame[contact_frame['target', 'distance'] <= max_charge_dist]

    # Collect (sign, atom selection) tuples for the charged groups on the
    # source and target side of the preselected contacts
    charged_groups = {'source': [], 'target': []}
    for side, collected in charged_groups.items():
        side_atoms = topology[topology['serial'].isin(within_cutoff[side, 'serial'])]
        for charge_group in side_atoms.find_charged_centers():
            sign = 'n' if charge_group[1] <= -1 else 'p'
            collected.append((sign, charge_group[0]))

    n_source = len(charged_groups['source'])
    n_target = len(charged_groups['target'])
    if n_source == 0 or n_target == 0:
        logger.info('Not running salt-bridge detection. Charged groups in source: {0}, target: {1}'.format(
            n_source, n_target))
        return contact_frame

    logger.info(
        "Run salt-bridge detection on {0} source and {1} target charged groups using: max_charge_dist={2}".format(
            n_source, n_target, max_charge_dist))

    # Evaluate every source/target combination of charged groups
    for (s_sign, s_atoms), (t_sign, t_atoms) in itertools.product(charged_groups['source'],
                                                                   charged_groups['target']):

        # Only groups of opposite sign can form a salt-bridge
        if s_sign == t_sign:
            continue

        center_distance = distance(s_atoms.center(), t_atoms.center())
        sb_type = 'sb-{0}{1}'.format(s_sign, t_sign)
        source_center = repr(list(s_atoms['serial'])).strip('[]')
        target_center = repr(list(t_atoms['serial'])).strip('[]')

        if center_distance <= max_charge_dist:
            logger.info('{0} between {1}-{2} and {3}-{4}. D: {5:.3f} nm between groups {6} and {7}'.format(
                sb_type, s_atoms['resSeq'].unique()[0], s_atoms['resName'].unique()[0],
                t_atoms['resSeq'].unique()[0], t_atoms['resName'].unique()[0],
                center_distance, source_center, target_center))

            # Label only the closest atom pair between both charged groups
            in_source_group = contact_frame['source', 'serial'].isin(s_atoms['serial'])
            in_target_group = contact_frame['target', 'serial'].isin(t_atoms['serial'])
            sb_selection = contact_frame[in_source_group & in_target_group]
            report_to = sb_selection.sort_values(by=('target', 'distance')).head(n=1)
            contact_frame.loc[report_to.index, 'contact'] = set_contact_type(
                contact_frame.loc[report_to.index, 'contact'], sb_type)

    return contact_frame
def eval_heme_coordination(contact_frame, topology, rings=None, heme_dist_prefilter=0.55, heme_dist_max=0.35,
                           heme_dist_min=0, min_heme_coor_angle=105, max_heme_coor_angle=160, fe_ox_dist=0.16,
                           exclude=('H', 'O.3', 'O.2', 'O.co2', 'O.spc', 'O.t3p', 'C.cat', 'S.o2')):
    """
    Evaluate heme coordination of ligand atoms

    Algorithm:
    1) Select all contacts with a target atom named 'FE' closer than
       `heme_dist_prefilter` of which the source atom type is not in
       `exclude`.
    2) Locate the heme ('HEM') Fe atom and the four heme nitrogen atoms within
       0.3 of it. The mean of the cross products of consecutive Fe -> N
       vectors serves as normal to the heme plane.
    3) Use the heme oxygen within 0.2 of the Fe atom if exactly one is present
       or else place a dummy oxygen at `fe_ox_dist` from Fe along the heme
       normal. A warning is logged if the oxygen - Fe - Cys SG angle is
       outside the 160-200 degree range or no Cys SG is found near Fe.
    4) For every ring in `rings` with its center closer than
       `heme_dist_prefilter` to Fe, compute the angle between the ring normal
       and the heme normal.
    5) Label source atoms of type N.ar, N.2 or N.3 as heme coordinating ('hc')
       if the ring angle (90 when the atom is not part of a nearby ring) is
       between 45 and 95 degrees, the distance to Fe is below 0.35 and the
       offset of the atom projected onto the heme plane is below 0.1.
    6) Label source atoms of type C.2, C.3, C.ar, N.1, N.2, N.4, N.pl3 or S.3
       as possible site of metabolism ('hm') if they pass a set of knowledge
       based rules on their covalent neighbours, the Fe - O - atom angle is
       between `min_heme_coor_angle` and `max_heme_coor_angle` and the
       distance to the (dummy) oxygen is between `heme_dist_min` and
       `heme_dist_max`.

    For labeled contacts the evaluated angle is stored in the
    ('target', 'angle') column.

    :param contact_frame:       contact DataFrame
    :type contact_frame:        :py:DataFrame
    :param topology:            Pandas DataFrame representing the structure
    :type topology:             :interact:ToplogyDataFrame
    :param rings:               atom selections of aromatic rings to consider
    :type rings:                :py:list
    :param heme_dist_prefilter: maximum distance to Fe for atoms and ring
                                centers to be considered
    :type heme_dist_prefilter:  :py:float
    :param heme_dist_max:       maximum oxygen - atom distance for 'hm'
    :type heme_dist_max:        :py:float
    :param heme_dist_min:       minimum oxygen - atom distance for 'hm'
    :type heme_dist_min:        :py:float
    :param min_heme_coor_angle: minimum Fe - O - atom angle for 'hm'
    :type min_heme_coor_angle:  :py:float
    :param max_heme_coor_angle: maximum Fe - O - atom angle for 'hm'
    :type max_heme_coor_angle:  :py:float
    :param fe_ox_dist:          distance from Fe at which the dummy oxygen is
                                placed
    :type fe_ox_dist:           :py:float
    :param exclude:             SYBYL atom types of source atoms to ignore
    :type exclude:              :py:tuple

    :return:                    Adds the labels 'hc' or 'hm' to the 'contact'
                                column of the input contact frame.
    :rtype:                     :pandas:DataFrame
    """

    rings = rings or []

    # Select all atoms within heme_dist_prefilter distance from Fe excluding atoms in exclude list
    fedist = contact_frame[(contact_frame['target', 'name'] == 'FE') &
                           (~contact_frame['source', 'attype'].isin(exclude)) &
                           (contact_frame['target', 'distance'] < heme_dist_prefilter)]
    if fedist.empty:
        return contact_frame

    # Get Fe atom
    fe = topology[(topology['resName'] == 'HEM') & (topology['name'] == 'FE')]
    if fe.empty:
        logger.warning("Unable to asses heme coordination. Fe atom not found")
        return contact_frame

    # Get four nitrogen atoms coordinating the Fe atom
    fe_neigh = fe.neighbours(cutoff=0.3)
    fe_coordinating = fe_neigh[(fe_neigh['resName'] == 'HEM') &
                               (fe_neigh['element'] == 'N')].sort_values(by='name')
    if len(fe_coordinating) != 4:
        logger.warning("Unable to asses heme coordination. Found {0} nitrogen atoms coordinating Fe. "
                       "Expected 4".format(len(fe_coordinating)))
        return contact_frame

    logger.debug("Run heme coordination detection on {0} possible contacts using: heme_dist_prefilter={1:.2f}, "
                 "heme_dist_min={2:.2f}, heme_dist_max={3:.2f}, min_heme_coor_angle={4:.2f}, max_heme_coor_angle={5:.2f}, "
                 "fe_ox_dist={6:.2f}".format(fedist.shape[0], heme_dist_prefilter, heme_dist_min, heme_dist_max,
                                             min_heme_coor_angle, max_heme_coor_angle, fe_ox_dist))

    # Calculate normals between consecutive Nitrogen -> Fe vectors and average
    # them into one heme plane normal
    fe_coor = fe.coord
    n_coor = fe_coordinating.coord - fe_coor
    plane_normals = [numpy.cross(n_coor[i], n_coor[(i + 1) % 4]) for i in range(4)]
    mv = numpy.mean(numpy.vstack(plane_normals), axis=0)

    # Is there an Oxygen above the heme (complex I) or do we need to place a dummy
    close_fe_neigh = fe.neighbours(cutoff=0.2)
    bound_oxygen = close_fe_neigh[(close_fe_neigh['resName'] == 'HEM') & (close_fe_neigh['element'] == 'O')]
    if len(bound_oxygen) == 1:
        dummyox = bound_oxygen.coord
        logger.info('Oxygen atom bonded to Fe (complex I)')
    else:
        # Place a dummy O atom along the normalized heme normal at
        # fe_ox_dist from the Fe atom
        dummyox = ((mv / numpy.linalg.norm(mv)) * fe_ox_dist) + fe_coor
        logger.info("Reconstructed oxygen atom placed {0}nm above Heme Fe at position {1}".format(
            fe_ox_dist, ' '.join(['{0:.3f}'.format(c) for c in dummyox])))

    # Check the coordination of the Fe atom by the SG atom of the Cys below Heme
    sg = fe_neigh[(fe_neigh['resName'] == 'CYS') & (fe_neigh['name'] == 'SG')]
    if not sg.empty:
        sg_angle = angle(dummyox, fe_coor, sg.coord)
        if not 160 < sg_angle < 200:
            logger.warning(
                "Angle between reconstructed oxygen -> Fe -> Cys SG has unusual value {0:.3f}".format(sg_angle))
    else:
        logger.warning("No CYS SG atom in a distance of 0.3nm of the Heme Fe atom")

    # Check if there are rings with their center below heme_dist_prefilter
    # from heme FE and calculate the angle of the ring normal to the heme normal.
    # Every entry is (ring center, ring normal, normal angle, ring atom indices)
    ring_normals = []
    for aromatic in rings:
        aromatic_center = aromatic.center()
        aromatic_fe_dist = distance(fe_coor, aromatic_center)
        if aromatic_fe_dist < heme_dist_prefilter:
            aromatic_norm = plane_fit(aromatic.coord, center=aromatic_center)
            aromatic_norm_angle = vector_angle(aromatic_norm, mv, deg=True)
            if 180 - aromatic_norm_angle >= 0:
                aromatic_norm_angle = min(aromatic_norm_angle, 180 - aromatic_norm_angle)
            ring = aromatic.index.tolist()
            ring_normals.append((aromatic_center, aromatic_norm, aromatic_norm_angle, ring))

            logger.info("Ring {0} close to heme Fe: distance center-Fe {1:.2f}nm, normal angle heme plane-ring:"
                        "{2:.2f} deg.".format(ring, aromatic_fe_dist, aromatic_norm_angle))

    # Get ligand atoms coordinated
    for idx, n in fedist.iterrows():
        source_index = n['source', 'index']
        source_serial = n['source', 'serial']
        source_name = n['source', 'name']
        source_atom_type = n['source', 'attype']
        source = topology[topology.index == source_index]
        z = source.coord

        # Check for heme coordination by aromatic nitrogens. label as 'hc'
        if source_atom_type in ('N.ar', 'N.2', 'N.3'):
            ar_norm_angle = 90
            for ring in ring_normals:
                if source_index in ring[-1]:
                    ar_norm_angle = ring[2]
                    break

            fe_dist = distance(z, fe_coor)
            fe_offset = distance(projection(mv, fe_coor, z), fe_coor)
            if 45 < ar_norm_angle < 95 and fe_dist < 0.35 and fe_offset < 0.1:
                contact_frame.loc[idx, 'contact'] = set_contact_type(contact_frame.loc[idx, 'contact'], 'hc')
                contact_frame.loc[idx, ('target', 'angle')] = ar_norm_angle
                logger.info(
                    "Heme Fe coordination with {0} {1}. Distance: {2:.2f} A. offset: {3:.2f} A plane normal angle: {4:.2f}".format(
                        source_serial, source_name, fe_dist, fe_offset, ar_norm_angle))

        # Check for possible sites of metabolism and label as 'hm'.
        # Filter on covalent neighbours and apply knowledge based rules.
        if source_atom_type in ('C.2', 'C.3', 'C.ar', 'N.1', 'N.2', 'N.4', 'N.pl3', 'S.3'):
            cutoff = 0.18 if source_atom_type == 'S.3' else 0.16
            neigh = source.neighbours(cutoff=cutoff)
            neigh_atom_types = set(neigh['attype'])

            # If ligand atom is of type C.3 or C.ar it should contain at least one covalently bonded atom
            # of type ['H','Cl','I','Br','F','Hal']
            if source_atom_type in ('C.3', 'C.ar') and not neigh_atom_types.intersection(
                    {'H', 'Cl', 'I', 'Br', 'F', 'Hal'}):
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom type {2} not covalently bonded to: H,Cl,I,Br,F or Hal".format(
                        source_serial, source_name, source_atom_type))
                continue

            # If ligand atom is of type N.4 it should contain at least one covalently bonded atom of type H
            if source_atom_type == 'N.4' and 'H' not in neigh_atom_types:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom type N.4 not covalently bonded to hydrogen".format(
                        source_serial, source_name))
                continue

            # Exclude carbons that are a part of ketone or carboxylate.
            # The parentheses are required: 'and' binds tighter than 'or', so
            # without them any atom type with an O.co2 neighbour was excluded.
            if source_atom_type == 'C.2' and ('O.2' in neigh_atom_types or 'O.co2' in neigh_atom_types):
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom type C.2 part of ketone or carboxylate group.".format(
                        source_serial, source_name))
                continue

            # Additional check on S.O2 wrongly labeled as S.3 (PLANTS?)
            if source_atom_type == 'S.3' and neigh[neigh['attype'] == 'O.2'].shape[0] == 2:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Atom labeled as S.3 but probably S.O2 as it is covalently bonded to two O.2".format(
                        source_serial, source_name))
                continue

            # If N.pl3 or N.2 check for nitro- or nitrate group.
            if source_atom_type in ('N.pl3', 'N.2') and (
                    'O.co2' in neigh_atom_types or len(neigh_atom_types.intersection({'O.2', 'O.3'})) == 2):
                logger.debug("Ligand target atom {0}-{1} excluded. Atom type {2}, exclude nitro- nitrate group".format(
                    source_serial, source_name, source_atom_type))
                continue

            # Exclude (iso)-nitrile group. Equality test: 'in' on a string
            # would be a substring match.
            if source_atom_type == 'N.1' and 'C.1' in neigh_atom_types:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Carbon with Sp hybridized N".format(
                        source_serial, source_name))
                continue

            # Check Heme-Nitrogen coordination (Type II binding).
            if source_atom_type in ('C.ar', 'N.ar'):
                ar_norm_angle = None
                for ring in ring_normals:
                    # Ring members are stored as topology index labels, so
                    # look up the atom by its index (not its serial).
                    if source_index in ring[-1]:
                        ar_norm_angle = ring[2]
                        break

                if ar_norm_angle and not (45 < ar_norm_angle < 85 or 95 < ar_norm_angle < 135):
                    logger.debug(
                        "Ligand target atom {0}-{1} excluded. Aromatic C or N part of ring with angle of {2:.2f} with respect to Heme plane".format(
                            source_serial, source_name, ar_norm_angle))
                    continue

            # Geometry criteria: Fe - O - atom angle and O - atom distance
            fe_ox_angle = angle(fe_coor, dummyox, z[0])
            dist = distance(dummyox, z)
            if min_heme_coor_angle < abs(fe_ox_angle) < max_heme_coor_angle and heme_dist_min < dist < heme_dist_max:
                contact_frame.loc[idx, 'contact'] = set_contact_type(contact_frame.loc[idx, 'contact'], 'hm')
                contact_frame.loc[idx, ('target', 'angle')] = fe_ox_angle
                logger.info(
                    "Heme Fe possible som with {0} {1}. Distance: {2:.3f} A. FE-O-X angle: {3:.3f}".format(
                        source_serial, source_name, dist, fe_ox_angle))
            else:
                logger.debug(
                    "Ligand target atom {0}-{1} excluded. Angle ({2:.3f}) or distance ({3:.3f}) criteria violated".format(
                        source_serial, source_name, fe_ox_angle, dist))

    return contact_frame
def eval_water_bridges(contacts, topology, min_wbridge_dist=0.25, max_wbridge_dist=0.40, min_omega_angle=75.0,
                       max_omega_angle=140.0, min_theta_angle=100.0, wbfilter=True):
    """
    Evaluate the presence of water mediated hydrogen bonded bridges in the
    provided contact DataFrame.

    Algorithm:
    1) Select all water oxygen atoms within the range defined by
       min_wbridge_dist (Jiang et al., 2005) - 0.01 nm and max_wbridge_dist
       (Jiang et al., 2005) + 0.04 nm
    2) For each water, get neighbouring atoms below max_wbridge_dist excluding
       other waters.
    3) In two loops look for ligand donor - water - other acceptor pairs and
       ligand acceptor - water - other donor pairs.
    4) For each pair check if there is at least one covalently bound hydrogen
       attached to the donor.
    5) Check the theta angle (water O - donor H - donor), should be larger
       than min_theta_angle (Jiang et al., 2005).
    6) Check the omega angle (acceptor - water O - donor H), should be in the
       range defined by min_omega_angle, max_omega_angle (Jiang et al., 2005).
    7) If wbfilter option is True, the candidate bridges of a water are ranked
       by the deviation of the omega angle from 110 deg. plus the summed
       acceptor-water and water-donor distances. Only the best ranked bridge
       is kept for ligand donor - water - acceptor bridges and the two best
       ranked for ligand acceptor - water - donor bridges.

    For every bridge kept, the existing ligand atom - water contact gets the
    bridge label and the theta angle. A new row is appended to the contacts
    with the water as target and the bridged (non-ligand) atom as source,
    holding the acceptor-water distance and the omega angle.

    :param contacts:         contact DataFrame
    :type contacts:          :py:DataFrame
    :param topology:         Pandas DataFrame representing the structure
    :type topology:          :interact:ToplogyDataFrame
    :param min_wbridge_dist: Minimal distance for water bridged hydrogen bonds
    :type min_wbridge_dist:  :py:float
    :param max_wbridge_dist: Maximum distance for water bridged hydrogen bonds
    :type max_wbridge_dist:  :py:float
    :param min_omega_angle:  Minimum omega angle, acceptor-waterO-donorH
    :type min_omega_angle:   :py:float
    :param max_omega_angle:  Maximum omega angle, acceptor-waterO-donorH
    :type max_omega_angle:   :py:float
    :param min_theta_angle:  Minimum theta angle, waterO-donorH-donor
    :type min_theta_angle:   :py:float
    :param wbfilter:         Limit the number of bridges reported per water
                             (see step 7)
    :type wbfilter:          :py:bool

    :return:                 Adds the labels 'wb-da' (ligand donor - water -
                             acceptor) or 'wb-ad' (ligand acceptor - water -
                             donor) to the 'contact' column of the contacts
                             for identified water bridges, sets the angle
                             column and appends one row per bridge.
    :rtype:                  :pandas:DataFrame
    """

    def bridge_rank(pair):
        # pair: (omega, theta, dist_aw, dist_wd, donor serial, acceptor serial, water serial)
        # abs() makes the rank the deviation from 110 deg.; without it the
        # largest omega angle always ranked first.
        return abs(110 - pair[0]) + (pair[2] + pair[3])

    def log_bridge(donor, acceptor_row, water, dist_wd, dist_aw, omega, theta):
        # Each format field has its own positional index so that omega and
        # theta are reported rather than the a-w distance and omega. The atom
        # name is read from the 'name' column: the `.name` attribute of a row
        # Series is the row index label.
        logger.info(
            "Water bridge: donor {0}-{1}-{2}, acceptor {3}-{4}-{5}, water {6}. "
            "Dist d-w {7:.3f} a-w {8:.3f}. Omega: {9:.2f} Theta {10:.2f}".format(
                donor.resName.values[0], donor.resSeq.values[0], donor.name.values[0],
                acceptor_row['resName'], acceptor_row['resSeq'], acceptor_row['name'],
                water.resSeq.values[0], dist_wd, dist_aw, omega, theta))

    def register_bridge(ligand_serial, partner_serial, water_serial, label, omega, theta, dist_aw):
        # Label the existing ligand atom - water contact
        cid = contacts[(contacts['source', 'serial'] == ligand_serial) &
                       (contacts['target', 'serial'] == water_serial)]
        tid = topology[topology['serial'] == partner_serial]
        contacts.loc[cid.index, 'contact'] = set_contact_type(contacts.loc[cid.index, 'contact'], label)
        contacts.loc[cid.index, ('target', 'angle')] = theta

        # Append a new row for the water - bridged atom leg of the bridge.
        # Column lists are materialized before the frame is enlarged.
        newindex = contacts.index.max() + 1
        for mdx in [col for col in contacts['target'].columns if not col == 'index']:
            contacts.loc[newindex, ('target', mdx)] = cid['target', mdx].values[0]
        for mdx in [col for col in contacts['source'].columns if not col == 'index']:
            contacts.loc[newindex, ('source', mdx)] = tid[mdx].values[0]
        contacts.loc[newindex, 'contact'] = label
        contacts.loc[newindex, ('target', 'distance')] = dist_aw
        contacts.loc[newindex, ('target', 'angle')] = omega

    # Preselect all water oxygen's close to ligand
    wbdist = contacts[(contacts['target', 'distance'] > min_wbridge_dist) &
                      (contacts['target', 'distance'] <= max_wbridge_dist) &
                      (contacts['target', 'resName'] == 'HOH') &
                      (contacts['target', 'attype'] == 'O.3')]
    if wbdist.empty:
        logger.debug('No water oxygen atoms detected close to the ligand')
        return contacts

    logger.info(
        "Run water bridge detection on {0} possible contacts using: min_wbridge_dist={1},"
        "max_wbridge_dist={2}, min_omega_angle={3}, max_omega_angle={4}, min_theta_angle={5},"
        "wbfilter={6}".format(len(wbdist), min_wbridge_dist, max_wbridge_dist, min_omega_angle,
                              max_omega_angle, min_theta_angle, wbfilter))

    # Query for potential hbond donor-acceptor pairs
    accpt_attypes = ('N.3', 'N.2', 'N.1', 'N.acid', 'N.ar', 'O.3', 'O.co2', 'O.2', 'S.m', 'S.a', 'F', 'Br', 'Cl')
    donor_attypes = ('N.3', 'N.2', 'N.acid', 'N.am', 'N.4', 'N.pl3', 'N.plc', 'O.3')

    # Loop over waters looking for water bridges
    ligresnum = wbdist['source', 'resSeq'].unique()
    for water_serial in sorted(wbdist['target', 'serial'].unique()):

        # Get neighbouring atoms
        water = topology.loc[(topology['serial'] == water_serial)]
        water_neigh = water.neighbours(cutoff=max_wbridge_dist)
        w = water.coord

        # Remove other waters
        if not water_neigh.empty:
            water_neigh = water_neigh[(water_neigh['resName'] != 'HOH')]

        # Split the neighbours in ligand/other donors and acceptors
        is_ligand = water_neigh['resSeq'].isin(ligresnum)
        is_donor = water_neigh['attype'].isin(donor_attypes)
        is_acceptor = water_neigh['attype'].isin(accpt_attypes)
        ligand_donors = water_neigh[is_ligand & is_donor]
        ligand_acceptors = water_neigh[is_ligand & is_acceptor]
        other_donors = water_neigh[~is_ligand & is_donor]
        other_acceptors = water_neigh[~is_ligand & is_acceptor]

        # Query possible ligand donor - water - acceptor contacts
        dwa_pairs = []
        for _, d in ligand_donors.iterrows():
            donor = topology[topology['serial'] == d['serial']]
            covalent_neighbours = donor.neighbours(covalent=True)
            x = donor.coord

            # Check if there are H-atoms attached and assess H-bond geometry criteria
            for _, h in covalent_neighbours[covalent_neighbours['attype'] == 'H'].iterrows():
                y = topology[topology['serial'] == h['serial']].coord

                # Check theta angle: donor - hydrogen - water oxygen
                theta = angle(x, y, w)
                if abs(theta) > min_theta_angle:

                    # Loop over possible acceptors
                    for _, a in other_acceptors.iterrows():
                        acceptor = topology[topology['serial'] == a['serial']].coord

                        # Check omega angle: acceptor - water oxygen - donor h
                        omega = 180 - angle(acceptor, w, y)
                        if min_omega_angle < abs(omega) < max_omega_angle:
                            dist_aw = distance(w, acceptor)
                            dist_wd = distance(w, x)
                            dwa_pairs.append(
                                (omega, theta, dist_aw, dist_wd, d['serial'], a['serial'], water.serial.values[0]))
                            log_bridge(donor, a, water, dist_wd, dist_aw, omega, theta)

        # Keep only the best ranked ligand donor bridge for this water
        if wbfilter and len(dwa_pairs) > 1:
            dwa_pairs.sort(key=bridge_rank)
            dwa_pairs = [dwa_pairs[0]]

        for omega, theta, dist_aw, _, donor_serial, acceptor_serial, bridge_water in dwa_pairs:
            register_bridge(donor_serial, acceptor_serial, bridge_water, 'wb-da', omega, theta, dist_aw)

        # Query possible ligand acceptor - water - donor contacts
        awd_pairs = []
        for _, a in ligand_acceptors.iterrows():
            acceptor = topology[topology['serial'] == a['serial']].coord

            # Loop over possible donors
            for _, d in other_donors.iterrows():
                donor = topology[topology['serial'] == d['serial']]
                covalent_neighbours = donor.neighbours(covalent=True)
                x = donor.coord

                # Check if there are H-atoms attached and assess H-bond geometry criteria
                for _, h in covalent_neighbours[covalent_neighbours['attype'] == 'H'].iterrows():
                    y = topology[topology['serial'] == h['serial']].coord

                    # Check theta angle: donor - hydrogen - water oxygen
                    theta = angle(x, y, w)
                    if abs(theta) > min_theta_angle:

                        # Check omega angle: acceptor - water oxygen - donor h
                        omega = 180 - angle(acceptor, w, y)
                        if min_omega_angle < abs(omega) < max_omega_angle:
                            dist_aw = distance(w, acceptor)
                            dist_wd = distance(w, x)
                            awd_pairs.append(
                                (omega, theta, dist_aw, dist_wd, d['serial'], a['serial'], water.serial.values[0]))
                            log_bridge(donor, a, water, dist_wd, dist_aw, omega, theta)

        # Keep only the two best ranked ligand acceptor bridges for this water
        if wbfilter and len(awd_pairs) > 2:
            awd_pairs.sort(key=bridge_rank)
            awd_pairs = awd_pairs[:2]

        for omega, theta, dist_aw, _, donor_serial, acceptor_serial, bridge_water in awd_pairs:
            register_bridge(acceptor_serial, donor_serial, bridge_water, 'wb-ad', omega, theta, dist_aw)

    # TODO: Reset dtype on atnum and resnum to int64 again. They get changed to float64 somehow.
    for n in ('source', 'target'):
        contacts[(n, 'serial')] = contacts[(n, 'serial')].astype('int64')
        contacts[(n, 'resSeq')] = contacts[(n, 'resSeq')].astype('int64')

    return contacts
def eval_hbonds(contacts, topology, max_hbond_dist=0.41, hbond_don_anglediv=50.0, hbond_acc_anglediv=90.0,
                optimize=True):
    """
    Evaluate the presence of hydrogen bonded contacts in the provided contact
    DataFrame. This function does not evaluate water bridges.

    Prerequisites:
    This function uses the SYBYL atom types to identify possible hydrogen bond
    donor and acceptor atoms. At least one covalently bonded hydrogen is
    expected for donors and subsequently the possible bonding geometry in terms
    of distances and angles is evaluated.
    As a result the function requires the input structure to be fully
    protonated or to have at least the polar hydrogens attached.

    The geometry of the attached hydrogens influences the contacts identified.
    If the structure is not, or only partially, protonated the method used to
    add hydrogens will influence the identified contacts.
    Structures derived from molecular dynamics will likely have their (polar)
    hydrogens oriented such that they reflect a hydrogen bond if present.
    If hydrogens are added with another program this may not be the case.
    OpenBabel for instance will add hydrogens in standard conformation not
    taking into account the environment of the atom to which the hydrogens are
    attached. The HBplus program (McDonald I K & Thornton J M (1994). J. Mol.
    Biol., 238, 777-793.), also part of the LIGPLOT program, will optimize
    local hydrogen geometry first.

    Differences in the geometry of added hydrogens will mostly affect the angle
    criteria rather than the distance. To correct for non-optimized H-atom
    geometry without the need for optimization, the function allows the donor
    angle criterion to be a function of the donor bonding geometry using the
    'optimize' option. The allowed deviation then becomes:

        180 / number of covalent neighbours of the donor that themselves
              have more than one covalent neighbour

    for all donors that are not trigonal planar (N.pl3, N.plc, N.ar, N.2, O.2,
    O.co2, S.a). If the donor has no such neighbour the deviation is 0 and no
    hydrogen bond is accepted for it.

    Algorithm:
    1) Select all heavy atom contacts within max_hbond_dist.
    2) Identify donor-acceptor pairs for source to target and target to source
       based on SYBYL atom types (see below).
    3) Check if donor has at least one covalently bonded H-atom
    4) Check if angle between donor - H - acceptor does not deviate more than
       hbond_don_anglediv from its ideal in-plane (180) degree orientation
       (cone fit), (Hubbard & Haider, 2001).
       The value for hbond_don_anglediv is either fixed or a function of the
       donor bonding geometry (see above) when 'optimize' is True.
    5) Check if the angle between the heavy atom acceptor neighbour -
       acceptor - H does not deviate more than hbond_acc_anglediv.
    6) Check that distance donor-acceptor heavy atom is larger than
       donor-H-acceptor.

    atom descriptor        base type  donor(1)  acceptor  directionality           note
    ------------------------------------------------------------------------------------
    sp3 N                  N.3        y         y         along lone pair
    sp2 N                  N.2        y         y         along lone pair
    sp N                   N.1        n         y         along lone pair
    Acidic N               N.acid     y         y         along lone pair          2
    Aromatic N             N.ar       y         y         along lone pair
    Amide N                N.am       y         n
    Quaternary N           N.4        y         n
    Uncharged trigonal N   N.pl3      y         n                                  3
    Charged trigonal N     N.plc      y         n                                  4
    Hydroxyl O             O.3        y         y         in plane of lone pair
    Ether O                O.3        n         y         in plane of lone pair
    Carboxylate O          O.co2      n         y         along lone pair
    Carbonyl O             O.2        n         y         in plane of lone pair
    Nitro O                O.2        n         y         along lone pair
    N-oxide O              O.2        n         y
    Amide O                O.2        n         y         in plane of lone pair
    Neutral sulfur-bound O O.2        n         y                                  5
    Charged sulfur-bound O O.co2      n         y         cone                     6
    Phosphate O            O.co2      n         y         cone
    Borate O               O.co2      n         y         cone
    Other neg-charged O    O.co2      n         y
    Negative charged S     S.m        n         y         along lone pair
    sp2 S                  S.a        n         y         along lone pair

    1: Provided at least one H-atom covalently bound
    2: An acidic nitrogen is a nitrogen bound by at least two single bonds
    3: As in uncharged histidine residue
    4: As in a guanidino residue
    5: As in sulfonamides, sulfoxides, sulfones
    6: As in sulphate groups

    :param contacts:           contact DataFrame
    :type contacts:            :py:DataFrame
    :param topology:           Pandas DataFrame representing the structure
    :type topology:            :interact:ToplogyDataFrame
    :param max_hbond_dist:     Maximum hydrogen bond distance cutoff
    :type max_hbond_dist:      :py:float
    :param hbond_don_anglediv: Maximum hydrogen bond donor-H-acceptor angle
                               deviation.
    :type hbond_don_anglediv:  :py:float
    :param hbond_acc_anglediv: Maximum hydrogen bond acceptor'-acceptor-H
                               angle deviation.
    :type hbond_acc_anglediv:  :py:float
    :param optimize:           Whether to derive the donor angle cutoff from
                               the donor atom bonding geometry.
    :type optimize:            :py:bool

    :return:                   The input contacts DataFrame, modified in place:
                               the 'contact' label is changed to hb-ad
                               (hydrogen bond acceptor-donor) or hb-da
                               (hydrogen bond donor-acceptor) for identified
                               hydrogen-bonded contacts and the
                               donor-H-acceptor angle is stored in
                               ('target', 'angle').
    :rtype:                    :pandas:DataFrame
    """

    # Preselect all contacts below max_hbond_dist
    hbdist = contacts[(contacts['target', 'distance'] <= max_hbond_dist)]
    logger.info(
        "Init eval_hbonds with {0} contacts using: max_hbond_dist={1}".format(len(hbdist), max_hbond_dist))

    # SYBYL atom types of potential hbond acceptors and donors
    accpt_attypes = ('N.3', 'N.2', 'N.1', 'N.acid', 'N.ar', 'O.3', 'O.co2', 'O.2', 'S.m', 'S.a')
    donor_attypes = ('N.3', 'N.2', 'N.acid', 'N.am', 'N.ar', 'N.4', 'N.pl3', 'N.plc', 'O.3')
    # Trigonal planar donor types for which the angle cutoff is never adapted
    donor_avoid = ('N.pl3', 'N.plc', 'N.ar', 'N.2', 'O.2', 'O.co2', 'S.a')

    # Define donor_acceptor pairs. Source donor - target acceptor and vice versa.
    # The dictionary key names the side of the contact that holds the donor.
    donor_acceptor_dict = dict()
    donor_acceptor_dict['source'] = hbdist[
        (hbdist['source', 'attype'].isin(donor_attypes)) & (hbdist['target', 'attype'].isin(accpt_attypes))]
    donor_acceptor_dict['target'] = hbdist[
        (hbdist['source', 'attype'].isin(accpt_attypes)) & (hbdist['target', 'attype'].isin(donor_attypes))]

    logger.info("{0} contacts after selecting for donor-acceptor pairs".format(
        len(donor_acceptor_dict['source'])))
    logger.info("{0} contacts after selecting for acceptor-donor pairs".format(
        len(donor_acceptor_dict['target'])))

    # Search for hbonds
    label_dict = {'source': 'hb-da', 'target': 'hb-ad'}
    for direction, selection in donor_acceptor_dict.items():

        # 'direction' is the donor side, 'target' the (opposite) acceptor side
        target = 'target' if direction == 'source' else 'source'

        for idx, n in selection.iterrows():
            donor = topology[topology['serial'] == n[direction, 'serial']]
            acceptor = topology[topology['serial'] == n[target, 'serial']]

            donor_bonded = donor.neighbours(covalent=True)
            acceptor_bonded = acceptor.neighbours(covalent=True)
            # Only heavy-atom acceptor neighbours inside the acceptor residue define acceptor geometry
            acceptor_bonded = acceptor_bonded[
                (acceptor_bonded['attype'] != 'H') & (acceptor_bonded['resSeq'] == n[target, 'resSeq'])]

            # Get donor and acceptor heavy-atom coordinates
            donor_heavy_coor = donor.coord
            acceptor_heavy_coor = acceptor.coord

            # There should at least be covalent neighbours (e.g. not ions etc.)
            if donor_bonded.empty or acceptor_bonded.empty:
                logger.debug(
                    'No neighbours in contact pair {0}-{1}, skipping'.format(
                        n[direction, 'serial'], n[target, 'serial']))
                continue

            # Hydrogens covalently attached to the donor within the donor residue.
            # Without any there is nothing to evaluate for this contact.
            donor_hydrogens = donor_bonded[
                (donor_bonded['attype'] == 'H') & (donor_bonded['resSeq'] == n[direction, 'resSeq'])]
            if donor_hydrogens.empty:
                continue

            # Acceptor neighbour coordinates do not depend on the hydrogen, look them up once
            acceptor_neigh_coor = [topology[topology['serial'] == serial].coord
                                   for serial in acceptor_bonded['serial']]

            # If optimize equals True, determine donor-H-acceptor angle deviation based on covalent
            # bonding geometry for all non trigonal planar donor atoms. This only depends on the
            # donor, so it is evaluated once per contact instead of once per hydrogen.
            don_anglediv = hbond_don_anglediv
            if optimize and donor['attype'].values[0] not in donor_avoid:
                substitutions = 0
                for serial in donor_bonded['serial']:
                    r = topology[topology['serial'] == serial].neighbours(covalent=True)
                    if len(r) > 1:
                        substitutions += 1
                # No substituted neighbours: zero tolerance, effectively rejecting the contact
                don_anglediv = 180 / float(substitutions) if substitutions else 0.0

            # Assess H-bond geometry criteria for every attached H-atom
            for idy, h in donor_hydrogens.iterrows():

                # Get donor hydrogen coordinates
                donor_h_coor = h.coord

                # Angle donor - H - acceptor
                angle1 = angle(donor_heavy_coor, donor_h_coor, acceptor_heavy_coor)
                dist1 = distance(donor_h_coor, acceptor_heavy_coor)

                # Angle acceptor_neigh - acceptor - H, the largest one is tested below.
                angle2 = [angle(coor, acceptor_heavy_coor, donor_h_coor) for coor in acceptor_neigh_coor]

                contact_dist = contacts.loc[idx, ('target', 'distance')]
                if (180 - don_anglediv < abs(angle1) < 180 + don_anglediv) and \
                   (180 - hbond_acc_anglediv < abs(max(angle2)) < 180 + hbond_acc_anglediv) and \
                   (contact_dist > dist1):

                    contacts.loc[idx, 'contact'] = set_contact_type(
                        contacts.loc[idx, 'contact'], label_dict[direction])
                    contacts.loc[idx, ('target', 'angle')] = angle1

                    # Series.name is the label of the row Series itself, not the value stored
                    # under the 'name' key, so the atom name has to be fetched by label.
                    src = contacts.loc[idx, 'source']
                    trg = contacts.loc[idx, 'target']
                    logger.info(
                        "H-bond between {0}-{1} {2}-{3} and {4}-{5} {6}-{7}. Distance D-A: {8:.3f}, "
                        "Distance DH-A: {9:.3f}, angle: {10:.2f} deg. hbond_don_anglediv: {11:.2f}"
                        .format(src.resSeq, src.resName, src.serial, src.get('name'),
                                trg.resSeq, trg.resName, trg.serial, trg.get('name'),
                                trg.distance, dist1, angle1, don_anglediv))

    return contacts