def get_interface(st, dist):
    """Collect, per chain, the residues that sit at an inter-chain interface.

    All non-hydrogen atoms of ``st`` are indexed; every atom pair closer than
    ``dist`` whose parent chains differ contributes both parent residues.
    Assumes two chains, i.e. a unique interface set per chain.

    Returns a dict mapping chain id -> set of residues, with one (possibly
    empty) entry for each chain of the first model.
    """
    # Hydrogens are skipped to keep the neighbour search small.
    heavy_atoms = [atom for atom in st.get_atoms() if atom.element != 'H']
    nbsearch = NeighborSearch(heavy_atoms)

    # Sets, because order is irrelevant and repeated residues must collapse.
    interface = {chain.id: set() for chain in st[0]}

    for atom_a, atom_b in nbsearch.search_all(dist):
        residue_a = atom_a.get_parent()
        chain_a = residue_a.get_parent()
        residue_b = atom_b.get_parent()
        chain_b = residue_b.get_parent()
        # Only pairs that bridge two different chains count.
        if chain_a != chain_b:
            interface[chain_a.id].add(residue_a)
            interface[chain_b.id].add(residue_b)
    return interface
def getLigandNbrs(resids: List[Residue], struct: Structure) -> List[ResidueDict]:
    """Return the unique non-ligand residues within 5 angstrom of a ligand.

    A KD-tree over every atom of ``struct`` is queried once per ligand
    residue, around that residue's first atom (``child_list[0]``). Residues
    that are part of the ligand itself are dropped, the rest are
    de-duplicated as ``ResidueDict`` objects and passed through
    ``addBanClass``.

    Args:
        resids: residues that together constitute the ligand.
        struct: structure to search in.

    Returns:
        A list with one ``addBanClass(...)`` result per unique neighbouring
        residue; the order is that of set iteration, i.e. unspecified.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # Only the first atom of each ligand residue is used as search centre.
        anchor_atom = r.child_list[0]
        nbrs.extend(ns.search(anchor_atom.get_coord(), 5, level='R'))

    # Build the ligand's own identifiers once instead of once per comparison.
    ligand_keys = [ResidueDict(constit) for constit in resids]

    filtered = []
    for neighbor in nbrs:
        neighbor_key = ResidueDict(neighbor)
        # Filter out the residues that constitute the ligand itself.
        if not any(ligand_key == neighbor_key for ligand_key in ligand_keys):
            filtered.append(neighbor_key)

    return [addBanClass(x) for x in set(filtered)]
def getInterface(struct, useCA=True, res2res_dist=8):
    """List, per chain of the first model, the residues contacting another chain.

    Args:
        struct: structure whose first model (``struct[0]``) is analysed.
        useCA: when True only atoms whose name starts with "CA" are indexed;
            otherwise every atom whose name does not start with "H".
        res2res_dist: distance cutoff passed to the residue-level search.

    Returns:
        ``{chain_id: {"bindingSite": [residue_id, ...]}}``. A residue id is
        appended once for each inter-chain pair it takes part in, so the
        lists may contain repeats.
    """
    model = struct[0]
    if useCA:
        def keep(atom):
            return atom.name.startswith("CA")
    else:
        def keep(atom):
            return not atom.name.startswith("H")
    atomList = [atom for atom in model.get_atoms() if keep(atom)]

    chains = model.child_list
    residue_pairs = NeighborSearch(atomList).search_all(res2res_dist, level="R")

    residuesBindingSitePerChain = {}
    for chain in chains:
        residuesBindingSitePerChain[chain.get_id()] = {"bindingSite": []}

    for res_a, res_b in residue_pairs:
        # Full ids are (structure id, model id, chain id, residue id).
        _, _, chain_a, _ = res_a.get_full_id()
        _, _, chain_b, _ = res_b.get_full_id()
        if chain_a == chain_b:
            continue
        residuesBindingSitePerChain[chain_a]["bindingSite"].append(res_a.get_id())
        residuesBindingSitePerChain[chain_b]["bindingSite"].append(res_b.get_id())
    return residuesBindingSitePerChain
def compute_interactions(pdb_name):
    """Return the classified residue-residue interactions of a PDB entry.

    Residue pairs of the first chain that lie within 4.5 of each other are
    kept when both are amino acids and their residue numbers differ by more
    than ``NUM_SKIP_RESIDUES``. Each remaining pair is wrapped in an
    ``InteractingPair``; only pairs with at least one interaction are
    returned.
    """
    # Entry names are handled in lowercase.
    pdb_name = pdb_name.lower()

    # Database record for the entry.
    s = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)

    # NOTE(review): the preferred chain is read here but not used below; the
    # atoms are taken from the first chain of the parsed structure instead.
    preferred_chain = s.preferred_chain.split(',')[0]

    # From here on ``s`` is the Biopython structure.
    s = pdb_get_structure(pdb_name)
    first_chain = next(s.get_chains())
    atom_list = Selection.unfold_entities(first_chain, 'A')

    # All residue pairs within the distance cutoff.
    ns = NeighborSearch(atom_list)
    all_neighbors = ns.search_all(4.5, "R")

    def _is_candidate(res_a, res_b):
        # Both must be amino acids and more than NUM_SKIP_RESIDUES sequence
        # steps apart.
        if not (is_aa(res_a) and is_aa(res_b)):
            return False
        return abs(res_a.id[1] - res_b.id[1]) > NUM_SKIP_RESIDUES

    all_aa_neighbors = [pair for pair in all_neighbors if _is_candidate(pair[0], pair[1])]

    # Determine the type(s) of interaction for every remaining pair.
    interactions = []
    for res_a, res_b in all_aa_neighbors:
        interactions.append(InteractingPair(res_a, res_b, get_interactions(res_a, res_b)))

    # Drop the pairs for which nothing was classified.
    classified = []
    for interaction in interactions:
        if len(interaction.get_interactions()) > 0:
            classified.append(interaction)
    return classified
def getPairsOfResiduesInContact(self, structureL, structureR):
    '''
    Finds the ligand/receptor residue pairs that are in contact, and the chains of
    each partner that take part in no contact at all.

    @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
    @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
    @return positiveContacts: Set {(full residue id from structureL, full residue id from structureR)}
    @return chainsNotContactL: Set { str(chainId structureL)}
    @return chainsNotContactR: Set { str(chainId structureR)}
    @raise NoValidPDBFile: if a structure has no first child (model)
    '''
    def _nonHydrogenAtoms(structure):
        # child_list[0] raises IndexError when the structure holds no model.
        return [atom for atom in structure.child_list[0].get_atoms()
                if not atom.name.startswith("H")]

    try:
        atomListL = _nonHydrogenAtoms(structureL)
    except IndexError:
        raise NoValidPDBFile("Problems parsing pdbFile 1")
    try:
        atomListR = _nonHydrogenAtoms(structureR)
    except IndexError:
        raise NoValidPDBFile("Problems parsing pdbFile 2")

    # One search over the atoms of both partners, reported at residue level.
    searcher = NeighborSearch(atomListL + atomListR)
    allNeigs = searcher.search_all(self.res2res_dist, level="R")

    lStructId = structureL.get_id()
    rStructId = structureR.get_id()

    positiveContacts = set()
    chainsInContactL = set()
    chainsInContactR = set()
    for resA, resB in allNeigs:
        # Full ids are (structure id, model id, chain id, residue id).
        fullIdA = resA.get_full_id()
        fullIdB = resB.get_full_id()
        structA, _, _, _ = fullIdA
        structB, _, _, _ = fullIdB
        if structA == lStructId and structB == rStructId:
            ligandId, receptorId = fullIdA, fullIdB
        elif structA == rStructId and structB == lStructId:
            ligandId, receptorId = fullIdB, fullIdA
        else:
            # Pair does not bridge ligand and receptor.
            continue
        positiveContacts.add((ligandId, receptorId))
        chainsInContactL.add(ligandId[2])
        chainsInContactR.add(receptorId[2])

    if CONSIDER_HOMOOLIG_AS_POS:
        positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
            structureL, structureR, positiveContacts, chainsInContactL, chainsInContactR)

    allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
    allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
    chainsNotContactL = allChainsL.difference(chainsInContactL)
    chainsNotContactR = allChainsR.difference(chainsInContactR)
    return positiveContacts, chainsNotContactL, chainsNotContactR
def getPairsOfResiduesInContact(self, structureL, structureR):
    '''
    Finds the ligand/receptor residue pairs that are in contact, and the chains of
    each partner that take part in no contact at all.

    :param structureL: Bio.PDB.Structure. Structure of ligand unbound state if available
    :param structureR: Bio.PDB.Structure. Structure of receptor unbound state if available.
    :return positiveContacts, chainsNotContactL, chainsNotContactR
        positiveContacts: Set {(residue of structureL, residue of structureR)},
                          the elements being the residue objects themselves
        chainsNotContactL: Set { Bio.PDB.Chain.get_id()} for ligand chains that are not in contact
        chainsNotContactR: Set { Bio.PDB.Chain.get_id()} for receptor chains that are not in contact
    :raise NoValidPDBFile: if a structure has no first child (model)
    '''
    def _nonHydrogenAtoms(structure):
        # child_list[0] raises IndexError when the structure holds no model.
        return [atom for atom in structure.child_list[0].get_atoms()
                if not atom.name.startswith("H")]

    try:
        atomListL = _nonHydrogenAtoms(structureL)
    except IndexError:
        raise NoValidPDBFile("Problems parsing pdbFile 1")
    try:
        atomListR = _nonHydrogenAtoms(structureR)
    except IndexError:
        raise NoValidPDBFile("Problems parsing pdbFile 2")

    # One search over the atoms of both partners, reported at residue level.
    searcher = NeighborSearch(atomListL + atomListR)
    allNeigs = searcher.search_all(self.res2res_dist, level="R")

    lStructId = structureL.get_id()
    rStructId = structureR.get_id()

    positiveContactsResidues = set()
    chainsInContactL = set()
    chainsInContactR = set()
    for resA, resB in allNeigs:
        # Full ids are (structure id, model id, chain id, residue id).
        structA, _, chainA, _ = resA.get_full_id()
        structB, _, chainB, _ = resB.get_full_id()
        if structA == lStructId and structB == rStructId:
            ligandSide, receptorSide = (resA, chainA), (resB, chainB)
        elif structA == rStructId and structB == lStructId:
            ligandSide, receptorSide = (resB, chainB), (resA, chainA)
        else:
            # Pair does not bridge ligand and receptor.
            continue
        positiveContactsResidues.add((ligandSide[0], receptorSide[0]))
        chainsInContactL.add(ligandSide[1])
        chainsInContactR.add(receptorSide[1])

    allChainsL = {chain.get_id() for chain in structureL[0].get_list()}
    allChainsR = {chain.get_id() for chain in structureR[0].get_list()}
    chainsNotContactL = allChainsL.difference(chainsInContactL)
    chainsNotContactR = allChainsR.difference(chainsInContactR)
    return positiveContactsResidues, chainsNotContactL, chainsNotContactR
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """

    class RandomAtom:
        """Minimal stand-in exposing the ``get_coord`` method used by the search."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for i in range(0, 20):
        atoms = [RandomAtom() for j in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        # ``hits`` is a list of pairs: comparing it with an int raises
        # TypeError on Python 3, so check the type and the length instead.
        self.assertIsInstance(hits, list)
        self.assertGreaterEqual(len(hits), 0)
def consumeMoleDataframe(self, radius: float):
    """Register every residue found near the points of ``self.mole_dataframe``.

    The dataframe must contain X, Y and Z columns (other columns may be
    present); it is aimed at MOLE's merged csv results, where the points
    follow the centerline of the tunnel. For each row a residue-level
    neighbour search of the given ``radius`` is run around the point, and
    every hit is handed to ``self.addResidue``. ``self.radius`` is set to
    ``radius`` and ``self.rescount`` is incremented once per row.
    """
    structure_atoms = list(self.structure.get_atoms())
    ns = NeighborSearch(structure_atoms, bucket_size=3)
    self.radius = radius

    def getVicinity(row):
        self.rescount += 1
        focus = numpy.array([row['X'], row['Y'], row['Z']])
        nearby: List[Residue.Residue] = ns.search(focus, radius, level='R')
        for nbr in nearby:
            self.addResidue(nbr)

    self.mole_dataframe.apply(getVicinity, axis=1)
def quick_neighbor_search_test():
    """Smoke-test Bio.PDB.NeighborSearch on random coordinates.

    Based on the self test in Bio.PDB.NeighborSearch: 20 rounds of 100
    random points in a 100-unit cube, each searched for all pairs within 5.0.
    Prints "Done" when every round returned a list.
    """
    from numpy.random import random
    from Bio.PDB.NeighborSearch import NeighborSearch

    class Atom:
        """Minimal stand-in exposing the ``get_coord`` method used by the search."""

        def __init__(self):
            self.coord = (100 * random(3))

        def get_coord(self):
            return self.coord

    for i in range(0, 20):
        al = [Atom() for j in range(100)]
        ns = NeighborSearch(al)
        hits = ns.search_all(5.0)
        # ``hits`` is a list of pairs; comparing it with 0 is a TypeError on
        # Python 3, so check its type instead.
        assert isinstance(hits, list)
    # Function-call form works on both Python 2 and Python 3.
    print("Done")
def quick_neighbor_search_test():
    """Smoke-test Bio.PDB.NeighborSearch on random coordinates.

    Based on the self test in Bio.PDB.NeighborSearch: 20 rounds of 100
    random points in a 100-unit cube, each searched for all pairs within 5.0.
    Prints "Done" when every round returned a list.
    """
    from numpy.random import random
    from Bio.PDB.NeighborSearch import NeighborSearch

    class Atom:
        """Minimal stand-in exposing the ``get_coord`` method used by the search."""

        def __init__(self):
            self.coord = (100 * random(3))

        def get_coord(self):
            return self.coord

    for i in range(0, 20):
        al = [Atom() for j in range(100)]
        ns = NeighborSearch(al)
        hits = ns.search_all(5.0)
        # ``hits`` is a list of pairs; comparing it with 0 is a TypeError on
        # Python 3, so check its type instead.
        assert isinstance(hits, list)
    # Function-call form works on both Python 2 and Python 3.
    print("Done")
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """

    class RandomAtom:
        """Point at a random position inside a 100-unit cube."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    rounds = 20
    atoms_per_round = 100
    for _ in range(rounds):
        atoms = []
        for _ in range(atoms_per_round):
            atoms.append(RandomAtom())
        hits = NeighborSearch(atoms).search_all(5.0)
        self.assertTrue(isinstance(hits, list), hits)
        self.assertTrue(len(hits) >= 0, hits)
def getLigandNbrs(resids: List[Residue], struct:Structure):
    """Return the unique non-ligand residues within 5 angstrom of a ligand.

    A KD-tree over every atom of ``struct`` is queried once per ligand
    residue, around that residue's first atom (``child_list[0]``). Residues
    that are part of the ligand itself are dropped, the rest are
    de-duplicated as ``ResidueId`` objects and passed through ``addBanClass``.

    Args:
        resids: residues that together constitute the ligand.
        struct: structure to search in.

    Returns:
        A list with one ``addBanClass(...)`` result per unique neighbouring
        residue; the order is that of set iteration, i.e. unspecified.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # Only the first atom of each ligand residue is used as search centre.
        resatom = r.child_list[0]
        # Keep the neighbouring residues themselves. Unpacking a residue with
        # ``[*nbr]`` would add its atoms to the list instead.
        nbrs.extend(ns.search(resatom.get_coord(), 5, level='R'))

    # Build the ligand's own identifiers once instead of once per comparison.
    ligand_ids = [ResidueId(constit) for constit in resids]

    filtered = []
    for neighbor in nbrs:
        neighbor_id = ResidueId(neighbor)
        # Filter out the residues that constitute the ligand itself.
        if not any(ligand_id == neighbor_id for ligand_id in ligand_ids):
            filtered.append(neighbor_id)

    return [addBanClass(x) for x in set(filtered)]
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """
    # This comment stops black style adding a blank line here, which causes flake8 D202.
    class RandomAtom:
        """Point at a random position inside a 100-unit cube."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    rounds = 20
    atoms_per_round = 100
    for _ in range(rounds):
        atoms = []
        for _ in range(atoms_per_round):
            atoms.append(RandomAtom())
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        self.assertTrue(isinstance(hits, list), hits)
        self.assertTrue(len(hits) >= 0, hits)

    # Far away from our random atoms: nothing may be found at any level.
    x = array([250, 250, 250])
    for level in ("A", "R", "C", "M", "S"):
        self.assertEqual([], ns.search(x, 5.0, level))
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """

    class RandomAtom:
        """Point at a random position inside a 100-unit cube."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    rounds = 20
    atoms_per_round = 100
    for _ in range(rounds):
        atoms = []
        for _ in range(atoms_per_round):
            atoms.append(RandomAtom())
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        self.assertTrue(isinstance(hits, list), hits)
        self.assertTrue(len(hits) >= 0, hits)

    # Far away from our random atoms: nothing may be found at any level.
    x = array([250, 250, 250])
    for level in ("A", "R", "C", "M", "S"):
        self.assertEqual([], ns.search(x, 5.0, level))
def getResidueNeighbors(struct:Structure, resid: ResidueFullIdDict, radius:float, level:str = 'R', all_levels=False)-> List[Residue] or List[Chain] or List[Chain or Residue]:
    """Yield the entities found within ``radius`` of a residue's first atom.

    This is a generator: nothing (including the level check) runs until it is
    iterated, and it then yields a single list, the search result.

    struct: opened cif structure
    resid: identifiers of the residue; ``model``, ``strand_id`` and
        ``residue_id`` are read from it
    radius: radius of the neighbourhood to check
    level: Atom, Residue, Chain : one of A/R/C (case-insensitive)
    all_levels: not used by this function
    """
    level_code = level.upper()
    if level_code not in ['A', 'R', 'C']:
        print('Level has to be one of A R C')
        raise Error

    parentStrand: Chain = struct[resid.model][resid.strand_id]
    # Residues of the strand whose sequence number matches the requested one.
    residuesOfInterest: List[Residue] = [
        residue for residue in parentStrand.child_list
        if residue.get_full_id()[3][1] == resid.residue_id
    ]

    # Only the first atom of the first matching residue serves as centre.
    coords = [atom.get_coord() for atom in residuesOfInterest[0].get_atoms()]
    center = coords[0]

    ns = NeighborSearch(list(struct.get_atoms()))
    yield ns.search(center, radius, level_code)
def get_contacts(atoms=None):
    """Build a contact map for ``atoms`` from a neighbour search.

    Args:
        atoms: list of atoms exposing ``get_coord()`` and
            ``get_serial_number()``. Defaults to an empty list.

    Returns:
        ``(contact_map, no_of_native_contacts)`` where ``contact_map`` maps
        index pairs ``(x, y)`` with ``x < y`` to 1 when the two atoms are
        within ``SEARCH_RADIUS`` of each other and to 0 otherwise, and
        ``no_of_native_contacts`` is the number of distinct contact pairs
        found by the search.
    """
    # None instead of a shared mutable default list.
    if atoms is None:
        atoms = []

    neighbor_search = NeighborSearch(atoms)
    neighbors = neighbor_search.search_all(radius=SEARCH_RADIUS)

    # The search returns each pair once, but in no particular order, so the
    # pairs are stored order-independently. An ordered (serial_i, serial_j)
    # key would miss every contact reported as (atom_j, atom_i).
    native_pairs = {
        frozenset((first.get_serial_number(), second.get_serial_number()))
        for first, second in neighbors
    }
    no_of_native_contacts = len(native_pairs)

    serials = [atom.get_serial_number() for atom in atoms]
    n = len(atoms)

    # NOTE(review): x starts at 1, so index 0 never appears in the map even
    # though the original comment speaks of n(n-1)/2 entries. Left unchanged
    # because callers may rely on the current key set. Confirm the intent.
    contact_map = {}
    for x in range(1, n - 1):
        for y in range(x + 1, n):
            in_contact = frozenset((serials[x], serials[y])) in native_pairs
            contact_map[(x, y)] = 1 if in_contact else 0
    return contact_map, no_of_native_contacts
def get_backbone_links(struc, backbone_atoms, covlnk, join_models=True):
    """Get links making the main chain.

    For every model, the atoms whose id is listed in ``backbone_atoms`` are
    searched for pairs closer than ``covlnk``. A pair is kept when the two
    atoms are not in the same residue and either ``join_models`` is true or
    both residues are in the same model.

    Returns a list of two-atom lists, each sorted by atom serial number.
    Prints a warning for every model without backbone atoms.
    """
    # TODO differenciate Protein and NA
    cov_links = []
    for mod in struc:
        # For disordered atoms the currently selected alternative is used.
        bckats = [
            atm.selected_child if atm.disordered_flag else atm
            for atm in struc[mod.id].get_atoms()
            if atm.id in backbone_atoms
        ]
        if not bckats:
            print("Warning: No backbone atoms defined")
            continue

        nbsearch = NeighborSearch(bckats)
        for at1, at2 in nbsearch.search_all(covlnk):
            if same_residue(at1, at2):
                continue
            if not (join_models or same_model(at1.get_parent(), at2.get_parent())):
                continue
            link = sorted([at1, at2], key=lambda x: x.serial_number)
            cov_links.append(link)
    return cov_links
def findNeigChains(struct, chainIdL, chainIdR, res2res_dist=6, minContacts=20):
    """Grow the ligand and receptor chain sets with their neighbouring chains.

    Starting from the chains ``chainIdL`` (ligand) and ``chainIdR``
    (receptor) of the first model, every pair of neighbouring chains that
    contains one of the two seed chains is examined. If the pair has at least
    ``minContacts`` inter-chain residue contacts (waters excluded), its chain
    that is not yet assigned joins the side of the seed it touches.

    Args:
        struct: structure; only ``struct[0]`` is used.
        chainIdL: id of the seed ligand chain.
        chainIdR: id of the seed receptor chain.
        res2res_dist: distance cutoff for both the chain-level and the
            residue-level searches.
        minContacts: minimum number of inter-chain residue pairs required.

    Returns:
        ``(ligandChains, receptorChains)`` as sets of chain objects.
    """
    def _isHeavy(atom):
        return atom is not None and not atom.name.startswith("H")

    searcher = NeighborSearch(
        [atom for atom in struct[0].get_atoms() if _isHeavy(atom)])
    allNeigs = searcher.search_all(res2res_dist, level="C")

    chainL = struct[0][chainIdL]
    chainR = struct[0][chainIdR]
    ligandChains = {chainL}
    receptorChains = {chainR}
    addedChains = ligandChains.union(receptorChains)

    for neigsGroup in allNeigs:
        hasL = chainL in neigsGroup
        hasR = chainR in neigsGroup
        # Cheap checks first: a group that touches neither seed chain, or
        # whose chains are all assigned already, cannot change the result, so
        # the residue-level search below is skipped for it.
        if not (hasL or hasR):
            continue
        if all(chain in addedChains for chain in neigsGroup):
            continue

        groupSearcher = NeighborSearch([
            atom for chain in neigsGroup for atom in chain.get_atoms()
            if _isHeavy(atom) and atom.get_parent().resname != "HOH"
        ])
        # Count only residue pairs that bridge two different chains.
        numContacts = sum(
            1 for r1, r2 in groupSearcher.search_all(res2res_dist, level="R")
            if r1.get_parent().get_id() != r2.get_parent().get_id())
        if numContacts < minContacts:
            continue

        for chain in neigsGroup:
            if chain in addedChains:
                continue
            if hasL and hasR:
                # Touches both seeds: assign at random.
                if bool(random.getrandbits(1)):
                    receptorChains.add(chain)
                else:
                    ligandChains.add(chain)
            elif hasR:
                receptorChains.add(chain)
            else:
                ligandChains.add(chain)
            addedChains.add(chain)
    return ligandChains, receptorChains
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """

    class RandomAtom:
        """Point at a random position inside a 100-unit cube."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    rounds = 20
    atoms_per_round = 100
    for _ in range(rounds):
        atoms = []
        for _ in range(atoms_per_round):
            atoms.append(RandomAtom())
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        self.assertIsInstance(hits, list)
        self.assertGreaterEqual(len(hits), 0)

    # Far away from our random atoms: nothing may be found at any level.
    x = array([250, 250, 250])
    for level in ("A", "R", "C", "M", "S"):
        self.assertEqual([], ns.search(x, 5.0, level))
def compute_interactions(pdb_name, save_to_db=False):
    """Compute CA-CA distances and residue interactions for a structure.

    The structure record is looked up by (lowercased) entry name, its stored
    PDB text is parsed, and the first preferred chain of the first model is
    used. Residues without a database entry carrying a generic number are
    removed from that chain, except residues named "HOH".

    Args:
        pdb_name: entry name of the structure; lowercased before the lookup.
        save_to_db: when true, previously stored residue pairs of this
            structure are deleted, water-mediated interactions are derived,
            every classified pair is saved and the distances are bulk-created.

    Returns:
        ``(classified, distances)``: the interacting pairs that have at least
        one interaction, and a list of tuples
        ``(db residue 1, db residue 2, distance, label 1, label 2)``.
    """
    do_distances = True
    do_interactions = True
    distances = []
    classified = []
    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()
    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]
    # Get the Biopython structure for the PDB; [0] selects the first model
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances
    # remove residues without GN and only those matching receptor.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')
    # Lookup tables keyed by sequence number: database residue and GN label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect first, detach afterwards, so the chain is not modified while
    # it is being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)
    if do_distances:
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        # Subtracting two atoms gives their distance.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))
    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')
        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")
        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
        ]
        # Only keep pairs whose residue numbers differ by MORE than
        # NUM_SKIP_RESIDUES, i.e. residues close in sequence are dropped
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]
        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]
        # Split unto classified and unclassified.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]
    if save_to_db:
        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()
            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)
            # Create interaction dictionary, keyed "<chain><resnum>_<chain><resnum>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair
            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (atoms of residues named HOH)
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list: pair every
                ## water atom with each non-water residue within 3.5 that
                ## passes is_hba or is_hbd
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]
                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]
                    # Only later entries that refer to the same water atom.
                    for index_two in [
                            index for index in range(index_one +
                                                     1, len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]
                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])
                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)
                            # zip stops at the shorter of the two match lists.
                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a + "|" +
                                        str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))
            # NOTE(review): only the pairs in ``classified`` are saved here;
            # pairs created above solely for water-mediated contacts live in
            # ``interaction_pairs`` only -- confirm this is intended.
            for p in classified:
                p.save_into_database()
        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i, d in enumerate(distances):
                # The distance is stored as an integer: int(100 * value).
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 objects are pending.
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []
            # Write whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)
    return classified, distances
# Parse the structure from its PDB file
st = parser.get_structure('1UBQ', '1ubq.pdb')

# Keep the CA atoms only, reporting each one as it is selected
select = []
for at in st.get_atoms():
    if at.id != 'CA':
        continue
    select.append(at)
    residue = at.get_parent()
    print("ATOM:", residue.get_resname(), residue.id[1], at.id)

# Index the selected atoms for neighbour queries
nbsearch = NeighborSearch(select)

print("NBSEARCH:")
# Report every pair of selected atoms closer than MAXDIST, numbered from 1
ncontact = 1
for at1, at2 in nbsearch.search_all(MAXDIST):
    print("Contact: ", ncontact)
    for label, atom in (("at1", at1), ("at2", at2)):
        print(label, atom, atom.get_serial_number(), atom.get_parent().get_resname())
    print()
    ncontact += 1
def compute_interactions(pdb_name,save_to_db = False):
    """Compute CA-CA distances and residue interactions for a structure.

    The structure record is looked up by (lowercased) entry name, its stored
    PDB text is parsed, and the first preferred chain of the first model is
    used. Residues without a database entry carrying a generic number are
    removed from that chain, except residues named "HOH".

    Args:
        pdb_name: entry name of the structure; lowercased before the lookup.
        save_to_db: when true, previously stored residue pairs of this
            structure are deleted, water-mediated interactions are derived,
            every classified pair is saved and the distances are bulk-created.

    Returns:
        ``(classified, distances)``: the interacting pairs that have at least
        one interaction, and a list of tuples
        ``(db residue 1, db residue 2, distance, label 1, label 2)``.
    """
    do_distances = True
    do_interactions = True
    distances = []
    classified = []
    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()
    # Get the pdb structure
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)
    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]
    # Get the Biopython structure for the PDB; [0] selects the first model
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances
    # remove residues without GN and only those matching receptor.
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')
    # Lookup tables keyed by sequence number: database residue and GN label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label
    # Collect first, detach afterwards, so the chain is not modified while
    # it is being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)
    if do_distances:
        for i1,res1 in enumerate(chain,1):
            if not is_water(res1):
                for i2,res2 in enumerate(chain,1):
                    if i2>i1 and not is_water(res2):
                        # Do not calculate twice.
                        # Subtracting two atoms gives their distance.
                        distance = res1['CA']-res2['CA']
                        distances.append((dbres[res1.id[1]],dbres[res2.id[1]],distance,dblabel[res1.id[1]],dblabel[res2.id[1]]))
    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')
        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(4.5, "R")
        # Filter all pairs containing non AA residues
        all_aa_neighbors = [pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])]
        # Only keep pairs whose residue numbers differ by MORE than
        # NUM_SKIP_RESIDUES, i.e. residues close in sequence are dropped
        all_aa_neighbors = [pair for pair in all_aa_neighbors if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES]
        # For each pair of interacting residues, determine the type of interaction
        interactions = [InteractingPair(res_pair[0], res_pair[1], get_interactions(res_pair[0], res_pair[1]),dbres[res_pair[0].id[1]], dbres[res_pair[1].id[1]],struc) for res_pair in all_aa_neighbors]
        # Split unto classified and unclassified.
        classified = [interaction for interaction in interactions if len(interaction.get_interactions()) > 0]
    if save_to_db:
        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()
            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)
            # Create interaction dictionary, keyed "<chain><resnum>_<chain><resnum>"
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])
                interaction_pairs[key] = pair
            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (atoms of residues named HOH)
            water_list = { water for residue in s[preferred_chain] if residue.get_resname() == "HOH" for water in residue.get_atoms() }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list: pair every
                ## water atom with each non-water residue within 3.5
                water_neighbors = [(water, match_res) for water in water_list for match_res in ns.search(water.coord, 3.5, "R") if not is_water(match_res)]
                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]
                    # Only later entries that refer to the same water atom.
                    for index_two in [ index for index in range(index_one+1, len(water_neighbors)) if water_pair_one[0]==water_neighbors[index][0] ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]
                        key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])
                        # Check if interaction is polar - NOTE: this is not capturing every angle
                        if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                            # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                            if key in interaction_pairs:
                                interaction_pairs[key].interactions.append(WaterMediated())
                            else:
                                interaction_pairs[key] = InteractingPair(res_1, res_2, [WaterMediated()], dbres[res_1.id[1]], dbres[res_2.id[1]], struc)
            # NOTE(review): only the pairs in ``classified`` are saved here;
            # pairs created above solely for water-mediated contacts live in
            # ``interaction_pairs`` only -- confirm this is intended.
            for p in classified:
                p.save_into_database()
        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i,d in enumerate(distances):
                # The distance is stored as an integer: int(100 * value).
                distance = Distance(distance=int(100*d[2]),res1=d[0], res2=d[1],gn1=d[3], gn2=d[4], gns_pair='_'.join([d[3],d[4]]), structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 objects are pending.
                if len(bulk_distances)>1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []
            # Write whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)
    return classified, distances
def main():
    """Command-line entry point of the polar contacts detector.

    Parses the arguments, loads the VDW parameter set and the residue
    library, reads the PDB file and keeps its first model. It then lists the
    polar atom pairs closer than ``HBLNK`` (after filtering, see below) and
    prints, for every residue pair involved, the summed electrostatic and
    van der Waals terms, followed by the five residue pairs with the lowest
    total.

    Exits with status 2 when no PDB path is given or the file cannot be
    loaded.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')
    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')
    parser.add_argument('pdb_path')
    args = parser.parse_args()
    # Echo the effective settings.
    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)
    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel
    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))
    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))
    if not pdb_path:
        parser.print_help()
        sys.exit(2)
    # NOTE: from here on ``parser`` is the PDB parser, not the argument parser.
    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)
    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    # Using Model 0 any way
    st = st[0]
    # Making a list of polar atoms
    polats = []
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars
    for at in st.get_atoms():
        if at.id in selected_atoms:
            polats.append(at)
    #Searching for contacts under HNLNK on diferent residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        #Discard covalents and neighbours
        # (atom - atom is the distance between the two atoms)
        if (at1 - at2) < COVLNK:
            continue
        # Residue numbers differing by exactly 1 count as sequence neighbours;
        # the chain is not taken into account here.
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
                    or at2.get_parent().get_resname() in waternames:
                continue
        # atom1 = Atom(at1,1,aaLib,vdwParams)
        # atom2 = Atom(at2,1,aaLib,vdwParams)
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])
    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))
    print()
    print("Residue interactions")
    # Making list or residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])
    # One tuple per residue pair:
    # (resname1, resnum1, resname2, resnum2, eint, evdw, eint + evdw)
    list5 = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        eint = 0.
        evdw = 0.
        # Sum both terms over every atom-atom combination of the two residues.
        for at1 in rpair[0].get_atoms():
            resid1 = rpair[0].get_resname()
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in rpair[1].get_atoms():
                resid2 = rpair[1].get_resname()
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                # Charge-charge term: 332.16 * q1 * q2 / diel / distance.
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / (
                    at1 - at2)
                # eps and sig of the pair are the geometric means of the two
                # atoms' values.
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                # 12-6 term: 4 * eps * ((sig/d)**12 - (sig/d)**6).
                evdw = evdw + 4 * eps * ((sig / (at1 - at2))**12 -
                                         (sig / (at1 - at2))**6)
        list5.append((resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint,
                      evdw, eint + evdw))  #list all
        print(resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint, evdw,
              eint + evdw)
        # eint= electrostatic, evdw= vanderwaals, eint+evdw= total
    i = 0
    for element in sorted(list5, key=lambda li: li[6]):  #sort list by total, ascending
        i += 1
        print(element)
        if i == 5:
            break  #stops after the 5 residue pairs with the lowest total
st = parser.get_structure('1UBQ', args.pdb_file)

# Collect the polar atoms. Hetero residues (flag starting with 'H_') and
# waters (flag starting with 'W') are skipped unless args.hetatm is set.
polar_ats = []
for at in st.get_atoms():
    res = at.get_parent()
    # The two flag tests are grouped explicitly: without the grouping ``and``
    # binds tighter than ``or`` and hetero residues were skipped even when
    # args.hetatm was given.
    is_het_or_water = res.id[0].startswith('H_') or res.id[0].startswith('W')
    if is_het_or_water and not args.hetatm:
        continue
    if at.element in POLAR:
        polar_ats.append(at)
print (len(polar_ats), 'POLAR Atoms found')

# Preparing search
nbsearch = NeighborSearch(polar_ats)
at_pairs = nbsearch.search_all(HBLNK)

# hbs maps a residue to a dict that is created empty here.
hbs = {}
for at1, at2 in sorted(at_pairs, key=lambda at_pair: at_pair[0].serial_number):
    res1 = at1.get_parent()
    res2 = at2.get_parent()
    # remove atom pairs from the same residue and next in sequence
    # NOTE(review): the difference is signed, so a pair reported with the
    # higher-numbered residue first is skipped as well -- confirm whether
    # abs() was intended.
    if res2.id[1] - res1.id[1] < 2:
        continue
    # using the contact with the shortest distance between residues
    dist = at1 - at2
    if res1 not in hbs:
        hbs[res1] = {}
def find_ligands(self, radius=3):
    """
    Searches the parent structure for hetero components close to the
    catalytic residues, by searching around the atoms of catalytic residues
    and the dummy atoms between distant residues. Populates the ligands
    list with Het objects.

    Nothing is done when the parent structure is not a Structure instance.

    Args:
        radius: the search space (in Å) around the atoms of the catalytic
                residues
    """
    # isinstance also accepts subclasses of Structure, which the former
    # exact-type comparison rejected.
    if not isinstance(self.parent_structure, Structure):
        return

    # Get centers of search
    centers = self._get_ligand_search_centers(radius)

    # Initialize KD tree over every atom of the parent structure
    query_atoms = Bio.PDB.Selection.unfold_entities(
        self.parent_structure, 'A')
    ns = NeighborSearch(query_atoms)

    polymers = defaultdict(list)
    site_chains = {res.chain for res in self}
    seen = set()   # full ids of residues already examined
    added = set()  # full ids of hetero residues already added as ligands

    # Search for components close to catalytic residues
    for center in centers:
        for res in ns.search(center, radius, level='R'):
            full_id = res.get_full_id()
            if full_id in seen:
                continue
            seen.add(full_id)

            # First character of the hetero flag: 'W', 'H' or ' '
            restype = res.get_id()[0][0]
            chain = res.get_parent().get_id()

            # Ignore waters
            if restype == 'W':
                continue
            if restype == 'H':
                # HET components
                self.add(
                    Het(self.mcsa_id, self.pdb_id, res.get_resname(),
                        res.get_id()[1], chain, structure=res,
                        parent_site=self))
                added.add(full_id)
            elif restype == ' ' and chain not in site_chains:
                # Protein/nucleic polymer components from other chains
                polymers[chain].append(res)

    # Build polymers
    if self.acts_on_polymer:
        for chain, reslist in polymers.items():
            self.add(
                Het.polymer(reslist, self.mcsa_id, self.pdb_id, chain, self))

    # Find distal co-factor-like or substrate-like molecules within a fixed
    # radius of 30 around the site's geometric center
    hits = ns.search(self.structure.center_of_mass(geometric=True), 30,
                     level='R')
    for res in hits:
        restype = res.get_id()[0][0]
        if restype == 'H' and res.get_full_id() not in added:
            ligand = Het(self.mcsa_id, self.pdb_id, res.get_resname(),
                         res.get_id()[1], res.get_parent().get_id(),
                         structure=res, parent_site=self)
            if ligand.type in ('Substrate (non-polymer)',
                               'Co-factor (non-ion)'):
                ligand.is_distal = True
                self.add(ligand)
#
# Simple program to search contacts
from Bio.PDB.NeighborSearch import NeighborSearch
from Bio.PDB.PDBParser import PDBParser

# Maximum distance for two atoms to count as a contact
HBLNK = 3.5

parser = PDBParser(PERMISSIVE=1)
st = parser.get_structure('estructura', '1ubq.pdb')

# Select every atom of the structure and echo each one.
selecc = list(st.get_atoms())
for at in selecc:
    print("ATOM:", at)

nbsearch = NeighborSearch(selecc)
print("NBSEARCH:")

# Report every atom pair closer than HBLNK, numbering the contacts from 1.
ncontact = 1
for at1, at2 in nbsearch.search_all(HBLNK):
    print("Contact: ", ncontact)
    for label, atom in (("at1", at1), ("at2", at2)):
        print(label, atom, atom.get_serial_number(),
              atom.get_parent().get_resname())
    print()
    ncontact += 1
print('Parsing', args.pdb_file) # load structure from PDB file of PDB ifle handler st = parser.get_structure('STR', args.pdb_file.name) # collecting Polar atoms polar_atoms = [] for at in st.get_atoms(): if at.element in POLAR_ELEMENTS: polar_atoms.append(at) print(len(polar_atoms), 'polar Atoms found') # Preparing search nbsearch = NeighborSearch(polar_atoms) at_pairs = nbsearch.search_all(args.hb_max_dist) # Output sorted by atom,serial_number, nbsearch returns ordered pairs # Redirect the output with > output_list for at1, at2 in sorted(at_pairs, key=lambda at_pair: at_pair[0].serial_number): ad = True da = True if args.check: da, ad = check_hb(at1, at2) if da and ad: # classification not requested or not predictable print('{:11} : {:11} {:8.3f}'.format(atom_id(at1), atom_id(at2), at1 - at2)) elif da: # Donor - Acceptor print('{:11}D : {:11}A {:8.3f}'.format(atom_id(at1), atom_id(at2),
def main():
    """Command-line entry point: print the polar contacts of a PDB file.

    Parses the options, loads the first model of the structure, selects the
    polar atoms (backbone only with --backonly), searches for atom pairs
    closer than HBLNK and prints them ordered by the serial number of the
    first atom. Exits with status 2 when the structure cannot be loaded.
    """
    cli = argparse.ArgumentParser(
        prog='polarContacts',
        description='Polar contacts detector',
    )
    cli.add_argument(
        '--backonly', action='store_true', dest='backonly',
        help='Restrict to backbone',
    )
    cli.add_argument(
        '--nowats', action='store_true', dest='nowats',
        help='Exclude water molecules',
    )
    cli.add_argument('pdb_path')
    args = cli.parse_args()

    print ("Settings")
    print ("--------")
    for name, value in vars(args).items():
        print ('{:10}:'.format(name), value)

    if not args.pdb_path:
        cli.print_help()
        sys.exit(2)

    try:
        st = PDBParser(PERMISSIVE=1).get_structure('st', args.pdb_path)
    except OSError:
        print ("#ERROR: loading PDB")
        sys.exit(2)

    # Only the first model is analysed; warn when others are being ignored.
    if len(st) > 1:
        print ("#WARNING: Several Models found, using only first")
    st = st[0]

    # Polar atoms that take part in the search
    selected_atoms = backbone_polars if args.backonly else all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Contacts under HBLNK between atoms of different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        # Same residue, covalent distance, or adjacent residue numbers
        if (res1 == res2
                or (at1 - at2) < COVLNK
                or abs(res2.id[1] - res1.id[1]) == 1):
            continue
        # Optionally drop contacts that involve a water molecule
        if args.nowats and (res1.get_resname() in waternames
                            or res2.get_resname() in waternames):
            continue
        # Store each pair with the lower serial number first
        if at1.get_serial_number() < at2.get_serial_number():
            first, second = at1, at2
        else:
            first, second = at2, at1
        hblist.append([StructureWrapper.Atom(first, 1),
                       StructureWrapper.Atom(second, 1)])

    print ()
    print ("Polar contacts")
    print ('{:13} ({:4}, {:6}) {:13} ({:4}, {:6}) {:6} '.format(
        'Atom1', 'Type', 'Charge', 'Atom2', 'type', 'charge', 'Dist (A)'))
    hblist.sort(key=lambda pair: pair[0].at.get_serial_number())
    for left, right in hblist:
        print ('{:14} {:14} {:6.3f} '.format(
            left.atid(), right.atid(), left.at - right.at))
# Echo the parsed settings.
for name, value in vars(args).items():
    print('{:10}:'.format(name), value)
print("PDB.filename:", args.pdb_file.name)

parser = PDBParser(PERMISSIVE=1)
print('Parsing', args.pdb_file)
# Load the structure using the name of the PDB file handle
st = parser.get_structure('STR', args.pdb_file.name)

# Collect the CA atoms
ca_atoms = [at for at in st.get_atoms() if at.id == 'CA']
print(len(ca_atoms), 'CA Atoms found')

# Search for CA pairs within args.max_dist
nbsearch = NeighborSearch(ca_atoms)
at_pairs = nbsearch.search_all(args.max_dist)

# Output ordered by the serial number of the first atom of each pair.
# Redirect the output with > output_list
ordered_pairs = sorted(at_pairs, key=lambda pair: pair[0].serial_number)
for at1, at2 in ordered_pairs:
    print(atom_id(at1), ":", atom_id(at2), at1 - at2)
coo_list.append(coo) return res_list, np.concatenate(coo_list).reshape((-1, 3)) # %% p, c = "4fae", "B" parser = PDBParser() if PreProcess.download_pdb(p, f'{p}.pdb'): structure = parser.get_structure('a', f'{p}.pdb') chain = structure[0][c] res_list, coo_list = get_residue_feature(chain) atom_list = [i for i in chain.get_atoms()] ns = NeighborSearch(atom_list) ns_list = ns.search_all(3.3, level='R') edge, l = [], len(res_list) for ai, aj in ns_list: i, j = ai.get_id()[1]-1, aj.get_id()[1]-1 if i < j-1 and j < l: edge.append([i, j, 1]) #%% mod = 'train' out = {'train': {}, 'test':{}, 'valid':{}} out[mod]['edge'] =
def main():
    """Command-line entry point: polar contacts and residue interaction energies.

    Parses the options, loads the VDW parameter set and the residue library,
    reads the first model of the PDB file, lists polar contacts under HBLNK
    and prints electrostatic, VDW and total energies per residue pair.
    Optionally draws a bar chart of contacts per residue (--plot), prints the
    lowest-energy pairs (--index) and uses NACCESS surfaces (--surf).

    NOTE(review): the original indentation of this function was lost; the
    nesting below is reconstructed and several spots are flagged for
    confirmation.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument(  # Argument to plot the number of polar contacts that each residue has
        '--plot',
        action='store_true',
        dest='plotMode',
        help='Restrict to sidechain')
    # NOTE(review): the help text of --plot does not describe plotting.
    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')
    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')
    parser.add_argument(
        '--index',
        action='store',
        dest='index',
        help='Only select the molecules with more stable contact by an index')
    parser.add_argument(
        '--surf',
        action='store_true',
        dest='surf',
        help='Usa ASA',
    )
    parser.add_argument('pdb_path')
    args = parser.parse_args()

    # Echo the parsed settings
    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)
    backonly = args.backonly
    nowats = args.nowats
    plotMode = args.plotMode
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    index = args.index
    surf = args.surf
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))
    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    # From here on `parser` is the PDB parser, no longer the argument parser.
    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for model
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    # Using Model 0 any way
    st = st[0]

    # Getting surfaces
    # NOTE(review): the NACCESS binary path is hard-coded to one user's
    # machine. res_surfaces and at_surfaces are not read again in this function.
    if surf:
        res_surfaces = NACCESS(
            st,
            naccess_binary=
            '/Users/daniel/Downloads/BioPhysics-energies0/NACCESS/naccess')
        at_surfaces = NACCESS_atomic(
            st,
            naccess_binary=
            '/Users/daniel/Downloads/BioPhysics-energies0/NACCESS/naccess')
        print("Surfaces obtained from NACCESS")

    # Making a list of polar atoms
    polats = []
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars
    for at in st.get_atoms():
        if at.id in selected_atoms:
            polats.append(at)

    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
                or at2.get_parent().get_resname() in waternames:
                continue
        # NOTE(review): atom1 and atom2 are built but never used; hblist
        # stores the at1/at2 objects themselves.
        atom1 = Atom(at1, 1, aaLib, vdwParams)
        atom2 = Atom(at2, 1, aaLib, vdwParams)
        # Each pair is stored with the lower serial number first
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    if plotMode:
        # Dictionaries keyed by residue number whose values are the number
        # of contacts counted on the sidechain or on the mainchain
        sidecontacts = {}
        maincontacts = {}
    # NOTE(review): `res` is placed outside the `if plotMode:` block here
    # because the loop below calls res.add() unconditionally; the original
    # nesting could not be recovered - confirm.
    res = set(
    )  # A set to store the residues involved in polar contacts; a set avoids repeated residues
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        res.add(str(r1.id[1]) + r1.get_resname())
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))
        # NOTE(review): this branch calls .atid() and reads .at on the hblist
        # entries, which are the at1/at2 objects appended above rather than
        # the atom1/atom2 wrappers - verify these attributes exist on them.
        if surf:
            print('{:14} ({:6.3f}) {:14} ({:6.3f}) {:6.3f} '.format(
                hb[0].atid(), float(hb[0].at.xtra['EXP_NACCESS']),
                hb[1].atid(), float(hb[1].at.xtra['EXP_NACCESS']),
                hb[0].at - hb[1].at))
        if plotMode:
            # Both atoms are backbone polar atoms: count a mainchain contact
            # for the first residue; otherwise count a sidechain contact.
            # A missing counter in the other dictionary is initialised to 0.
            # NOTE(review): the pairing of the two `else:` clauses with the
            # outer `if` statements is reconstructed - confirm.
            if hb[0].id in backbone_polars and hb[1].id in backbone_polars:
                if r1.id[1] not in maincontacts:
                    maincontacts[r1.id[1]] = 1
                    if r1.id[1] not in sidecontacts:
                        sidecontacts[r1.id[1]] = 0
                else:
                    maincontacts[r1.id[1]] += 1
            else:
                if r1.id[1] not in sidecontacts:
                    if r1.id[1] not in maincontacts:
                        maincontacts[r1.id[1]] = 0
                    sidecontacts[r1.id[1]] = 1
                else:
                    sidecontacts[r1.id[1]] += 1

    if plotMode:
        # Plot the number of polar contacts of each residue on a bar graph.
        # Contacts on mainchain are blue bars, contacts on sidechain red bars
        # NOTE(review): `res` is an unordered set of labels while the bar
        # heights come from the dictionaries' insertion order, so tick labels
        # may not line up with their bars - verify.
        N = len(res)
        nindex = np.arange(N)
        main_con = maincontacts.values()
        side_con = sidecontacts.values()
        fig, ax = plt.subplots()
        bar_width = 0.35
        rec1 = ax.bar(nindex,
                      main_con,
                      bar_width,
                      color='b',
                      label='Mainchain contacts')
        rec2 = ax.bar(nindex + bar_width,
                      side_con,
                      bar_width,
                      color='r',
                      label='Sidechain contacts')
        ax.set_xlabel('AminoAcid Number')
        ax.set_ylabel('Number of polar contacts')
        ax.set_xticks(nindex + bar_width / 2, res)
        ax.set_xticklabels(res)
        ax.legend()
        fig.tight_layout()
        plt.show()

    print()
    print("Residue interactions")
    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = Residue(hb[0].get_parent(), 1, aaLib, vdwParams)
        r2 = Residue(hb[1].get_parent(), 1, aaLib, vdwParams)
        # NOTE(review): r1 and r2 are new wrapper objects on every iteration,
        # so this membership test only removes duplicates if Residue defines
        # equality - confirm.
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])

    # One row per residue pair; index 6 holds the total energy (eint + evdw)
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].resNum()):
        eint = rpair[0].elecInt(rpair[1], diel)
        evdw = rpair[0].vdwInt(rpair[1])
        print('{:10} {:10} {: 8.4f} {: 8.4f} {: 8.4f}'.format(
            rpair[0].resid(), rpair[1].resid(), eint, evdw, eint + evdw))
        l.append([
            rpair[0].resid(), rpair[0], rpair[1].resid(), rpair[1], eint, evdw,
            eint + evdw
        ])

    # NOTE(review): whether the two blocks below belong inside the loop above
    # could not be recovered. As written they run once after the loop, so the
    # `surf` block only sees the last `rpair` and would fail with a NameError
    # if `respairs` were empty - confirm the intended nesting.
    if index is not None:
        # Select only the pairs with the lowest total energy (the most stable ones)
        for e, element in enumerate(sorted(l, key=lambda i: i[6])):
            if e < int(index):
                print(element)
    if surf:
        srfr1 = float(rpair[0].residue.xtra['EXP_NACCESS']['all_polar_rel'])
        srfr2 = float(rpair[1].residue.xtra['EXP_NACCESS']['all_polar_rel'])
        # Use a dielectric constant of 80.0 when either value exceeds 30,
        # otherwise the --diel value
        if (srfr1 > 30.) or (srfr2 > 30.):
            diel0 = 80.0
        else:
            diel0 = diel
        eint = rpair[0].elecInt(rpair[1], diel0)
        print(
            '{:10} ({:>8.3f}) {:10} ({:>8.3f}) (e: {:4.1f}) {:>8.4f} {:>8.4f} {:>8.4f}'
            .format(rpair[0].resid(), srfr1, rpair[1].resid(), srfr2, diel0,
                    eint, evdw, eint + evdw))
def _get_contacts(ats_list, d_cutoff): contact_list = [] nbsearch = NeighborSearch(ats_list) for at1, at2 in nbsearch.search_all(d_cutoff): contact_list.append((at1.get_parent(), at2.get_parent(), at1 - at2)) return contact_list
def compute_interactions(pdb_name, save_to_db=False):
    """Compute residue distances and classified interactions for a structure.

    Looks up the Structure record whose protein entry name is the lower-cased
    pdb_name, parses its stored PDB text, and for the first preferred chain:
    measures CA-CA distances between residue pairs, classifies interactions
    between neighbouring residues and, when a SignprotComplex record exists,
    classifies interactions with the chain named by its `alpha` attribute.

    Args:
        pdb_name: entry name of the structure; lower-cased before use.
        save_to_db: when true, the results are stored through
            save_into_database() and Distance.objects.bulk_create().

    Returns:
        A tuple (classified, distances): the InteractingPair objects with at
        least one interaction, and a list of
        (db residue 1, db residue 2, distance, label 1, label 2) tuples.
    """
    # The three stages are toggled here; all are currently enabled.
    do_distances = True
    do_interactions = True
    do_complexes = True
    distances = []
    classified = []
    classified_complex = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)

    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # remove residues without GN and only those matching receptor.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')

    # Index the database residues and their generic-number labels by
    # sequence number
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Detach every non-water residue that has no database counterpart.
    # The ids are collected first so the chain is not modified while iterating.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Split into classified and unclassified; only classified pairs are kept.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if do_complexes:
        try:
            # check if structure in signprot_complex
            # NOTE(review): the local name `complex` shadows the builtin.
            complex = SignprotComplex.objects.get(structure=struc)

            # Get all GPCR residue atoms based on preferred chain
            gpcr_atom_list = [ atom for residue in Selection.unfold_entities(s[preferred_chain], 'R') if is_aa(residue) \
                            for atom in residue.get_atoms()]

            # Get all residue atoms from the coupled protein (e.g. G-protein)
            # NOW: select alpha subunit protein chain using complex model
            sign_atom_list = [ atom for residue in Selection.unfold_entities(s[complex.alpha], 'R') if is_aa(residue) \
                                for atom in residue.get_atoms()]

            ns_gpcr = NeighborSearch(gpcr_atom_list)
            ns_sign = NeighborSearch(sign_atom_list)

            # For each GPCR atom perform the neighbor search on the signaling protein
            all_neighbors = {
                (gpcr_atom.parent, match_res)
                for gpcr_atom in gpcr_atom_list
                for match_res in ns_sign.search(gpcr_atom.coord, 4.5, "R")
            }

            # For each pair of interacting residues, determine the type of interaction
            residues_sign = ProteinConformation.objects.get(
                protein__entry_name=pdb_name + "_" +
                complex.alpha.lower()).residue_set.exclude(
                    generic_number=None).all().prefetch_related(
                        'generic_number')

            # grab labels from sign protein
            dbres_sign = {}
            dblabel_sign = {}
            for r in residues_sign:
                dbres_sign[r.sequence_number] = r
                dblabel_sign[r.sequence_number] = r.generic_number.label

            # Find interactions (only pairs with database records on both sides)
            interactions = [
                InteractingPair(res_pair[0], res_pair[1],
                                dbres[res_pair[0].id[1]],
                                dbres_sign[res_pair[1].id[1]], struc)
                for res_pair in all_neighbors if res_pair[0].id[1] in dbres
                and res_pair[1].id[1] in dbres_sign
            ]

            # Filter unclassified interactions
            classified_complex = [
                interaction for interaction in interactions
                if len(interaction.get_interactions()) > 0
            ]

            # Convert to dictionary for water calculations.
            # Keys have the form "<chain id><residue number>_<chain id><residue number>".
            interaction_pairs = {}
            for pair in classified_complex:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # Obtain list of all water molecules in the structure
            water_list = {
                water
                for chain in s for residue in chain
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }

            # If waters are present calculate water-mediated interactions
            # NOTE(review): the two neighbour sets below are computed but not
            # used; the code that combined them is commented out.
            if len(water_list) > 0:
                ## Iterate water molecules over coupled and gpcr atom list
                water_neighbors_gpcr = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_gpcr.search(water.coord, 3.5, "R")
                }

                water_neighbors_sign = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_sign.search(water.coord, 3.5, "R")
                }

                # TODO: DEBUG AND VERIFY this code as water-mediated interactions were present at this time
                # 1. UPDATE complexes to include also mini Gs and peptides (e.g. 4X1H/6FUF/5G53)
                # 2. Run and verify water-mediated do_interactions
                # 3. Improve the intersection between the two hit lists

                ## TODO: cleaner intersection between hits from the two Lists
                # see new code below
                #                for water_pair_one in water_neighbors_gpcr:
                #                    for water_pair_two in water_neighbors_sign:
                #                        if water_pair_one[0]==water_pair_two[0]:
                #                            res_1 = water_pair_one[1]
                #                            res_2 = water_pair_two[1]
                #                            key =  res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

                # Check if interaction is polar
                #                            if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                # TODO Check if water interaction is already present (e.g. multiple waters)
                # TODO Is splitting of sidechain and backbone-mediated interactions desired?
                #                                if not key in interaction_pairs:
                #                                    interaction_pairs[key] = InteractingPair(res_1, res_2, dbres[res_1.id[1]], dbres_sign[res_2.id[1]], struc)

                # TODO: fix assignment of interacting atom labels (now seems limited to residues)
                #                                interaction_pairs[key].interactions.append(WaterMediated(a + "|" + str(water_pair_one[0].get_parent().get_id()[1]), b))

        except SignprotComplex.DoesNotExist:
            #            print("No complex definition found for", pdb_name)
            # NOTE(review): `log` is assigned but never read in this function.
            log = "No complex definition found for " + pdb_name
        except ProteinConformation.DoesNotExist:
            print(
                "No protein conformation definition found for signaling protein of ",
                pdb_name)
#            log = "No protein conformation definition found for signaling protein of " + pdb_name

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed like the complex dictionary above
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }
            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                # `ns` is the neighbour search built in the do_interactions
                # stage above.
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    for index_two in [
                            index
                            for index in range(index_one +
                                               1, len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            # NOTE(review): a pair created here is stored only
                            # in interaction_pairs, while the save loop below
                            # iterates `classified` - confirm new pairs are
                            # meant to be persisted.
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a + "|" +
                                        str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            for p in classified:
                p.save_into_database()

        if do_complexes:
            for pair in classified_complex:
                pair.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            # Distances are stored as integers (distance * 100) and written
            # in batches: a batch is flushed once it holds more than 1000
            # rows, and the remainder is written after the loop.
            bulk_distances = []
            for i, d in enumerate(distances):
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
parser = PDBParser(PERMISSIVE=1)
print ('Parsing', args.pdb_file)
# Load the structure using the name of the PDB file handle
st = parser.get_structure('STR', args.pdb_file.name)

# Collect the candidate atoms (those whose id is listed in PEP_BOND_ATS)
bck_atoms = [at for at in st.get_atoms() if at.id in PEP_BOND_ATS]
print (len(bck_atoms), 'candidate atoms found')

# Search for candidate pairs within COVLNK
nbsearch = NeighborSearch(bck_atoms)
at_pairs = nbsearch.search_all(COVLNK)

# Output ordered by the serial number of the first atom of each pair.
# Redirect the output with > output_list
ordered_pairs = sorted(at_pairs, key=lambda pair: pair[0].serial_number)
for at1, at2 in ordered_pairs:
    # Only report pairs whose atoms belong to different residues
    if not at1.get_parent() == at2.get_parent():
        print ('{:11} : {:11} {:8.3f}'.format(
            atom_id(at1), atom_id(at2), at1 - at2))