def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """
    # This comment stops black style adding a blank line here, which causes flake8 D202.
    class RandomAtom:
        """Bare-bones atom stand-in: a random coordinate plus ``get_coord``."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        atoms = [RandomAtom() for _ in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        # Dedicated assertions report the offending type/value on failure,
        # unlike assertTrue() wrapped around a boolean expression.
        self.assertIsInstance(hits, list)
        self.assertGreaterEqual(len(hits), 0)
    x = array([250, 250, 250])  # Far away from our random atoms
    # The last NeighborSearch built in the loop is queried at every level.
    self.assertEqual([], ns.search(x, 5.0, "A"))
    self.assertEqual([], ns.search(x, 5.0, "R"))
    self.assertEqual([], ns.search(x, 5.0, "C"))
    self.assertEqual([], ns.search(x, 5.0, "M"))
    self.assertEqual([], ns.search(x, 5.0, "S"))
def getLigandNbrs(resids: List[Residue], struct: Structure) -> List[ResidueDict]:
    """Collect the residues surrounding a ligand, each passed through ``addBanClass``.

    For every residue of the ligand, the first atom in its ``child_list`` is
    used as a probe and all residues of ``struct`` within 5 angstrom of that
    atom are gathered. Residues that are part of the ligand itself are
    dropped, duplicates are removed, and each remaining ``ResidueDict`` is
    handed to ``addBanClass``.

    Args:
        resids: residues that together constitute the ligand.
        struct: structure whose atoms are indexed for the neighbour search.

    Returns:
        One ``addBanClass`` result per unique neighbouring residue. The order
        is not defined because duplicates are removed through a ``set``.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # Only the first atom of each ligand residue is probed, not every atom.
        probe_atom = r.child_list[0]
        nbrs.extend(ns.search(probe_atom.get_coord(), 5, level='R'))

    # Build the ligand's own identifiers once instead of once per neighbour.
    ligand_ids = [ResidueDict(constit) for constit in resids]

    # Filter out the residues that constitute the ligand itself.
    filtered = []
    for neighbor in nbrs:
        neighbor_id = ResidueDict(neighbor)
        if not any(ligand_id == neighbor_id for ligand_id in ligand_ids):
            filtered.append(neighbor_id)

    return [addBanClass(x) for x in set(filtered)]
def test_neighbor_search(self):
    """NeighborSearch: Find nearby randomly generated coordinates.

    Based on the self test in Bio.PDB.NeighborSearch.
    """
    class RandomAtom:
        """Bare-bones atom stand-in: a random coordinate plus ``get_coord``."""

        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _ in range(20):
        atoms = [RandomAtom() for _ in range(100)]
        ns = NeighborSearch(atoms)
        hits = ns.search_all(5.0)
        # Dedicated assertions report the offending type/value on failure,
        # unlike assertTrue() wrapped around a boolean expression.
        self.assertIsInstance(hits, list)
        self.assertGreaterEqual(len(hits), 0)
    x = array([250, 250, 250])  # Far away from our random atoms
    # The last NeighborSearch built in the loop is queried at every level.
    self.assertEqual([], ns.search(x, 5.0, "A"))
    self.assertEqual([], ns.search(x, 5.0, "R"))
    self.assertEqual([], ns.search(x, 5.0, "C"))
    self.assertEqual([], ns.search(x, 5.0, "M"))
    self.assertEqual([], ns.search(x, 5.0, "S"))
def test_neighbor_search(self):
    """NeighborSearch: query random coordinates and one far-away point.

    Mirrors the self test that ships in Bio.PDB.NeighborSearch.
    """
    class RandomAtom:
        def __init__(self):
            self.coord = 100 * random(3)

        def get_coord(self):
            return self.coord

    for _round in range(20):
        cloud = [RandomAtom() for _ in range(100)]
        ns = NeighborSearch(cloud)
        pairs = ns.search_all(5.0)
        self.assertIsInstance(pairs, list)
        self.assertGreaterEqual(len(pairs), 0)

    # Far away from our random atoms; checked against the last search tree
    # built above, once per hierarchy level.
    far_point = array([250, 250, 250])
    for level in ("A", "R", "C", "M", "S"):
        self.assertEqual([], ns.search(far_point, 5.0, level))
def getLigandNbrs(resids: List[Residue], struct: Structure):
    """Collect the residues surrounding a ligand, each passed through ``addBanClass``.

    For every residue of the ligand, the first atom in its ``child_list`` is
    used as a probe and all residues of ``struct`` within 5 angstrom of that
    atom are gathered. Residues that are part of the ligand itself are
    dropped, duplicates are removed, and each remaining ``ResidueId`` is
    handed to ``addBanClass``.

    Args:
        resids: residues that together constitute the ligand.
        struct: structure whose atoms are indexed for the neighbour search.

    Returns:
        One ``addBanClass`` result per unique neighbouring residue. The order
        is not defined because duplicates are removed through a ``set``.
    """
    ns = NeighborSearch(list(struct.get_atoms()))

    nbrs = []
    for r in resids:
        # Only the first atom of each ligand residue is probed, not every atom.
        resatom = r.child_list[0]
        # A residue-level search returns residues. They are stored as they
        # are: star-unpacking a residue would iterate over its atoms and put
        # atoms, not residues, into the neighbour list.
        nbrs.extend(ns.search(resatom.get_coord(), 5, level='R'))

    # Build the ligand's own identifiers once instead of once per neighbour.
    ligand_ids = [ResidueId(constit) for constit in resids]

    # Filter out the residues that constitute the ligand itself.
    filtered = []
    for neighbor in nbrs:
        neighbor_id = ResidueId(neighbor)
        if not any(ligand_id == neighbor_id for ligand_id in ligand_ids):
            filtered.append(neighbor_id)

    return [addBanClass(x) for x in set(filtered)]
def getResidueNeighbors(struct:Structure, resid: ResidueFullIdDict, radius:float, level:str = 'R', all_levels=False)-> List[Residue] or List[Chain] or List[Chain or Residue]:
    """Yield the entities found around the first atom of one residue.

    This is a generator: it produces exactly one item, the list returned by
    ``NeighborSearch.search``, and nothing (including the level check) runs
    until the generator is first advanced.

    struct: opened cif structure
    resid: identifiers of the residue (``model``, ``strand_id`` and
        ``residue_id`` attributes are read)
    radius: radius of the neighbourhood to check
    level: one of A/R/C (atom, residue, chain); case-insensitive
    all_levels: accepted but not used by this function
    """
    # NOTE(review): `Error` is not defined in this block; presumably it is
    # provided elsewhere in the module - confirm.
    level_code = level.upper()
    if level_code not in ('A', 'R', 'C'):
        print('Level has to be one of A R C')
        raise Error

    strand = struct[resid.model][resid.strand_id]
    # Keep the residues whose sequence number (full id -> residue id -> index 1)
    # equals the requested one; only the first match is used below.
    matches = [
        res for res in strand.child_list
        if res.get_full_id()[3][1] == resid.residue_id
    ]
    atom_coords = [atom.get_coord() for atom in matches[0].get_atoms()]

    ns = NeighborSearch(list(struct.get_atoms()))
    # The search is centred on the first atom of the matched residue only.
    yield ns.search(atom_coords[0], radius, level_code)
def compute_interactions(pdb_name, save_to_db=False):
    """Compute residue-residue distances and classified interactions for a structure.

    Looks up the ``Structure`` whose protein entry name equals ``pdb_name``
    (lower-cased), parses its stored PDB text and works on the first model and
    on the first chain listed in ``struc.preferred_chain``. Residues of that
    chain whose sequence number has no database residue with a generic number
    are detached from the chain, except residues named "HOH".

    Args:
        pdb_name: entry name of the structure; lower-cased before the lookup.
        save_to_db: when true, stored ``InteractingResiduePair`` rows of the
            structure are deleted, water-mediated interactions are appended
            to the pairs, every classified pair is saved and ``Distance``
            rows are bulk-created.

    Returns:
        Tuple ``(classified, distances)``: the ``InteractingPair`` objects
        that report at least one interaction, and a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples.
    """
    # Feature switches; both are hard-wired on.
    do_distances = True
    do_interactions = True

    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)

    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Remove residues without GN; keep only those matching the receptor.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')

    # Sequence number -> database residue / generic number label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Collect the ids first, then detach, so the chain is not modified while
    # it is being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        # CA-CA separation of the two residues.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Split into classified and unclassified; only classified pairs are kept.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed by
            # "<chain id><residue number>_<chain id><residue number>".
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (atoms of "HOH" residues)
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }

            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    for index_two in [
                            index
                            for index in range(index_one + 1,
                                               len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            # NOTE(review): a pair created here lives only in
                            # interaction_pairs; the save loop below walks
                            # `classified`, so such a pair does not appear to
                            # be saved - confirm this is intended.
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a + "|" + str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            for p in classified:
                p.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i, d in enumerate(distances):
                # The distance is stored as an integer: value times 100, truncated.
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 rows have accumulated.
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
def compute_interactions(pdb_name, save_to_db=False):
    """Compute distances, intra-chain interactions and complex interactions.

    Looks up the ``Structure`` whose protein entry name equals ``pdb_name``
    (lower-cased), parses its stored PDB text and works on the first model and
    on the first chain listed in ``struc.preferred_chain``. Residues of that
    chain whose sequence number has no database residue with a generic number
    are detached from the chain, except residues named "HOH".

    When a ``SignprotComplex`` exists for the structure, contacts between the
    preferred chain and chain ``complex.alpha`` within 4.5 are classified as
    well. Those pairs are saved when ``save_to_db`` is true but are not part
    of the return value.

    Args:
        pdb_name: entry name of the structure; lower-cased before the lookup.
        save_to_db: when true, stored ``InteractingResiduePair`` rows of the
            structure are deleted, water-mediated interactions are appended
            to the pairs, every classified pair (intra-chain and complex) is
            saved and ``Distance`` rows are bulk-created.

    Returns:
        Tuple ``(classified, distances)``: the intra-chain ``InteractingPair``
        objects that report at least one interaction, and a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples.
    """
    # Feature switches; all are hard-wired on.
    do_distances = True
    do_interactions = True
    do_complexes = True

    distances = []
    classified = []
    classified_complex = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(
        protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)

    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Remove residues without GN; keep only those matching the receptor.
    residues = struc.protein_conformation.residue_set.exclude(
        generic_number=None).all().prefetch_related('generic_number')

    # Sequence number -> database residue / generic number label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Collect the ids first, then detach, so the chain is not modified while
    # it is being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1, res1 in enumerate(chain, 1):
            if not is_water(res1):
                for i2, res2 in enumerate(chain, 1):
                    if i2 > i1 and not is_water(res2):
                        # Do not calculate twice.
                        # CA-CA separation of the two residues.
                        distance = res1['CA'] - res2['CA']
                        distances.append(
                            (dbres[res1.id[1]], dbres[res2.id[1]], distance,
                             dblabel[res1.id[1]], dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(6.6, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [
            pair for pair in all_neighbors
            if is_aa(pair[0]) and is_aa(pair[1])
        ]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [
            pair for pair in all_aa_neighbors
            if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES
        ]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [
            InteractingPair(res_pair[0], res_pair[1], dbres[res_pair[0].id[1]],
                            dbres[res_pair[1].id[1]], struc)
            for res_pair in all_aa_neighbors
            if not is_water(res_pair[0]) and not is_water(res_pair[1])
        ]

        # Split into classified and unclassified; only classified pairs are kept.
        classified = [
            interaction for interaction in interactions
            if len(interaction.get_interactions()) > 0
        ]

    if do_complexes:
        try:
            # check if structure in signprot_complex
            complex = SignprotComplex.objects.get(structure=struc)

            # Get all GPCR residue atoms based on preferred chain
            gpcr_atom_list = [ atom for residue in Selection.unfold_entities(s[preferred_chain], 'R') if is_aa(residue) \
                            for atom in residue.get_atoms()]

            # Get all residue atoms from the coupled protein (e.g. G-protein)
            # NOW: select alpha subunit protein chain using complex model
            sign_atom_list = [ atom for residue in Selection.unfold_entities(s[complex.alpha], 'R') if is_aa(residue) \
                            for atom in residue.get_atoms()]

            ns_gpcr = NeighborSearch(gpcr_atom_list)
            ns_sign = NeighborSearch(sign_atom_list)

            # For each GPCR atom perform the neighbor search on the signaling protein
            # (a set, so each residue pair is kept once)
            all_neighbors = {
                (gpcr_atom.parent, match_res)
                for gpcr_atom in gpcr_atom_list
                for match_res in ns_sign.search(gpcr_atom.coord, 4.5, "R")
            }

            # For each pair of interacting residues, determine the type of interaction
            residues_sign = ProteinConformation.objects.get(
                protein__entry_name=pdb_name + "_" +
                complex.alpha.lower()).residue_set.exclude(
                    generic_number=None).all().prefetch_related(
                        'generic_number')

            # grab labels from sign protein
            dbres_sign = {}
            dblabel_sign = {}
            for r in residues_sign:
                dbres_sign[r.sequence_number] = r
                dblabel_sign[r.sequence_number] = r.generic_number.label

            # Find interactions (only pairs known on both sides of the complex)
            interactions = [
                InteractingPair(res_pair[0], res_pair[1],
                                dbres[res_pair[0].id[1]],
                                dbres_sign[res_pair[1].id[1]], struc)
                for res_pair in all_neighbors
                if res_pair[0].id[1] in dbres and res_pair[1].id[1] in dbres_sign
            ]

            # Filter unclassified interactions
            classified_complex = [
                interaction for interaction in interactions
                if len(interaction.get_interactions()) > 0
            ]

            # Convert to dictionary for water calculations
            interaction_pairs = {}
            for pair in classified_complex:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # Obtain list of all water molecules in the structure
            water_list = {
                water
                for chain in s for residue in chain
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }

            # If waters are present calculate water-mediated interactions
            if len(water_list) > 0:
                ## Iterate water molecules over coupled and gpcr atom list
                # NOTE: the two hit sets below are computed but not consumed;
                # the code that intersected them is commented out.
                water_neighbors_gpcr = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_gpcr.search(water.coord, 3.5, "R")
                }

                water_neighbors_sign = {
                    (water, match_res)
                    for water in water_list
                    for match_res in ns_sign.search(water.coord, 3.5, "R")
                }

                # TODO: DEBUG AND VERIFY this code as water-mediated interactions were present at this time
                # 1. UPDATE complexes to include also mini Gs and peptides (e.g. 4X1H/6FUF/5G53)
                # 2. Run and verify water-mediated do_interactions
                # 3. Improve the intersection between the two hit lists

                ## TODO: cleaner intersection between hits from the two Lists
                # see new code below
                # for water_pair_one in water_neighbors_gpcr:
                #     for water_pair_two in water_neighbors_sign:
                #         if water_pair_one[0]==water_pair_two[0]:
                #             res_1 = water_pair_one[1]
                #             res_2 = water_pair_two[1]
                #             key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

                # Check if interaction is polar
                #             if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                # TODO Check if water interaction is already present (e.g. multiple waters)
                # TODO Is splitting of sidechain and backbone-mediated interactions desired?
                #                 if not key in interaction_pairs:
                #                     interaction_pairs[key] = InteractingPair(res_1, res_2, dbres[res_1.id[1]], dbres_sign[res_2.id[1]], struc)

                # TODO: fix assignment of interacting atom labels (now seems limited to residues)
                #                 interaction_pairs[key].interactions.append(WaterMediated(a + "|" + str(water_pair_one[0].get_parent().get_id()[1]), b))

        except SignprotComplex.DoesNotExist:
            # print("No complex definition found for", pdb_name)
            # The message is built but not emitted anywhere in this function.
            log = "No complex definition found for " + pdb_name
        except ProteinConformation.DoesNotExist:
            print(
                "No protein conformation definition found for signaling protein of ",
                pdb_name)
            # log = "No protein conformation definition found for signaling protein of " + pdb_name

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(
                referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed by
            # "<chain id><residue number>_<chain id><residue number>".
            # This rebinds the dictionary built in the complex section above.
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id() + str(res_1.get_id(
                )[1]) + "_" + res_2.get_parent().get_id() + str(
                    res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (atoms of "HOH" residues)
            water_list = {
                water
                for residue in s[preferred_chain]
                if residue.get_resname() == "HOH"
                for water in residue.get_atoms()
            }

            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                water_neighbors = [
                    (water, match_res) for water in water_list
                    for match_res in ns.search(water.coord, 3.5, "R")
                    if not is_water(match_res) and (
                        is_hba(match_res) or is_hbd(match_res))
                ]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    for index_two in [
                            index
                            for index in range(index_one + 1,
                                               len(water_neighbors))
                            if water_pair_one[0] == water_neighbors[index][0]
                    ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]

                        # TODO: order residues + check minimum spacing between residues
                        key = res_1.get_parent().get_id() + str(res_1.get_id(
                        )[1]) + "_" + res_2.get_parent().get_id() + str(
                            res_2.get_id()[1])

                        # Verify h-bonds between water and both residues
                        matches_one = InteractingPair.verify_water_hbond(
                            water_pair_one[1], water_pair_one[0])
                        matches_two = InteractingPair.verify_water_hbond(
                            water_pair_two[1], water_pair_two[0])
                        if len(matches_one) > 0 and len(matches_two) > 0:
                            # if not exists, create residue pair without interactions
                            # NOTE(review): a pair created here lives only in
                            # interaction_pairs; the save loop below walks
                            # `classified`, so such a pair does not appear to
                            # be saved - confirm this is intended.
                            if not key in interaction_pairs:
                                interaction_pairs[key] = InteractingPair(
                                    res_1, res_2, dbres[res_1.id[1]],
                                    dbres[res_2.id[1]], struc)

                            for a, b in zip(matches_one, matches_two):
                                # HACK: store water ID as part of first atom name
                                interaction_pairs[key].interactions.append(
                                    WaterMediated(
                                        a + "|" + str(water_pair_one[0].get_parent(
                                        ).get_id()[1]), b))

            for p in classified:
                p.save_into_database()

        if do_complexes:
            for pair in classified_complex:
                pair.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i, d in enumerate(distances):
                # The distance is stored as an integer: value times 100, truncated.
                distance = Distance(distance=int(100 * d[2]),
                                    res1=d[0],
                                    res2=d[1],
                                    gn1=d[3],
                                    gn2=d[4],
                                    gns_pair='_'.join([d[3], d[4]]),
                                    structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 rows have accumulated.
                if len(bulk_distances) > 1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances
def find_ligands(self, radius=3):
    """
    Searches the parent structure for hetero components close to the
    catalytic residues, by searching around the atoms of catalytic
    residues and the dummy atoms between distant residues. Populates
    the ligands list with Het objects.

    Nothing is done when ``self.parent_structure`` is not a ``Structure``.
    In addition to the close components, hetero residues within 30 Å of the
    geometric centre of ``self.structure`` are added and flagged as distal
    when their type is 'Substrate (non-polymer)' or 'Co-factor (non-ion)'.

    Args:
        radius: the search space (in Å) around the atoms of the
                catalytic residues

    Returns:
        None; results are registered through ``self.add``.
    """
    # isinstance also accepts Structure subclasses; None is still rejected.
    if not isinstance(self.parent_structure, Structure):
        return

    # Get centers of search
    centers = self._get_ligand_search_centers(radius)

    # Initialize KD tree
    query_atoms = Bio.PDB.Selection.unfold_entities(
        self.parent_structure, 'A')
    ns = NeighborSearch(query_atoms)

    # Search for ligands around each center
    polymers = defaultdict(list)
    site_chains = {res.chain for res in self}
    seen = set()    # full ids of every residue already examined
    added = set()   # full ids of hetero residues added as close ligands

    # Search for components close to catalytic residues
    for center in centers:
        for res in ns.search(center, radius, level='R'):
            full_id = res.get_full_id()
            if full_id in seen:
                continue
            seen.add(full_id)

            # First character of the hetero flag: 'W', 'H' or ' '.
            restype = res.get_id()[0][0]
            chain = res.get_parent().get_id()

            # Ignore waters
            if restype == 'W':
                continue
            if restype == 'H':
                # HET components
                self.add(
                    Het(self.mcsa_id, self.pdb_id, res.get_resname(),
                        res.get_id()[1], chain, structure=res,
                        parent_site=self))
                added.add(full_id)
            elif restype == ' ' and chain not in site_chains:
                # Protein/nucleic polymer components on chains that hold
                # no residue of this site
                polymers[chain].append(res)

    # Build polymers
    if self.acts_on_polymer:
        for chain, reslist in polymers.items():
            self.add(
                Het.polymer(reslist, self.mcsa_id, self.pdb_id, chain, self))

    # Find distal co-factor-like or substrate-like molecules
    hits = ns.search(self.structure.center_of_mass(geometric=True), 30,
                     level='R')
    for res in hits:
        restype = res.get_id()[0][0]
        # Skip hetero residues already added as close ligands above.
        if restype == 'H' and res.get_full_id() not in added:
            ligand = Het(self.mcsa_id, self.pdb_id, res.get_resname(),
                         res.get_id()[1], res.get_parent().get_id(),
                         structure=res, parent_site=self)
            if ligand.type in ('Substrate (non-polymer)',
                               'Co-factor (non-ion)'):
                ligand.is_distal = True
                self.add(ligand)
def compute_interactions(pdb_name,save_to_db = False):
    """Compute residue-residue distances and classified interactions for a structure.

    Looks up the ``Structure`` whose protein entry name equals ``pdb_name``
    (lower-cased), parses its stored PDB text and works on the first model and
    on the first chain listed in ``struc.preferred_chain``. Residues of that
    chain whose sequence number has no database residue with a generic number
    are detached from the chain, except residues named "HOH".

    Args:
        pdb_name: entry name of the structure; lower-cased before the lookup.
        save_to_db: when true, stored ``InteractingResiduePair`` rows of the
            structure are deleted, water-mediated interactions are appended
            to the pairs, every classified pair is saved and ``Distance``
            rows are bulk-created.

    Returns:
        Tuple ``(classified, distances)``: the ``InteractingPair`` objects
        that report at least one interaction, and a list of
        ``(db_res1, db_res2, distance, label1, label2)`` tuples.
    """
    # Feature switches; both are hard-wired on.
    do_distances = True
    do_interactions = True

    distances = []
    classified = []

    # Ensure that the PDB name is lowercase
    pdb_name = pdb_name.lower()

    # Get the pdb structure
    struc = Structure.objects.get(protein_conformation__protein__entry_name=pdb_name)
    pdb_io = StringIO(struc.pdb_data.pdb)

    # Get the preferred chain (first entry of the comma-separated list)
    preferred_chain = struc.preferred_chain.split(',')[0]

    # Get the Biopython structure for the PDB (first model only)
    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_io)[0]
    #s = pdb_get_structure(pdb_name)[0]
    chain = s[preferred_chain]
    #return classified, distances

    # Remove residues without GN; keep only those matching the receptor.
    residues = struc.protein_conformation.residue_set.exclude(generic_number=None).all().prefetch_related('generic_number')

    # Sequence number -> database residue / generic number label.
    dbres = {}
    dblabel = {}
    for r in residues:
        dbres[r.sequence_number] = r
        dblabel[r.sequence_number] = r.generic_number.label

    # Collect the ids first, then detach, so the chain is not modified while
    # it is being iterated.
    ids_to_remove = []
    for res in chain:
        if not res.id[1] in dbres.keys() and res.get_resname() != "HOH":
            ids_to_remove.append(res.id)
    for i in ids_to_remove:
        chain.detach_child(i)

    if do_distances:
        for i1,res1 in enumerate(chain,1):
            if not is_water(res1):
                for i2,res2 in enumerate(chain,1):
                    if i2>i1 and not is_water(res2):
                        # Do not calculate twice.
                        # CA-CA separation of the two residues.
                        distance = res1['CA']-res2['CA']
                        distances.append((dbres[res1.id[1]],dbres[res2.id[1]],distance,dblabel[res1.id[1]],dblabel[res2.id[1]]))

    if do_interactions:
        atom_list = Selection.unfold_entities(s[preferred_chain], 'A')

        # Search for all neighbouring residues
        ns = NeighborSearch(atom_list)
        all_neighbors = ns.search_all(4.5, "R")

        # Filter all pairs containing non AA residues
        all_aa_neighbors = [pair for pair in all_neighbors if is_aa(pair[0]) and is_aa(pair[1])]

        # Only include contacts between residues more than NUM_SKIP_RESIDUES sequence steps apart
        all_aa_neighbors = [pair for pair in all_aa_neighbors if abs(pair[0].id[1] - pair[1].id[1]) > NUM_SKIP_RESIDUES]

        # For each pair of interacting residues, determine the type of interaction
        interactions = [InteractingPair(res_pair[0], res_pair[1], get_interactions(res_pair[0], res_pair[1]),dbres[res_pair[0].id[1]], dbres[res_pair[1].id[1]],struc) for res_pair in all_aa_neighbors]

        # Split into classified and unclassified; only classified pairs are kept.
        classified = [interaction for interaction in interactions if len(interaction.get_interactions()) > 0]

    if save_to_db:

        if do_interactions:
            # Delete previous for faster load in
            InteractingResiduePair.objects.filter(referenced_structure=struc).all().delete()

            # bulk_pair = []
            # for d in distances:
            #     pair = InteractingResiduePair(res1=d[0], res2=d[1], referenced_structure=struc)
            #     bulk_pair.append(pair)

            # Create interaction dictionary keyed by
            # "<chain id><residue number>_<chain id><residue number>".
            interaction_pairs = {}
            for pair in classified:
                res_1 = pair.get_residue_1()
                res_2 = pair.get_residue_2()
                key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])
                interaction_pairs[key] = pair

            # POSSIBLE ADDON: support for multiple water-mediated bonds
            ## Obtain list of water molecules (atoms of "HOH" residues)
            water_list = { water for residue in s[preferred_chain] if residue.get_resname() == "HOH" for water in residue.get_atoms() }

            if len(water_list) > 0:
                ## Iterate water molecules over residue atom list
                water_neighbors = [(water, match_res) for water in water_list
                                for match_res in ns.search(water.coord, 3.5, "R") if not is_water(match_res)]

                # intersect between residues sharing the same interacting water
                for index_one in range(len(water_neighbors)):
                    water_pair_one = water_neighbors[index_one]

                    for index_two in [ index for index in range(index_one+1, len(water_neighbors)) if water_pair_one[0]==water_neighbors[index][0] ]:
                        water_pair_two = water_neighbors[index_two]
                        res_1 = water_pair_one[1]
                        res_2 = water_pair_two[1]
                        key = res_1.get_parent().get_id()+str(res_1.get_id()[1]) + "_" + res_2.get_parent().get_id()+str(res_2.get_id()[1])

                        # Check if interaction is polar - NOTE: this is not capturing every angle
                        if any(get_polar_interactions(water_pair_one[0].get_parent(), water_pair_one[1])) and any(get_polar_interactions(water_pair_two[0].get_parent(), water_pair_two[1])):
                            # NOTE: Is splitting of sidechain and backbone-mediated interactions desired?
                            if key in interaction_pairs:
                                interaction_pairs[key].interactions.append(WaterMediated())
                            else:
                                # NOTE(review): a pair created here lives only
                                # in interaction_pairs; the save loop below
                                # walks `classified`, so such a pair does not
                                # appear to be saved - confirm this is intended.
                                interaction_pairs[key] = InteractingPair(res_1, res_2, [WaterMediated()], dbres[res_1.id[1]], dbres[res_2.id[1]], struc)

            for p in classified:
                p.save_into_database()

        if do_distances:
            # Distance.objects.filter(structure=struc).all().delete()
            bulk_distances = []
            for i,d in enumerate(distances):
                # The distance is stored as an integer: value times 100, truncated.
                distance = Distance(distance=int(100*d[2]),res1=d[0], res2=d[1],gn1=d[3], gn2=d[4], gns_pair='_'.join([d[3],d[4]]), structure=struc)
                bulk_distances.append(distance)
                # Flush in batches once more than 1000 rows have accumulated.
                if len(bulk_distances)>1000:
                    pairs = Distance.objects.bulk_create(bulk_distances)
                    bulk_distances = []

            # Write whatever is left after the last full batch.
            pairs = Distance.objects.bulk_create(bulk_distances)

    return classified, distances