def extract_feature(self):
    """Attach the D2 category shape distribution feature to every residue.

    For the ligand and receptor of each complex's unbound formation, one
    row per residue is computed by ``self._compute_distribution`` from the
    residues found within ``self.radius`` of ``residue.center``. The table
    is cached as ``<dir><protein name>.npy`` and always read back from that
    file before the rows are stored on the residues.
    """
    seed(self.seed)
    names_on_line = 0
    print_info_nn(" >>> Adding D2 category based shape distribution for database {0} ... ".format(self._database.name))
    started = datetime.now()
    if not os.path.exists(self._get_dir_name()):
        os.makedirs(self._get_dir_name())
    for complex_name in self._database.complexes.keys():
        formation = self._database.complexes[complex_name].unbound_formation
        for protein in (formation.ligand, formation.receptor):
            cache_stem = self._get_dir_name() + protein.name
            cache_file = cache_stem + ".npy"
            if not os.path.exists(cache_file):
                # Progress output: up to 15 names per line, then a line break.
                names_on_line += 1
                if names_on_line > 15:
                    names_on_line = 0
                    print_info("{0}".format(protein.name))
                else:
                    print_info_nn("{0}, ".format(protein.name))
                searcher = NeighborSearch(protein.atoms)
                table = np.zeros((len(protein.residues), self.number_of_bins))
                for row, residue in enumerate(protein.residues):
                    close_by = searcher.search(residue.center, self.radius, "R")
                    table[row, :] = self._compute_distribution(close_by)
                # np.save appends the ".npy" suffix itself.
                np.save(cache_stem, table)
            table = np.load(cache_file)
            for row, residue in enumerate(protein.residues):
                residue.add_feature(Features.D2_CATEGORY_SHAPE_DISTRIBUTION, table[row, :])
    print_info("took {0} seconds.".format((datetime.now() - started).seconds))
def pdb_neighbors(pdb_f, pdb_id):
    """List residues with N/O atoms near the phosphate group of PTR/SEP/TPO.

    For every PTR, SEP or TPO residue, the atoms within ``BOND_CUTOFF`` of
    its phosphate oxygens (and the side-chain oxygen the phosphate hangs
    on) are collected. Only neighbours whose atom name contains ``N`` or
    ``O`` are kept, and they are reduced to their parent residues, with the
    phospho-residue itself excluded.

    Returns a list of ``(pdb_id, residue_label, [neighbour_label, ...])``
    tuples. Labels have the form ``RESNAME_resnum_chain``. Residues with no
    neighbours are omitted.
    """
    # Side-chain oxygen that carries the phosphate, per residue type.
    linking_oxygen = {'PTR': 'OH', 'SEP': 'OG', 'TPO': 'OG1'}

    def _label(residue):
        return residue.get_resname()+'_'+str(residue.get_id()[1])+'_'+residue.get_parent().get_id()

    structure = PDBParser().get_structure(pdb_id, pdb_f)
    ns = NeighborSearch(Selection.unfold_entities(structure, 'A'))
    phospho_residues = [res for res in structure.get_residues()
                        if res.get_resname() in ['PTR', 'SEP', 'TPO']]

    neighbors = []
    for res in phospho_residues:
        wanted_names = ['O1P', 'O2P', 'O3P', linking_oxygen[res.get_resname()]]
        centers = [atom for atom in res.child_list if atom.get_name() in wanted_names]

        hits = []
        for center in centers:
            hits.extend(ns.search(center.get_coord(), BOND_CUTOFF))
        unique_hits = list(set(hits))
        polar_hits = [atom for atom in unique_hits
                      if 'N' in atom.get_name() or 'O' in atom.get_name()]
        partner_residues = list(set(Selection.unfold_entities(polar_hits, 'R')))
        partner_residues = [r for r in partner_residues if r != res]

        if len(partner_residues) > 0:
            neighbors.append((pdb_id, _label(res), [_label(r) for r in partner_residues]))
    return neighbors
def within(resnum, angstroms, chain_id, model, use_ca=False, custom_coord=None):
    """Return the residues that have an atom within a distance of a point.

    See: https://www.biostars.org/p/1816/ https://www.biostars.org/p/269579/

    Args:
        resnum (int): Key of the target residue inside ``model[chain_id]``.
            Ignored when ``custom_coord`` is given.
        angstroms (float): Search radius passed to ``NeighborSearch.search``.
        chain_id (str): Key of the chain that holds the target residue.
        model (Model): Model whose atoms are searched.
        use_ca (bool): If the alpha-carbon atom should be used for the search,
            otherwise use the last atom of the residue
        custom_coord (list): Custom XYZ coordinate to get within

    Returns:
        list: List of Bio.PDB.Residue.Residue objects

    """
    # TODO: should have separate method for within a normal residue (can use
    # "resnum" with a int) or a custom coord, where you don't need to specify
    # resnum
    atom_list = Selection.unfold_entities(model, 'A')
    ns = NeighborSearch(atom_list)

    # Test for None and length explicitly: a bare ``if custom_coord:`` raises
    # "truth value of an array is ambiguous" when a numpy array is passed.
    # An empty sequence still falls through to the residue lookup, as before.
    if custom_coord is not None and len(custom_coord) > 0:
        target_atom_coord = np.array(custom_coord, 'f')
    else:
        target_residue = model[chain_id][resnum]
        if use_ca:
            target_atom = target_residue['CA']
        else:
            target_atom = target_residue.child_list[-1]
        target_atom_coord = np.array(target_atom.get_coord(), 'f')

    neighbors = ns.search(target_atom_coord, angstroms)
    return Selection.unfold_entities(neighbors, 'R')
def get_residues_distance_distribution(data, r):
    """
    Plot the distribution of the number of residues found within radius ``r``
    of each residue centre.

    ``data`` is a path prefix; every file matching ``data + "*_b.pdb"`` is
    read with ``read_pdb_file``. For each residue the centre is the mean of
    its atom coordinates, and the number of residues within ``r`` of that
    centre is recorded. The histogram of those counts is plotted and shown.
    Nothing is returned.
    """
    files = sorted(glob.glob(data + "*_b.pdb"))
    neighbour_counts = []
    for file_counter, bound_pdb in enumerate(files, 1):
        print("Protein " + str(file_counter) + "/" + str(len(files)))
        # The residue list used to be unpacked into ``r``, clobbering the
        # radius argument; the search then used a hard-coded 100 instead.
        _, atoms, residues = read_pdb_file(bound_pdb)
        ns = NeighborSearch(atoms)
        for res in residues:
            coord_sum = 0 * res.child_list[0].get_coord()
            for atom in res.child_list:
                coord_sum += atom.get_coord()
            center = coord_sum / len(res.child_list)
            neighbour_counts.append(len(ns.search(center, r, "R")))
    plot(np.bincount(neighbour_counts))
    show()
    print(files)
def search_connectivity(self, atom_center, rank, NS=None, previous=None):
    """Walk the bonded neighbourhood of ``atom_center`` up to ``rank`` steps.

    Atoms within ``parametres.cutoff_bonds`` of ``atom_center`` are taken as
    its neighbours, excluding ``atom_center`` itself and ``previous``.

    With ``rank == 1`` the list of neighbours is returned. Otherwise a dict
    is returned that maps each neighbour to the result of the same search
    one rank lower, or to ``None`` when that result is empty.

    ``NS`` is built from ``self.model.atoms`` on the first call and reused
    by the recursive calls.
    """
    if NS is None:
        NS = NeighborSearch(self.model.atoms)

    excluded = [atom_center, previous]
    bonded = []
    for candidate in NS.search(center=atom_center.get_coord(),
                               radius=parametres.cutoff_bonds,
                               level="A"):
        if candidate not in excluded:
            bonded.append(candidate)

    if rank == 1:
        return bonded

    tree = {}
    for neighbour in bonded:
        branch = self.search_connectivity(atom_center=neighbour,
                                          rank=rank - 1,
                                          NS=NS,
                                          previous=atom_center)
        tree[neighbour] = None if len(branch) == 0 else branch
    return tree
def __init__(self, structure, chain_id, threshold):
    """Count atom contacts around every standard residue of one chain.

    For each residue of chain ``chain_id`` in ``structure[0]`` whose hetero
    flag is blank, all atoms within ``threshold`` of ``get_center(residue)``
    are classified twice: same chain (intra) versus other chain (inter),
    and closer than ``threshold / 2`` (short) versus farther (long).

    The result is stored in ``self.contacts`` as
    ``{residue.get_full_id(): (inter, intra, lon, shor)}``.
    """
    # The mapping was previously written to without ever being created
    # here; give every instance its own dict.
    self.contacts = {}

    # NOTE(review): the search tree holds the atoms of every model in
    # ``structure``, while only model 0 is iterated below - confirm that
    # multi-model inputs are not expected.
    ns = NeighborSearch(list(structure.get_atoms()))
    # Float division so an integer threshold is not floored on Python 2.
    short_range = threshold / 2.0

    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        for residue in chain:
            if residue.id[0] != ' ':
                continue  # skip hetero residues and waters
            inter = 0
            intra = 0
            lon = 0
            shor = 0
            center = get_center(residue)
            residue_chain = residue.get_full_id()[2]
            for a in ns.search(center, threshold):
                # Index 2 of a full id is the chain identifier.
                if a.get_full_id()[2] == residue_chain:
                    intra += 1
                else:
                    inter += 1
                if np.linalg.norm(a.get_coord() - center) < short_range:
                    shor += 1
                else:
                    lon += 1
            self.contacts[residue.get_full_id()] = (inter, intra, lon, shor)
def calculate_ic(structure, d_cutoff=5.0, selection=None):
    """
    Return the residue pairs that are in contact across chains.

    All residue pairs with atoms within ``d_cutoff`` are found first.
    Without ``selection``, pairs whose two residues sit in different chains
    are kept. With ``selection`` (a mapping keyed by chain id), a pair is
    kept only when both chains are keys of the mapping and map to different
    values.

    Raises ValueError when no pair survives the filter.
    """
    searcher = NeighborSearch(list(structure.get_atoms()))
    residue_pairs = searcher.search_all(radius=d_cutoff, level="R")

    if selection:
        ic_list = []
        for pair in residue_pairs:
            chain_a = pair[0].parent.id
            chain_b = pair[1].parent.id
            both_selected = chain_a in selection and chain_b in selection
            if both_selected and selection[chain_a] != selection[chain_b]:
                ic_list.append(pair)
    else:
        ic_list = [pair for pair in residue_pairs
                   if pair[0].parent.id != pair[1].parent.id]

    if not ic_list:
        raise ValueError("No contacts found for selection")
    return ic_list
def search_subs(self, hx_atom, cx_atom, cy_atom, hy_atom, j_atoms, chosen_j):
    """Record the substituents found around ``cy_atom`` for one coupling.

    Atoms within 1.7 of the coordinate stored at ``self.atom_dict[cy_atom][2]``
    whose serial number is not in ``j_atoms`` are treated as substituents.
    For each one an entry keyed by its serial number is written into
    ``self.j_dict[chosen_j]["substituents"]`` with the keys ``SY``, ``HX``,
    ``CX``, ``CY`` (element 0 of the respective ``atom_dict`` entries) and
    ``element`` (element 1 of the substituent's entry).

    ``hy_atom`` is accepted but not used. Nothing is returned.
    """
    center = self.atom_dict[cy_atom][2]
    nearby = NeighborSearch(self.atom_list).search(center, 1.7)
    subs_list = [atom for atom in nearby if atom.serial_number not in j_atoms]

    for subs in subs_list:
        serial = subs.serial_number
        entry = {}
        entry["SY"] = self.atom_dict[serial][0]
        entry["HX"] = self.atom_dict[hx_atom][0]
        entry["CX"] = self.atom_dict[cx_atom][0]
        entry["CY"] = self.atom_dict[cy_atom][0]
        entry["element"] = self.atom_dict[serial][1]
        self.j_dict[chosen_j]["substituents"][serial] = entry
def _get_contacts(pdb_path, chain_rec, chain_lig, contact_dist):
    """Collect receptor/ligand residue contacts from the first model of a PDB.

    A receptor residue is in contact with a ligand residue when any receptor
    atom lies within ``contact_dist`` of any atom of the ligand residue.
    ``chain_rec`` and ``chain_lig`` are iterables of chain ids.

    Returns three sets:
      * contacts: ``(rec_name, rec_num, rec_chain, lig_name, lig_num, lig_chain)``
      * receptor residues in contact: ``(name, num, chain)``
      * ligand residues in contact: ``(name, num, chain)``
    Residue names go through the ``d3_to_index`` / ``dindex_to_1`` lookups.
    """
    def _describe(residue):
        one_letter = dindex_to_1[d3_to_index[residue.get_resname()]]
        return (one_letter, residue.get_id()[1], residue.get_parent().get_id())

    model = pdb_parser.get_structure('X', pdb_path)[0]
    receptor_chains = [model[chain_id] for chain_id in chain_rec]
    ligand_chains = [model[chain_id] for chain_id in chain_lig]

    ns = NeighborSearch(Selection.unfold_entities(receptor_chains, 'A'))

    contacts = set()
    contacts_lig = set()
    contacts_rec = set()
    for ligand_res in Selection.unfold_entities(ligand_chains, 'R'):
        lig_key = _describe(ligand_res)
        touching = []
        for lig_atom in ligand_res:
            close_atoms = ns.search(lig_atom.get_coord(), contact_dist)
            touching += Selection.unfold_entities(close_atoms, 'R')
        for receptor_res in touching:
            rec_key = _describe(receptor_res)
            contacts.add(rec_key + lig_key)
            contacts_lig.add(lig_key)
            contacts_rec.add(rec_key)
    return contacts, contacts_rec, contacts_lig
def pdb_neighbors(pdb_f, pdb_id):
    """Map each phospho-residue to the residues near its phosphate oxygens.

    The search centres are the atoms named O1P, O2P, O3P, OH, OG or OG1 that
    belong to PTR, SEP or TPO residues. Every residue with an atom within
    ``BOND_CUTOFF`` of such a centre is a neighbour of the centre's residue;
    the residue itself is excluded.

    Returns ``''`` when the structure contains no such atoms, otherwise a
    dict ``{label: [neighbour_label, ...]}`` with labels of the form
    ``RESNAME_resnum_chain``.
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)
    atom_list = Selection.unfold_entities(structure, 'A')
    ns = NeighborSearch(atom_list)

    center = [
        (a, a.get_coord()) for a in structure.get_atoms()
        if a.get_parent().get_resname() in ['PTR', 'SEP', 'TPO']
        and a.get_name() in ['O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1']
    ]
    # if there are no phospho atoms, return (callers receive '' in this case)
    if len(center) == 0:
        return ''

    def _label(residue):
        return '_'.join([residue.get_resname(),
                         str(residue.get_id()[1]),
                         str(residue.get_parent().get_id())])

    neighbors = {}
    for a, c in center:
        neighbor_list = ns.search(c, BOND_CUTOFF)
        residue_list = list(set(Selection.unfold_entities(neighbor_list, 'R')))
        try:
            own_residue = Selection.unfold_entities(a, 'R')[0]
        except Exception:
            # Best effort, as before: skip atoms that cannot be resolved to
            # a residue, but no longer swallow KeyboardInterrupt/SystemExit.
            continue
        # Accumulate over all oxygens of the residue. Previously each oxygen
        # overwrote the list, so only the last one processed was reported.
        found = neighbors.setdefault(own_residue, [])
        for candidate in residue_list:
            if not candidate == own_residue and candidate not in found:
                found.append(candidate)

    # add residue id and chain id
    neighbors_full = {}
    for residue, partners in neighbors.items():
        neighbors_full[_label(residue)] = [_label(p) for p in partners]
    return neighbors_full
def collectSaltBridge(self, position, chain):
    """Return 1 if the residue takes part in a salt bridge, else 0.

    The residue is ``self.structure[0][chain][position]``. Only ARG, LYS,
    ASP, GLU and HIS are considered. A salt bridge is reported when one of
    the residue's NH1/NH2/NZ/NE2 atoms has an atom named OE1/OE2/OD1/OD2
    within 4.5 A, or the other way round.

    The cutoff is the 4 A criterion fixed by Barlow and Thornton
    (PMID6887253) plus 0.5 A to account for the unoptimised side chain.
    """
    basic_nitrogens = ['NH1', 'NH2', 'NZ', 'NE2']
    acidic_oxygens = ['OE1', 'OE2', 'OD1', 'OD2']

    model = self.structure[0]
    myres = model[chain][position]
    if myres.get_resname() not in ['ARG', 'LYS', 'ASP', 'GLU', 'HIS']:
        return 0

    atoms = Selection.unfold_entities(model, 'A')  # A for atoms
    ns = NeighborSearch(atoms)

    # NOTE(review): partners are matched by atom name only, so e.g. an OE1
    # of GLN or an OD1 of ASN also counts - confirm whether that is wanted.
    for atom in myres:
        atom_name = atom.get_full_id()[4][0]
        if atom_name in basic_nitrogens:
            partner_names = acidic_oxygens
        elif atom_name in acidic_oxygens:
            partner_names = basic_nitrogens
        else:
            continue
        close_names = [close_atom.id for close_atom in ns.search(atom.coord, 4.5)]
        if any(name in close_names for name in partner_names):
            return 1
        # Keep looking: the old code stopped at the first charged atom, so a
        # bridge formed by e.g. NH2 or OE2 was missed.

    # Also reached when the residue has no charged atoms at all, which used
    # to end in an UnboundLocalError.
    return 0
def collectHbondsNumber(self, foldxIndivFile):
    """Count hydrogen-bond partners of the mutated residue.

    The mutation is parsed from ``foldxIndivFile`` by ``whatMutation``;
    element 3 of its result is used as the chain id and element 2 as the
    residue number. Every atom of that residue whose name is in ``donors``
    is paired with nearby acceptor atoms, and every atom whose name is in
    ``acceptors`` with nearby donor atoms. "Nearby" means within 3.2 A, and
    atoms of the mutated residue itself are ignored.

    The 3.2 A donor-acceptor cutoff follows Jeffrey, George A.; An
    introduction to hydrogen bonding, Oxford University Press, 1997.

    Returns the total number of partner atoms found.
    """
    wt_mut_chain_id = whatMutation(foldxIndivFile)
    prot_chain = wt_mut_chain_id[3]
    prot_mut_id = int(wt_mut_chain_id[2])

    model = self.structure[0]
    myres = model[prot_chain][prot_mut_id]
    atoms = Selection.unfold_entities(model, 'A')  # A for atoms
    ns = NeighborSearch(atoms)

    # Built once instead of on every comparison.
    acceptor_names = acceptors + main_chain_acceptors
    donor_names = donors + main_chain_donors

    def _count_partners(atom, partner_names):
        """Count nearby atoms named in partner_names outside the mutated residue."""
        count = 0
        for close_atom in ns.search(atom.coord, 3.2):
            full_atom_id = close_atom.get_full_id()
            # Compare chain as well as residue number: the number alone also
            # discarded same-numbered residues of other chains.
            same_residue = (full_atom_id[2] == prot_chain
                            and full_atom_id[3][1] == prot_mut_id)
            if not same_residue and full_atom_id[4][0] in partner_names:
                count += 1
        return count

    h_bonds = 0
    for atom in myres:
        if atom.name in donors:
            h_bonds += _count_partners(atom, acceptor_names)
        # do the same thing for acceptor atoms
        if atom.name in acceptors:
            h_bonds += _count_partners(atom, donor_names)
    return h_bonds
def __calculate_separate_simbox_statistics(self, component, radius):
    """
    Calculate the number of grid cells describing a complex component.

    The calculation is done when all components are located in the center
    of the simulation box. A cubic grid is generated around the component's
    structure. For every atom, the grid cells within ``radius * sqrt(3)``
    (plus ``atom.vdw`` when that exceeds ``radius``) are passed to
    ``__calculate_simbox_statistics``, and its result replaces
    ``self.cells_taken_by_component``.

    Returns ``len(self.cells_taken_by_component)``.
    """
    self.cells_taken_by_component = {}
    grid = Grid("cubic", radius, 10*radius, -1.)
    grid.calculate_boundaries(component.pyrystruct.struct)
    grid.generate_cubic_grid([])
    ns = NeighborSearch(grid.grid_cells)

    cell_reach = radius * sqrt(3)
    for atom in component.pyrystruct.struct.get_atoms():
        center = array([atom.coord[0], atom.coord[1], atom.coord[2]])
        if atom.vdw <= radius:
            rad = cell_reach
        else:
            rad = cell_reach + atom.vdw
        neighbours = ns.search(center, rad, 'A')
        self.cells_taken_by_component = self.__calculate_simbox_statistics(neighbours)
        if len(neighbours) == 0:
            # Formatted so the output is the same on Python 2 and 3; the
            # original print statement is a syntax error on Python 3.
            print("{0} {1} {2}".format("OMGOMG"*10,
                                       component.pyrystruct.chain,
                                       atom.serial_number))
    # The locals are released on return, so no explicit del is needed.
    return len(self.cells_taken_by_component)
def CheckClashes(structure, chain):
    """
    Tell whether ``chain`` clashes with model 0 of ``structure``.

    Every atom of ``chain`` is compared against all atoms of
    ``structure[0]``. Residues with an atom within a radius of 2 of the
    atom are recorded as ``(residue of the atom, nearby residue)`` pairs;
    the atom's own residue is not paired with itself.

    Arguments:
        -structure: PDB structure.
        -chain: PDB chain to check against the main structure.

    Returns True when more than 25 distinct pairs were recorded, otherwise
    False.
    """
    searcher = NeighborSearch(unfold_entities(structure[0], 'A'))

    clashing_pairs = set()
    for atom in chain.get_atoms():
        own_residue = atom.get_parent()
        nearby = searcher.search(atom.get_coord(), radius=2, level="R")
        if own_residue in nearby:
            nearby.remove(own_residue)
        clashing_pairs.update((own_residue, other) for other in nearby)

    return len(clashing_pairs) > 25
def contact_pairs(self, radius=4.5):
    """Obtain all residue pairs with atoms in close proximity.

    Searches ``self.atom_list`` for residue pairs that have atoms within
    ``radius`` (4.5 by default, the value that used to be hard-coded).

    The result is returned and, as before, also stored on the instance as
    ``self.contact_pairs``.

    NOTE: that attribute shadows this method on the instance, so the method
    can only be called once per object. The assignment is kept because
    existing callers may read the attribute.
    """
    ns = NeighborSearch(self.atom_list)
    pairs = ns.search_all(radius, level='R')  # residues returned
    self.contact_pairs = pairs
    return pairs
def get_neighbor_chains(structure, options):
    """
    Find, for every chain, the chains that lie close to it.

    Two chains are neighbours when more than 8 pairs of backbone atoms
    (atom id 'CA' or 'P'), one from each chain, are within 8 angstroms of
    each other. Chains are processed in order, and a chain that has already
    been processed is not reported again, so each neighbouring pair shows up
    only under the chain that comes first.

    :param structure: structure whose chains are compared.
    :param options: object with a ``verbose`` flag; when set, the contact
        counts are printed.
    :return: dictionary mapping each chain to a set of neighbouring chains.
    """
    backbone_ids = ('CA', 'P')
    neighbor_chains = {}
    ns = NeighborSearch(list(structure.get_atoms()))

    for chain in structure.get_chains():
        neighbor_chains[chain] = set()
        contact_counts = {}
        for atom in chain.get_atoms():
            if atom.get_id() not in backbone_ids:
                continue
            for atom2 in ns.search(atom.get_coord(), 8, level='A'):
                if atom2.get_id() not in backbone_ids:
                    continue
                chain2 = atom2.get_parent().get_parent()
                # ``chain`` is already a key, so the membership test alone
                # would exclude it; the inequality is kept for clarity.
                if chain2 != chain and chain2 not in neighbor_chains:
                    contact_counts[chain2] = contact_counts.get(chain2, 0) + 1

        if options.verbose:
            print('\n%s' % chain)
        for close_chain, contacts in contact_counts.items():
            # Printing depends on the verbose flag; recording the neighbour
            # does not.
            if options.verbose:
                print('%s: %s' % (close_chain, contacts))
            if contacts > 8:
                neighbor_chains[chain].add(close_chain)
    return neighbor_chains
def check_clash(str_name, v=False):
    """Return the fraction of contacts between chains A and B that are clashes.

    ATOM records of the PDB file ``str_name`` are read directly from the
    text and split by chain id (column 22) into chain A and chain B; other
    chains are ignored. Each atom of the smaller chain is a clash when the
    larger chain has an atom within 2.0 A, and a contact when it has one
    within 4.0 A (every clash is also a contact).

    Returns ``clashes / contacts`` as a float, or 0 when there are no
    contacts at all. With ``v`` set, progress is printed.

    Example of the figures involved:
        Problem, contacts, str_name: 311 505 na-prot_13536.pdb
        Sterical clashes 0.615841584158
    """
    if v:
        print('fn:', str_name)

    atoms_A = []
    atoms_B = []
    # Context manager: the file handle used to be left open.
    with open(str_name) as structure:
        for line in structure:
            if line[:4] != "ATOM":
                continue
            at_nam = line[12:16].strip()
            coor = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            at = Atom.Atom(at_nam, coor, 0.0, 1.0, ' ', at_nam, 1, at_nam[0])
            if line[21] == "A":
                atoms_A.append(at)
            elif line[21] == "B":
                atoms_B.append(at)

    # Build the search tree on the larger chain and query with the smaller.
    if len(atoms_A) > len(atoms_B):
        less, more = atoms_B, atoms_A
    else:
        less, more = atoms_A, atoms_B

    problem = 0
    contacts = 0
    ns = NeighborSearch(more)
    for at in less:
        coord = array(at.get_coord())
        if ns.search(coord, 2.0, 'A'):
            problem += 1
            contacts += 1
        elif ns.search(coord, 4.0, 'A'):
            contacts += 1

    if v:
        print('problem:', float(problem))
        print('contacts:', float(contacts))

    try:
        fract = float(problem) / float(contacts)
    except ZeroDivisionError:
        fract = problem  # no contacts at all, so this is 0
        if v:
            print('ZeroDivison -- skip:', problem, contacts, str_name)
    return fract
def check_clash(str_name, v=True):
    """Return the fraction of contacts between chains A and B that are clashes.

    ATOM records of the PDB file ``str_name`` are read directly from the
    text and split by chain id (column 22) into chain A and chain B; other
    chains are ignored. Each atom of the smaller chain is a clash when the
    larger chain has an atom within 2.0 A, and a contact when it has one
    within 4.0 A (every clash is also a contact).

    Returns ``clashes / contacts`` as a float, or 0 when there are no
    contacts at all. The file name and the zero-contact notice are always
    printed; the clash and contact counts only when ``v`` is set.

    Example of the figures involved:
        Problem, contacts, str_name: 311 505 na-prot_13536.pdb
        Sterical clashes 0.615841584158
    """
    print(str_name)

    atoms_A = []
    atoms_B = []
    # Context manager: the file handle used to be left open.
    with open(str_name) as structure:
        for line in structure:
            if line[:4] != "ATOM":
                continue
            at_nam = line[12:16].strip()
            coor = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            at = Atom.Atom(at_nam, coor, 0.0, 1.0, ' ', at_nam, 1, at_nam[0])
            if line[21] == "A":
                atoms_A.append(at)
            elif line[21] == "B":
                atoms_B.append(at)

    # Build the search tree on the larger chain and query with the smaller.
    if len(atoms_A) > len(atoms_B):
        less, more = atoms_B, atoms_A
    else:
        less, more = atoms_A, atoms_B

    problem = 0
    contacts = 0
    ns = NeighborSearch(more)
    for at in less:
        coord = array(at.get_coord())
        if ns.search(coord, 2.0, 'A'):
            problem += 1
            contacts += 1
        elif ns.search(coord, 4.0, 'A'):
            contacts += 1

    if v:
        print('problem:', float(problem))
        print('contacts:', float(contacts))

    try:
        fract = float(problem)/float(contacts)
    except ZeroDivisionError:
        fract = problem  # no contacts at all, so this is 0
        print('ZeroDivison -- skip:', problem, contacts, str_name)
    return fract
def compute_interaction_center(pdb_file, mutation_site):
    """Computes the geometric center of all heavy atoms interacting with
    the mutated residue.

    The mutated residue is the first residue whose sequence number equals
    ``mutation_site``. Its side-chain heavy atoms (the heavy atoms after the
    first four; for a residue with exactly four heavy atoms, i.e. GLY, the
    second one, taken to be CA) are the search centres. Every heavy atom
    within 5 A of a centre is an interacting atom.

    The process exits with status 1 when no residue has the requested
    sequence number.

    Parameters
    ----------
    pdb_file : str
        Path to the PDB file containing the structure of the protein.
    mutation_site : int
        An integer designating the residue sequence ID.

    Returns
    -------
    NumPy ndarray
        The Cartesian coordinates of the geometric center
    """
    pdb_parser = PDBParser(PERMISSIVE=1)
    model = pdb_parser.get_structure(id='tmp', file=pdb_file)

    # get all heavy atoms of the protein
    all_heavy_atoms = [a for a in model.get_atoms() if a.element != 'H']

    # get the mutated residue
    site_number = int(mutation_site)
    mutation_res = None
    for res in model.get_residues():
        if res.get_id()[1] == site_number:
            mutation_res = res
            break
    if mutation_res is None:
        print('Invalid mutation site: {}'.format(mutation_site))
        sys.exit(1)

    # get sidechain atoms
    heavy_atoms = [a for a in mutation_res.get_list() if a.element != 'H']
    if len(heavy_atoms) == 4:
        # only four heavy atoms, aka GLY: use CA
        side_chain_atoms = [heavy_atoms[1]]
    else:
        # Slice the heavy-atom list, not the raw atom list: with hydrogens
        # present the raw slice picked hydrogens as search centres.
        side_chain_atoms = heavy_atoms[4:]

    # search for neighboring atoms
    ns = NeighborSearch(atom_list=all_heavy_atoms, bucket_size=10)
    all_interaction_atoms = []
    for a in side_chain_atoms:
        all_interaction_atoms += ns.search(center=a.coord, radius=5, level='A')

    # remove duplicates
    all_interaction_atoms = set(all_interaction_atoms)

    # compute geometric center of all interaction atoms
    # (stays at the origin when no interacting atom was found)
    geometric_center = np.zeros((3,))
    for a in all_interaction_atoms:
        geometric_center += a.coord / len(all_interaction_atoms)
    return geometric_center
def get_interactions_between_chains(model, chain_id_1, chain_id_2, r_cutoff=6):
    """Calculate interactions between the residues of the two chains.

    An interaction is defined as a pair of residues where at least one pair
    of atoms is closer than r_cutoff. Only residues whose name is in
    ``AMINO_ACIDS`` and whose hetero flag is blank are considered, on both
    chains.

    Falls back to ``get_interactions_between_chains_slow`` when Biopython's
    ``NeighborSearch`` cannot be imported.

    .. deprecated:: 1.0
        Use python:fn:`get_interacting_residues` instead.
        It gives you both the residue index and the resnum.

    Returns
    -------
    OrderedDict
        Keys are (residue_number, residue_amino_acid) tuples
        (e.g. ('0', 'M'), ('1', 'Q'), ...).
        Values are lists of (residue_number, residue_amino_acid) tuples
        (e.g. [('0', 'M'), ('1', 'Q'), ...]), sorted by residue number and
        then insertion code. Residues of chain 1 without interactions are
        omitted.

    Raises
    ------
    Exception
        If either chain is not found in the model.
    """
    try:
        from Bio.PDB import NeighborSearch
    except ImportError as e:
        logger.warning('Importing Biopython NeighborSearch returned an error: {}'.format(e))
        logger.warning('Using the the slow version of the neighbour-finding algorithm...')
        return get_interactions_between_chains_slow(model, chain_id_1, chain_id_2, r_cutoff)

    def _is_standard(residue):
        return residue.resname in AMINO_ACIDS and residue.id[0] == ' '

    def _resnum(residue):
        return str(residue.id[1]) + residue.id[2].strip()

    # Extract the chains of interest from the model
    chain_1 = None
    chain_2 = None
    for child in model.get_list():
        if child.id == chain_id_1:
            chain_1 = child
        if child.id == chain_id_2:
            chain_2 = child
    if chain_1 is None or chain_2 is None:
        raise Exception('Chains %s and %s were not found in the model' % (chain_id_1, chain_id_2))

    ns = NeighborSearch(list(chain_2.get_atoms()))
    interactions_between_chains = OrderedDict()
    for residue_1 in chain_1:
        if not _is_standard(residue_1):
            continue
        resnum_1 = _resnum(residue_1)
        resaa_1 = convert_aa(residue_1.get_resname(), quiet=True)

        interacting_residues = set()
        for atom_1 in residue_1:
            interacting_residues.update(ns.search(atom_1.get_coord(), r_cutoff, 'R'))

        partners = [r for r in interacting_residues if _is_standard(r)]
        if not partners:
            continue
        # Sort on the numeric residue id. The old key rebuilt the number
        # from the digits of the formatted string, which dropped the sign
        # and ordered negative residue numbers by magnitude.
        partners.sort(key=lambda r: (r.id[1], r.id[2]))
        interactions_between_chains[(resnum_1, resaa_1)] = [
            (_resnum(r), convert_aa(r.get_resname(), quiet=True)) for r in partners
        ]
    return interactions_between_chains
def make_neighbors(self, fl_struct):
    """Build the NeighborsNet of ``fl_struct`` from spatial proximity.

    Every residue of every chain that is not flagged ``rna_dna_chain`` gets
    a default entry. Then, for each residue found within
    ``self.config["neighbors_range"]`` of any of its atom coordinates, one
    relation is recorded (each neighbouring residue is handled once):

      * other chain: inter-chain neighbour;
      * same chain, sequence distance below
        ``self.config["long_short_threshold"]`` and no gap between the two
        residues: short-range neighbour;
      * same chain otherwise: long-range neighbour.

    The residue itself is never recorded as its own neighbour. Returns the
    populated NeighborsNet.
    """
    nn = NeighborsNet()
    # NeighborSearch from Bio.PDB does the distance queries.
    ns = NeighborSearch(list(self.bio_struct.get_atoms()))

    for fl_chain in fl_struct.get_chains():
        if fl_chain.rna_dna_chain:
            continue
        for fl_res in fl_chain.residues:
            nn.add_default(fl_res)
            # The search runs once per atom, so remember what was handled.
            already_have = []
            for atom_coord in fl_res.atoms_coord:
                for res in ns.search(atom_coord, self.config["neighbors_range"], level='R'):
                    if not is_good_res(res):
                        continue
                    # The search tree can hold residues of other models.
                    if not fl_res.model_id == res_model_id(res):
                        continue
                    if res.get_full_id() in already_have:
                        continue

                    partner_chain = fl_struct.chains[res_chain_id(res)]
                    pos_2 = partner_chain.string_index_map.get(res_string_index(res))
                    if pos_2 is None:
                        continue
                    fl_res_2 = partner_chain.residues[pos_2]
                    already_have.append(res.get_full_id())

                    if fl_res.chain_id == fl_res_2.chain_id:
                        if fl_res.pos_in_chain == fl_res_2.pos_in_chain:
                            continue
                        sequence_gap = abs(fl_res.pos_in_chain - fl_res_2.pos_in_chain)
                        if (sequence_gap < self.config["long_short_threshold"]
                                and not fl_chain.have_gaps(fl_res, fl_res_2)):
                            nn.add_short(fl_res, fl_res_2)
                        else:
                            nn.add_long(fl_res, fl_res_2)
                    else:
                        nn.add_inter(fl_res, fl_res_2)
    return nn
def extract_feature(self):
    """Attach the D1 surface shape distribution feature to every residue.

    For the ligand and receptor of each complex's unbound formation, one
    row per residue is computed by ``self._compute_distribution``. Its
    inputs are the residue's own Biopython residue plus every residue
    within ``self.radius`` of ``residue.center`` whose relative accessible
    surface area is at least ``self.rASA_threshold``, together with the
    centre of the residue being described.

    The table is cached as ``<dir><protein name>.npy`` and always read back
    from that file before the rows are stored on the residues. Cache files
    written before the centre fix below are reused as they are; delete them
    to recompute.
    """
    seed(self.seed)
    print_info_nn(" >>> Adding D1 surface shape distribution for database {0} ... ".format(self._database.name))
    overall_time = datetime.now()
    counter = 0
    if not os.path.exists(self._get_dir_name()):
        os.makedirs(self._get_dir_name())
    for complex_name in self._database.complexes.keys():
        protein_complex = self._database.complexes[complex_name]
        proteins = [protein_complex.unbound_formation.ligand,
                    protein_complex.unbound_formation.receptor]
        for protein in proteins:
            shape_dist_file = self._get_dir_name() + protein.name
            if not os.path.exists(shape_dist_file + ".npy"):
                # Progress output: up to 15 names per line, then a line break.
                counter += 1
                if counter <= 15:
                    print_info_nn("{0}, ".format(protein.name))
                else:
                    counter = 0
                    print_info("{0}".format(protein.name))
                neighbour_search = NeighborSearch(protein.atoms)
                distributions = np.zeros((len(protein.residues), self.number_of_bins + 1))
                for i, residue in enumerate(protein.residues):
                    nearby_residues = [protein.biopython_residues[i]]
                    candidates = neighbour_search.search(residue.center, self.radius, "R")
                    for nearby_residue in candidates:
                        if nearby_residue not in protein.biopython_residues:
                            continue
                        residues_index = protein.biopython_residues.index(nearby_residue)
                        # Separate name on purpose: reusing ``residue`` here
                        # made the centre passed below that of the last
                        # neighbour examined, not of residue i.
                        neighbour = protein.residues[residues_index]
                        rasa = neighbour.get_feature(Features.RELATIVE_ACCESSIBLE_SURFACE_AREA)
                        if rasa >= self.rASA_threshold:
                            nearby_residues.append(nearby_residue)
                    distributions[i, :] = self._compute_distribution(nearby_residues, residue.center)
                # np.save appends the ".npy" suffix itself.
                np.save(shape_dist_file, distributions)
            distributions = np.load(shape_dist_file + ".npy")
            for i, residue in enumerate(protein.residues):
                residue.add_feature(Features.D1_SURFACE_SHAPE_DISTRIBUTION, distributions[i, :])
    print_info("took {0} seconds.".format((datetime.now() - overall_time).seconds))
def secondary_struc_cmap(chain,
                         sequence,
                         structure,
                         cutoff_distance=4.5,
                         cutoff_numcontacts=10,
                         exclude_neighbour=3,
                         ss_elements=('H', 'E', 'B', 'b', 'G')):
    """Build a contact map between secondary-structure segments of a chain.

    ``structure`` is a per-residue sequence of secondary-structure codes and
    must have one entry per residue of ``chain``. Consecutive residues with
    the same code from ``ss_elements`` form a segment; segments are numbered
    from 1, and residues outside any segment get 0.

    Two segments are in contact when at least ``cutoff_numcontacts`` atom
    pairs within ``cutoff_distance`` connect them, counting only residue
    pairs more than ``exclude_neighbour`` positions apart.

    ``sequence`` is only used in the mismatch error message.

    Returns ``(index, segment)``: an array of zero-based
    ``(segment_a, segment_b)`` pairs and the per-residue segment numbers.

    Raises AssertionError when ``structure`` and the chain differ in length.
    """
    atom_list = Selection.unfold_entities(chain, 'A')
    res_list = Selection.unfold_entities(chain, 'R')

    numbering = np.array([res.get_id()[1] for res in res_list])
    res_range = np.array(range(len(numbering)))

    assert len(structure) == len(
        numbering
    ), f'PDB file and Secondary structure map do not match!\n {chain.get_parent().get_parent().id} - PDB: {len(res_list)} Residues VS. STRIDE: {len(sequence)} Residues. '

    ns = NeighborSearch(atom_list)
    all_neighbours = ns.search_all(cutoff_distance, 'A')

    struc_length = len(structure)
    segment = np.zeros([struc_length], dtype='int')
    nseg = 1
    for i in range(struc_length):
        if structure[i] in ss_elements:
            segment[i] = nseg
            # Close the segment at the end of the chain or when the code
            # changes. The end test used to compare against struc_length,
            # which ``i`` never reaches, so a chain ending inside a segment
            # raised IndexError on structure[i + 1].
            if i == struc_length - 1 or structure[i + 1] != structure[i]:
                nseg += 1

    index_list = []
    for atompair in all_neighbours:
        # Position in the chain of each atom's residue, by residue number.
        res1 = res_range[numbering == atompair[0].get_parent().id[1]][0]
        res2 = res_range[numbering == atompair[1].get_parent().id[1]][0]
        if abs(res1 - res2) <= exclude_neighbour:
            continue
        seg1 = segment[res1]
        seg2 = segment[res2]
        if seg1 != 0 and seg2 != 0 and seg1 != seg2:
            index_list.append((seg1 - 1, seg2 - 1))

    index_list.sort()
    count = Counter(index_list)
    index = [pair for pair in count if count[pair] >= cutoff_numcontacts]
    return np.array(index), segment
def check_clash(model, chain_to_add, clash_distance=2.5):
    """
    Check whether a newly added chain clashes with the rest of the structure.

    model: previous structure.
    chain_to_add: new chain (Chain object from PDB.Chain).
    clash_distance: distance threshold (in Angstroms) below which two atoms
        count as clashing.

    Returns ``(chain_ids, is_same_chain)``:
      * ``(None, False)`` when at most 20 atom clashes were found;
      * ``(ids, True)`` when all clashes are with one chain whose id shares
        its second character with ``chain_to_add.id`` and whose RMSD to the
        new chain, as given by ``superimpose``, is at most 3.0;
      * ``(ids, False)`` in every other case.
    ``ids`` is the set of ids of the chains the new chain clashes with.
    """
    searcher = NeighborSearch(list(model.get_atoms()))

    clashing_chain_ids = set()
    clash_count = 0
    for atom in chain_to_add.get_atoms():
        for hit in searcher.search(atom.get_coord(), clash_distance):
            clashing_chain_ids.add(hit.get_parent().get_parent().id)
            clash_count += 1

    # Too few clashes to matter.
    if clash_count <= 20:
        return None, False

    if len(clashing_chain_ids) == 1:
        only_id = list(clashing_chain_ids)[0]
        if chain_to_add.id[1] == only_id[1]:
            clash_chain = model[0][only_id]  # the conflicting chain
            RMSD = superimpose(clash_chain, chain_to_add)
            # A low RMSD means the new chain sits on top of a copy of itself.
            if RMSD <= 3.0:
                return clashing_chain_ids, True
            return clashing_chain_ids, False

    # Clashing with several chains, or with one unrelated chain.
    return clashing_chain_ids, False
def check_if_collide(self, component, point, radius):
    """
    Count the atoms of ``component`` that collide with ``point``.

    An atom collides when it lies within ``radius + 1.5`` of the point; the
    1.5 A is the radius assumed for a pseudoresidue as its collision
    detection area.

    Returns the number of colliding atoms (0 when there is no collision).
    """
    searcher = NeighborSearch(list(component.get_atoms()))
    point_center = array([point.x, point.y, point.z])
    colliding_atoms = searcher.search(point_center, radius + 1.5, "A")
    return len(colliding_atoms)
def interaction(self, pdb_id, filename, domain_1, domain_2):
    """Return the interacting atoms and residues of two domains, or False.

    An atom of ``domain_1`` interacts with an atom of ``domain_2`` when the
    two are within 5 angstroms of each other. This is the simplest possible
    criterion; no surface or accessibility analysis is done.

    Returns a dict with the keys ``'1'`` and ``'2'``, each holding
    ``'atoms'`` and ``'residues'`` for the respective domain, or ``False``
    when the domains do not interact. Each atom is listed once.
    """
    print("Searching for interactions in "+pdb_id+"...")

    # Build the model and pull out the atoms of the two domains.
    model = structure(pdb_id).get_model(pdb_id, filename)
    residues_1 = structure(pdb_id).get_residues(model, domain_1)
    residues_2 = structure(pdb_id).get_residues(model, domain_2)
    atoms_1 = Selection.unfold_entities(residues_1, 'A')
    atoms_2 = Selection.unfold_entities(residues_2, 'A')

    # Serial numbers identify which domain an atom belongs to; sets make
    # the membership tests below constant-time.
    numbers_1 = set(structure(pdb_id).serial_numbers(atoms_1))
    numbers_2 = set(structure(pdb_id).serial_numbers(atoms_2))

    atoms = Selection.unfold_entities(model, 'A')
    nsearch = NeighborSearch(atoms)
    interacting_atoms_1 = []
    interacting_atoms_2 = []
    for atom in atoms:
        if atom.get_serial_number() not in numbers_1:
            continue
        for neighbor in nsearch.search(atom.get_coord(), 5):
            if neighbor.get_serial_number() not in numbers_2:
                continue
            # Domain-2 atoms used to be appended once per hit, which
            # produced duplicates; list each atom only once.
            if neighbor not in interacting_atoms_2:
                interacting_atoms_2.append(neighbor)
            if atom not in interacting_atoms_1:
                interacting_atoms_1.append(atom)

    if len(interacting_atoms_2) == 0:
        return False

    # Reduce the atom lists to their parent residues.
    interacting_residues_1 = structure(pdb_id).atoms2residues(interacting_atoms_1)
    interacting_residues_2 = structure(pdb_id).atoms2residues(interacting_atoms_2)
    infos = {}
    infos['1'] = {}
    infos['2'] = {}
    infos['1']['atoms'] = interacting_atoms_1
    infos['2']['atoms'] = interacting_atoms_2
    infos['1']['residues'] = interacting_residues_1
    infos['2']['residues'] = interacting_residues_2
    return infos
def getReferenceAtomsWithinSphere(self, residue, radius, caOnly=False):
    """Return the atoms of the reference chain near a residue's CA atom.

    All atoms of ``self.ref_chain`` within ``radius`` of ``residue['CA']``
    are returned; with ``caOnly`` only those named ``CA``.

    Raises TypeError when the class of ``residue`` is not named ``Residue``.
    """
    if residue.__class__.__name__ != 'Residue':
        raise TypeError(
            "The function process(residue, radius, caOnly) expects the first argument of class Bio.PDB.Residue"
        )

    searcher = NeighborSearch(Selection.unfold_entities(self.ref_chain, 'A'))
    ref_atoms = searcher.search(residue['CA'].get_coord(), radius, level='A')
    if not caOnly:
        return ref_atoms
    return [atom for atom in ref_atoms if atom.get_name() == 'CA']
def get_neighbor_atoms(chain: ChainDesc, ligand: PhysicalResidue, produce_physical_atoms: bool = True) -> list:
    """Collect the chain atoms that lie within 10 angstroms of a ligand.

    Candidate atoms come from every residue of ``chain`` except those that
    share the ligand's short name, those whose short name is neither an amino
    acid nor a cofactor according to ``database``, and those for which
    ``check_deeper()`` is true. Each ligand atom is then used as the centre
    of a neighbour search over the candidates; every hit is reported once.

    Args:
        chain: Chain description providing both the physical residues
            (``get_residues()``) and the BioPython chain (``chain.chain``).
        ligand: Ligand residue whose atoms are the search centres.
        produce_physical_atoms: When false, return the raw BioPython atoms.

    Returns:
        A list of ``PhysicalAtom`` objects, or of BioPython atoms when
        ``produce_physical_atoms`` is false, in first-found order.
    """
    # BioPython residues of the chain; physical residues address them through
    # a 1-based index.
    bio_residues = list(chain.chain.get_residues())

    # Loop invariants, evaluated once instead of once per residue.
    ligand_name = ligand.get_residue_desc().get_short_name()
    known_names = database.get_amino_acids() + database.get_cofactors()

    chain_atoms = []
    for residue in chain.get_residues():  # physical residue
        residue_desc = residue.get_residue_desc()
        short_name = residue_desc.get_short_name()
        # Do not count atoms from the ligand itself.
        if short_name == ligand_name:
            continue
        # Do not count atoms from other ligands.
        # TODO: refactor
        if short_name not in known_names:
            continue
        if residue_desc.check_deeper():
            continue
        # These are BioPython atoms, not physical atoms.
        chain_atoms.extend(bio_residues[residue.get_index() - 1].get_atoms())

    neighbour_atoms = []
    if chain_atoms:
        # The search structure depends only on the chain atoms, so it is
        # built once. Skipping it for an empty list also avoids handing
        # NeighborSearch nothing to index.
        search = NeighborSearch(chain_atoms)
        # The search returns the very objects stored in chain_atoms, so
        # object identity is enough to recognise an atom already collected.
        seen = set()
        for atom in ligand.get_atoms():  # physical atom
            for neighbour in search.search(atom.get_coords(), 10.0):
                if id(neighbour) not in seen:
                    seen.add(id(neighbour))
                    neighbour_atoms.append(neighbour)

    if not produce_physical_atoms:
        return neighbour_atoms

    neighbour_physical_atoms = []
    for neighbour in neighbour_atoms:
        parent = neighbour.get_parent()
        # Residue number 1 is treated as the N-terminus; a residue carrying
        # an OXT atom is treated as the C-terminus.
        terminus = None
        if parent.get_id()[1] == 1:
            terminus = 'N'
        elif any(a.get_id() == 'OXT' for a in parent.get_atoms()):
            terminus = 'C'
        atom_desc = database.get_residue(parent, terminus).get_atom(neighbour.get_id())
        neighbour_physical_atoms.append(
            PhysicalAtom(bio_atom=neighbour, atom_desc=atom_desc, coords=neighbour.get_coord())
        )
    return neighbour_physical_atoms
def get_contacts(model, cutOff=5., minSeqDist=5):
    """List every atom pair of ``model`` closer than ``cutOff``.

    Each entry is a tuple ``(full_ids, distance, weight)`` where ``full_ids``
    is the two-element list of the atoms' full ids, ``distance`` is the
    difference ``second - first`` of the two atoms and ``weight`` is always
    ``1.``.
    """
    # NOTE(review): minSeqDist is accepted but never applied; no pair is
    # filtered by sequence separation.
    searcher = NeighborSearch(list(model.get_atoms()))
    return [
        ([first.get_full_id(), second.get_full_id()], second - first, 1.)
        for first, second in searcher.search_all(cutOff)
    ]
def search_stacking(self):
    """Search the model for stacking interactions between aromatic residues.

    Fills ``self.aromatic_less_5`` with the residue pairs returned by the
    neighbour search, ``self.stacking`` with the pairs accepted by
    ``is_stacking`` (lower residue number first) and
    ``self.stacking_networks`` with the result of ``self.search_network``.
    """
    aromatic_names = ['PHE', 'TYR', 'TRP', 'HIS']

    # Ring atoms of every aromatic residue.
    ring_atoms = []
    for res in self.model.res:
        if res.resname not in aromatic_names:
            continue
        cycle_names = parametres.dico_cycles[res.resname]["cycle"]
        ring_atoms.extend(res[name] for name in cycle_names)

    # Residue pairs having at least one pair of ring atoms within 6 A
    # (the attribute name says 5, the radius used is 6).
    searcher = NeighborSearch(ring_atoms)
    self.aromatic_less_5 = searcher.search_all(6, level='R')

    # Keep the stacked pairs, ordered by residue number.
    self.stacking = []
    for res1, res2 in self.aromatic_less_5:
        if not is_stacking(res1, res2):
            continue
        ordered = (res1, res2) if res1.get_id()[1] < res2.get_id()[1] else (res2, res1)
        self.stacking.append(ordered)

    self.stacking_networks = self.search_network(self.stacking, _type='stacking')
def pdb_dist(pdb_f, pdb_id):
    """Return the shortest phosphate-to-histidine distance per residue pair.

    Centre atoms are the O1P/O2P/O3P/OH/OG/OG1 atoms of PTR, SEP and TPO
    residues; partner atoms are the ND1/NE2 atoms of HIS residues. For every
    (centre residue, partner residue) pair the smallest atom-atom distance is
    kept.

    Args:
        pdb_f: Structure file handed to ``PDBParser.get_structure``.
        pdb_id: Structure id handed to ``PDBParser.get_structure``.

    Returns:
        A list with one minimum distance per residue pair, in the order the
        pairs were first encountered; empty when either atom set is empty.
    """
    structure = PDBParser().get_structure(pdb_id, pdb_f)

    center = [
        a for a in structure.get_atoms()
        if a.get_parent().get_resname() in ['PTR', 'SEP', 'TPO']
        and a.get_name() in ['O1P', 'O2P', 'O3P', 'OH', 'OG', 'OG1']
    ]
    # Other partner selections tried previously: ARG atoms NE/NH2/NH1 and
    # lysine atom NZ. Swap the residue and atom names below to use them.
    neighbors = [
        a for a in structure.get_atoms()
        if a.get_parent().get_resname() in ['HIS']
        and a.get_name() in ['ND1', 'NE2']
    ]

    # NOTE(review): pairs are keyed by the string form of the two residues.
    # If that string does not include the chain (or model) id, residues with
    # the same name and number in different chains share one entry - confirm
    # whether that merging is intended.
    dist = {}
    for c in center:
        c_label = str(c.get_parent())
        for n in neighbors:
            vector = c.coord - n.coord
            value = np.sqrt(np.sum(vector * vector))
            key = c_label + '_' + str(n.get_parent())
            # Keep a running minimum instead of storing every distance.
            if key not in dist or value < dist[key]:
                dist[key] = value
    return list(dist.values())
def filter_structure(pdb,chain,lig,lig_num):
    """Write the ligand and every residue in contact with it to a PDB file.

    The structure ``<pdb_path>/<pdb>.pdb`` is parsed, all atom pairs within
    ``search_radius`` are enumerated, and both residues of every pair that
    involves the ligand (matched on chain id, residue name and residue
    number given as a string) are saved to
    ``<output_path>/<pdb>:<chain>:<lig>:<lig_num>.atoms.pdb``.
    """
    pdb_input = pdb_path + "/" + pdb + ".pdb"
    pdb_output = output_path + "/" + ":".join((pdb, chain, lig, lig_num)) + ".atoms.pdb"

    structure = PDBParser(QUIET=1).get_structure(pdb, pdb_input)
    searcher = NeighborSearch(list(structure.get_atoms()))

    def is_ligand(residue):
        # The residue number is compared as text because lig_num is a string.
        return (
            residue.parent.id == chain
            and residue.resname == lig
            and str(residue.id[1]) == lig_num
        )

    res_list = set()
    for atom_a, atom_b in searcher.search_all(search_radius):
        res_a, res_b = atom_a.parent, atom_b.parent
        if is_ligand(res_a) or is_ligand(res_b):
            res_list.update((res_a, res_b))

    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_output, ResSelect(res_list))
def interchain_residue_contacts(self, chain_ids_1, chain_ids_2, radius):
    """Group residue contacts between two sets of chains.

    Only atoms of chains whose id appears in ``chain_ids_1`` or
    ``chain_ids_2`` take part in the neighbour search. The result maps
    ``(id from chain_ids_1, id from chain_ids_2)`` to a list of dicts, each
    dict mapping the two chain ids of one contact to its two residues.
    """
    # Index the chains by id; a later chain with the same id replaces an
    # earlier one.
    chains_by_id = {}
    for chain in self.get_chains():
        chains_by_id[chain.get_id()] = chain

    atoms = []
    for chain_id, chain in chains_by_id.items():
        if chain_id in chain_ids_1 + chain_ids_2:
            atoms.extend(Selection.unfold_entities(chain, "A"))

    classified_contacts = defaultdict(list)
    for first, second in NeighborSearch(atoms).search_all(radius, "R"):
        first_chain = first.get_parent().get_id()
        second_chain = second.get_parent().get_id()
        entry = {first_chain: first, second_chain: second}
        # The pair key always lists the chain from chain_ids_1 first.
        if first_chain in chain_ids_1 and second_chain in chain_ids_2:
            classified_contacts[(first_chain, second_chain)].append(entry)
        elif second_chain in chain_ids_1 and first_chain in chain_ids_2:
            classified_contacts[(second_chain, first_chain)].append(entry)
    return classified_contacts
def calculateNeighbors(filename, radius):
    """Compute a per-residue burial score for the structure in ``filename``.

    For each residue, the atoms found within ``radius`` of each of its atoms
    are counted (an atom found from several centres counts each time) and
    the total is divided by the residue's atom count. The result maps the
    residue number, as a string, to that score.
    """
    # NOTE(review): the key is the residue number only, so residues sharing
    # a number (other chains, insertion codes) overwrite each other.
    # NOTE(review): `/` is integer division on Python 2 without
    # `from __future__ import division` - confirm the intended interpreter.
    structure = parser.get_structure(filename.split(".pdb")[0], filename)
    atom_list = Selection.unfold_entities(structure, 'A')  # A for atoms
    residue_list = Selection.unfold_entities(structure, 'R')  # R for residues
    neighbor_search = NeighborSearch(atom_list)

    data = {}
    for residue in residue_list:
        residue_atoms = residue.get_list()
        contact_count = sum(
            len(neighbor_search.search(atom.get_coord(), radius, level = "A"))
            for atom in residue_atoms
        )
        data[str(residue.get_id()[1])] = contact_count/len(residue_atoms)
    return data
def calculate_ic(structure, d_cutoff=5.5, selection=None):
    """Calculate intermolecular residue contacts in a parsed structure.

    Args:
        structure: Parsed structure providing ``get_atoms()``.
        d_cutoff: Distance cutoff handed to ``NeighborSearch.search_all``.
        selection: Optional mapping keyed by chain id. When given, a contact
            is kept only if both chains are keys of the mapping and their
            mapped values differ. When omitted (or empty), every contact
            between residues of different chains is kept.

    Returns:
        The list of residue pairs, as produced by ``search_all``, that pass
        the filter.

    Raises:
        ValueError: If no contact passes the filter.
    """
    atom_list = list(structure.get_atoms())
    ns = NeighborSearch(atom_list)
    all_list = ns.search_all(radius=d_cutoff, level='R')

    def chain_of(residue):
        return residue.parent.id

    if selection:
        # Use the caller's mapping; the code previously read an undefined
        # name here and could not honour the argument.
        ic_list = [
            c for c in all_list
            if chain_of(c[0]) in selection
            and chain_of(c[1]) in selection
            and selection[chain_of(c[0])] != selection[chain_of(c[1])]
        ]
    else:
        ic_list = [c for c in all_list if chain_of(c[0]) != chain_of(c[1])]

    if not ic_list:
        raise ValueError('No contacts found for selection')
    return ic_list
def _remove_distant_hatatms(self, new_model, hetatm_chain):
    """Detach hetatm residues that have no residue of ``new_model`` nearby.

    The chain is first renamed to the last uppercase letter not present in
    ``self.chain_ids``. A residue stays attached when at least one of its
    atoms has a residue of ``new_model`` within ``self.r_cutoff``.
    """
    ns = NeighborSearch(list(new_model.get_atoms()))

    free_ids = [
        letter for letter in reversed(string.ascii_uppercase)
        if letter not in self.chain_ids
    ]
    hetatm_chain.id = free_ids[0]

    # Detaching shortens the chain, so the index only advances past residues
    # that are kept.
    res_idx = 0
    while res_idx < len(hetatm_chain):
        res_1 = hetatm_chain.child_list[res_idx]
        in_contact = any(
            ns.search(atom_1.get_coord(), self.r_cutoff, 'R')
            for atom_1 in res_1
        )
        if in_contact:
            res_idx += 1
        else:
            hetatm_chain.detach_child(res_1.id)
def extract_feature(self):
    """Attach the D1 surface shape distribution to every residue.

    For both unbound proteins of every complex in the database, one
    distribution row per residue is computed from the residue itself plus
    the nearby residues whose relative accessible surface area reaches
    ``self.rASA_threshold``. Rows are cached in ``<dir>/<protein>.npy`` and
    reloaded from there on later runs.

    NOTE: cache files written before the centre fix below still hold values
    computed around a neighbour's centre; delete them to recompute.
    """
    seed(self.seed)
    print_info_nn(" >>> Adding D1 surface shape distribution for database {0} ... ".format(self._database.name))
    overall_time = datetime.now()
    counter = 0
    if not os.path.exists(self._get_dir_name()):
        os.makedirs(self._get_dir_name())
    for complex_name in self._database.complexes.keys():
        protein_complex = self._database.complexes[complex_name]
        proteins = [protein_complex.unbound_formation.ligand, protein_complex.unbound_formation.receptor]
        for protein in proteins:
            shape_dist_file = self._get_dir_name() + protein.name
            if not os.path.exists(shape_dist_file + ".npy"):
                # Progress output: start a new line after every 16th name.
                counter += 1
                if counter <= 15:
                    print_info_nn("{0}, ".format(protein.name))
                else:
                    counter = 0
                    print_info("{0}".format(protein.name))
                atoms = protein.atoms
                neighbour_search = NeighborSearch(atoms)
                distributions = np.zeros((len(protein.residues), self.number_of_bins + 1))
                for i, residue in enumerate(protein.residues):
                    # The residue's own Biopython counterpart always takes part.
                    nearby_residues = [protein.biopython_residues[i]]
                    candidates = neighbour_search.search(residue.center, self.radius, "R")
                    for candidate in candidates:
                        # One scan both tests membership and finds the index.
                        try:
                            candidate_index = protein.biopython_residues.index(candidate)
                        except ValueError:
                            continue
                        # A separate name keeps `residue` bound to the central
                        # residue; reusing it made the distribution be computed
                        # around the last neighbour's centre.
                        neighbour = protein.residues[candidate_index]
                        if neighbour.get_feature(Features.RELATIVE_ACCESSIBLE_SURFACE_AREA) >= self.rASA_threshold:
                            nearby_residues.append(candidate)
                    distributions[i, :] = self._compute_distribution(nearby_residues, residue.center)
                np.save(shape_dist_file, distributions)
            distributions = np.load(shape_dist_file + ".npy")
            for i, residue in enumerate(protein.residues):
                residue.add_feature(Features.D1_SURFACE_SHAPE_DISTRIBUTION, distributions[i, :])
    print_info("took {0} seconds.".format((datetime.now() - overall_time).seconds))
# For every chain in args.chain2, list the residues whose CA atom lies within
# 8.5 A of a CA atom of the chains in args.chain1, and print a PyMOL command
# that shows them as spheres.
structure = PDBParser().get_structure('X', args.pdb)
center_atoms = []
pymol_command = ""
# Not used by the search below; kept because code outside this section may
# still read it.
all_atom_list = [atom for atom in structure.get_atoms() if atom.name == 'CA']

for k in args.chain1:
    chain_atoms = [atom for atom in structure[0][k].get_atoms() if atom.name == 'CA']
    center_atoms += chain_atoms

for j in args.chain2:
    atom_list = [atom for atom in structure[0][j].get_atoms() if atom.name == 'CA']
    ns = NeighborSearch(atom_list)
    nearby_residues = {
        res
        for center_atom in center_atoms
        for res in ns.search(center_atom.coord, 8.5, 'R')
    }
    residue_numbers = sorted(res.id[1] for res in nearby_residues)

    # Single-argument print calls give the same output on Python 2 and 3;
    # joining with " " reproduces the spacing of a comma-separated print.
    print(" ".join(["\nNeighbor residues in chain ", str(j), ": \n"]))
    print(residue_numbers)

    # The command is assembled with a trailing "+" that is stripped only when
    # printing, so the variable keeps the value it always had.
    pymol_command = "show spheres, chain " + j + " and resi " + "".join(
        str(m) + "+" for m in residue_numbers
    )
    if residue_numbers:
        print(pymol_command[:-1] + " and name CA \n")
    # With no neighbour there is nothing to select; printing the command
    # would emit a selection with an empty "resi" clause.
SCALAR_EXPRESSION %s_o = %s_startenergy + 4.91 SCALAR_EXPRESSION %s_s = %s SCALAR_EXPRESSION %s_exp = exp(%s_s*(%s_currentenergy-%s_o)) SCALAR_EXPRESSION %s_k = %s SCALAR_EXPRESSION %s_sig = (1-%s_k)+(%s_k/(1+%s_exp)) """) %(structure_id, structurefilename, correspondencefilename, resfilename, structure_id, startenergy, structure_id, structure_id, structure_id, s_value, structure_id, structure_id, structure_id, structure_id, structure_id, k_value, structure_id, structure_id, structure_id, structure_id) fitnessfile.write(outstring) fitnessstring = fitnessstring + str("*%s_sig") % (structure_id) nucleosome = structure[0] atom_list = Selection.unfold_entities(nucleosome, 'A') # A for atoms neighbor_search = NeighborSearch(atom_list) contacts_list = neighbor_search.search_all(radius, level = 'R') repack_residues = [] for contact in contacts_list: res1 = contact[0] res2 = contact[1] res1id = int(res1.get_id()[1]) chain1 = res1.get_parent() chain1id = chain1.get_id() res2id = int(res2.get_id()[1]) chain2 = res2.get_parent() chain2id = chain2.get_id() res1_in_patch = False
PDB = pdb_biomol_match.group(1) BIOMOL_ID = pdb_biomol_match.group(2) pdb_rcsb_asm_match = re.search(PDB_RCSB_ASM_REGEX, INPUT_FILENAME) if pdb_rcsb_asm_match: PDB = pdb_rcsb_asm_match.group(1) BIOMOL_ID = pdb_rcsb_asm_match.group(2) # LOAD STRUCTURE structure = PDBParser().get_structure('structure', INPUT_FILE) structure_atoms = list(structure.get_atoms()) logging.info('Loaded PDB structure (BioPython).') # CONSTRUCT KDTREE neighborsearch = NeighborSearch(structure_atoms) logging.info('Constructured NeighborSearch.') # GET INTERACTIONS logging.info('Calculating interactions...') for interaction_level in 'ARC': if interaction_level in OUTPUTS: logging.info('Calculating interactions for {}s...'.format( LEVEL_MAP[interaction_level])) pairs = neighborsearch.search_all(INTERACTION_THRESHOLD, level=interaction_level)
def _build_interface(self, model, id, threshold, rsa_calculation, rsa_threshold, include_waters=False, *chains):
    """Populate ``self.interface`` with the interface residues of ``model``.

    Residue pairs closer than ``threshold`` are searched among the atoms of
    the chains listed in ``self.chain_list``. Pairs within one chain are
    dropped, as are pairs involving a residue named ``HOH`` unless
    ``include_waters`` is true. When ``chains`` is given, only pairs whose
    ``(chain id, chain id)`` tuple is one of ``chains`` are kept. The
    residues of the remaining pairs are added to ``self.interface`` and the
    pairs are stored in ``self.interface.uniq_pairs``.

    ``id`` is not used in this method. Nothing is returned.

    Raises ValueError when the neighbour search finds no pair at all.

    NOTE(review): the source layout of this method was lost; the nesting of
    the statements after the naccess check is a reconstruction - confirm
    that ``_secondary_structure`` is meant to run unconditionally.
    """
    self.threshold=threshold

    # Recover chain list from initial unpacking
    chain_list = self.chain_list

    # Unfold atom list
    atom_list = []
    for c in model:
        if c.id in chain_list:
            atom_list.extend(Selection.unfold_entities(c,'A'))

    # Use NeighborSearch to get all residue pairs that are closer to each
    # other than the threshold distance
    ns=NeighborSearch(atom_list)
    pairs=ns.search_all(threshold, 'R')

    if not pairs:
        raise ValueError("No atoms found in the interface")

    # Selection of residue pairs
    # 1. Exclude water contacts
    # 2. Filter same-chain contacts
    # 3. Filter user-defined chain pairs
    uniq_pairs=[]

    for pair in pairs:
        pair_resnames = (pair[0].resname, pair[1].resname)
        pair_chains = (pair[0].parent.id, pair[1].parent.id)

        if (not include_waters and 'HOH' in pair_resnames) or (pair_chains[0] == pair_chains[1]):
            continue

        # Equivalent to: no chain pairs were requested, or this pair is one
        # of the requested ones (the tuple order must match exactly).
        if not (chains and not (pair_chains in chains)):
            uniq_pairs.append(pair)

    # Build the Interface
    # 1. Iterate over the pair list
    # 2. Add residues.
    for resA, resB in uniq_pairs:
        if resA not in self.interface:
            self._add_residue(resA)
        if resB not in self.interface:
            self._add_residue(resB)

    # Accessible surface area is calculated for each residue only if
    # rsa_calculation is true and `which naccess` succeeds, i.e. naccess is
    # found on the user's computer
    if rsa_calculation and os.system('which naccess') == 0:
        rsa_pairs=self._rsa_calculation(model, chain_list, rsa_threshold)

        for res in rsa_pairs:
            if res not in self.interface:
                self._add_residue(res)

    self._secondary_structure(model)
    #interface=uniq_pairs
    self.interface.uniq_pairs=uniq_pairs
# Pick the first residue whose name matches the requested ligand.
for residue in structure.get_residues():
    if residue.resname == args.ligand:
        ligand = residue
        break

if not ligand:
    print('[!!] Ligand residue \'{0}\' not found in structure'.format(args.ligand), file=sys.stderr)
    sys.exit(1)

# Calculate the centre of the ligand as the unweighted mean of its atom
# coordinates. A list is built explicitly: on Python 3 `map` returns an
# iterator, which np.asarray cannot turn into a float array.
ligand_com = [sum(axis) / len(axis) for axis in zip(*[at.coord for at in ligand])]
ligand_com = np.asarray(ligand_com, dtype=np.float32)

# Calculate neighbors considering only aminoacid/nucleotide atoms
# (excl. waters, other ligands, etc): residues whose id starts with ' '.
sel_atoms = [at for at in structure.get_atoms() if at.parent.id[0] == ' ']
ns = NeighborSearch(sel_atoms)
neighbors = ns.search(ligand_com, 10.0, level='R')  # 10A radius, return residues

# Calculate residue closer to each ligand atom and the respective distance
ligand_atoms = ligand.child_list
min_dist_list, _seen = [], set()
for l_at in ligand_atoms:
    distances = []
    for residue in neighbors:
        for r_at in residue:
            distances.append((r_at, l_at, r_at - l_at))
    distances.sort(key=lambda x: x[-1])
    min_dist = distances[0]
    # One restraint per residue to keep the number of restraints small
def get_interacting_residues(model, r_cutoff=5, skip_hetatm_chains=True):
    """Return residue-residue interactions between all chains in `model`.

    Parameters
    ----------
    model : biopython.Model
        Model to analyse.
    r_cutoff : number
        Radius handed to ``NeighborSearch.search`` around every atom of a
        residue of the first chain.
    skip_hetatm_chains : bool
        When true, chains for which ``chain_is_hetatm`` is true are skipped.

    Returns
    -------
    dict
        A dictionary of interactions between chains i (0..n-1) and j (i+1..n).
        Keys are (chain_idx, chain_id, residue_idx, residue_resnum,
        residue_amino_acid) tuples.
        (e.g. (0, 'A', 0, '0', 'M'), (0, 'A', 1, '2', 'K'), ...)
        Values are sets of tuples having the same format as the keys.
        Residues whose id is not listed by ``get_aa_residues`` are ignored.

    Examples
    --------
    You can reverse the order of keys and values like this::

        complement = dict()
        for key, values in get_interacting_residues(model).items():
            for value in values:
                complement.setdefault(value, set()).add(key)

    You can get a list of all interacting chains using this command::

        {(key[0], value[0])
         for (key, values) in get_interacting_residues(model).items()
         for value in values}

    """
    from Bio.PDB import NeighborSearch

    # Per-chain caches: each chain is paired with several others, and neither
    # its residue index nor its search structure depends on the partner.
    residue_positions = {}
    searchers = {}

    def positions_for(chain_idx, chain):
        """Map each amino-acid residue id of `chain` to its first position."""
        if chain_idx not in residue_positions:
            positions = {}
            # assumes get_aa_residues returns a sequence of residue ids
            # (hashable tuples), as the former `.index(residue.id)` lookups
            # did - TODO confirm
            for position, residue_id in enumerate(get_aa_residues(chain)):
                positions.setdefault(residue_id, position)
            residue_positions[chain_idx] = positions
        return residue_positions[chain_idx]

    def searcher_for(chain_idx, chain):
        """Return the neighbour search over the atoms of `chain`."""
        if chain_idx not in searchers:
            searchers[chain_idx] = NeighborSearch(list(chain.get_atoms()))
        return searchers[chain_idx]

    def residue_key(chain_idx, chain, residue_idx, residue):
        """Build the tuple that identifies `residue` in the result."""
        resnum = str(residue.id[1]) + residue.id[2].strip()
        amino_acid = convert_aa(residue.resname, quiet=True)
        return (chain_idx, chain.id, residue_idx, resnum, amino_acid)

    interactions_between_chains = dict()

    # Chain 1
    for chain_1_idx, chain_1 in enumerate(model):
        if skip_hetatm_chains and chain_is_hetatm(chain_1):
            message = (
                "Skipping chain_1 with idx {} because it contains only hetatms."
                .format(chain_1_idx)
            )
            logger.debug(message)
            continue
        chain_1_positions = positions_for(chain_1_idx, chain_1)

        # Chain 2: only chains after chain 1, so each pair is visited once.
        later_chains = model.child_list[chain_1_idx + 1:]
        for chain_2_idx, chain_2 in enumerate(later_chains, chain_1_idx + 1):
            if skip_hetatm_chains and chain_is_hetatm(chain_2):
                message = (
                    "Skipping chain_2 with idx {} because it contains only hetatms."
                    .format(chain_2_idx)
                )
                logger.debug(message)
                continue
            chain_2_positions = positions_for(chain_2_idx, chain_2)
            ns = searcher_for(chain_2_idx, chain_2)

            # Residue 1
            for residue_1 in chain_1:
                residue_1_idx = chain_1_positions.get(residue_1.id)
                if residue_1_idx is None:
                    continue
                residue_1_key = residue_key(
                    chain_1_idx, chain_1, residue_1_idx, residue_1)

                interacting_residues = set()
                for atom_1 in residue_1:
                    interacting_residues.update(
                        ns.search(atom_1.get_coord(), r_cutoff, 'R'))

                # Residue 2
                interacting_residue_ids = []
                for residue_2 in interacting_residues:
                    residue_2_idx = chain_2_positions.get(residue_2.id)
                    if residue_2_idx is None:
                        continue
                    interacting_residue_ids.append(residue_key(
                        chain_2_idx, chain_2, residue_2_idx, residue_2))

                if interacting_residue_ids:
                    interactions_between_chains\
                        .setdefault(residue_1_key, set())\
                        .update(interacting_residue_ids)

    return interactions_between_chains