def search(self, center, radius, level="A"):
    """Neighbor search.

    Return all atoms/residues/chains/models/structures
    that have at least one atom within radius of center.
    What entity level is returned (e.g. atoms or residues)
    is determined by level (A=atoms, R=residues, C=chains,
    M=models, S=structures).

    Arguments:
     - center - Numeric array (must end up with shape (3,))
     - radius - float
     - level - char (A, R, C, M, S)

    Raises PDBException when level is not one of the known entity levels,
    and Exception when center does not have shape (3,).
    """
    if level not in entity_levels:
        raise PDBException("%s: Unknown level" % level)

    # Coerce the query point to a C-contiguous array of doubles before
    # handing it to the KD tree.
    center = numpy.require(center, dtype='d', requirements='C')
    if center.shape != (3,):
        raise Exception("Expected a 3-dimensional NumPy array")

    # Each point returned by the KD tree carries the index of its atom.
    neighbors = self.kdt.search(center, radius)
    nearby_atoms = [self.atom_list[neighbor.index] for neighbor in neighbors]
    return nearby_atoms if level == "A" else unfold_entities(nearby_atoms, level)
def search(self, center, radius, level="A"):
    """Neighbor search.

    Return all atoms/residues/chains/models/structures
    that have at least one atom within radius of center.
    What entity level is returned (e.g. atoms or residues)
    is determined by level (A=atoms, R=residues, C=chains,
    M=models, S=structures).

    Arguments:
     - center - Numeric array
     - radius - float
     - level - char (A, R, C, M, S)

    Raises PDBException when level is not one of the known entity levels.
    """
    if level not in entity_levels:
        raise PDBException("%s: Unknown level" % level)

    # The return value of search() is not used; the indices of the
    # matching atoms are fetched afterwards with get_indices().
    self.kdt.search(center, radius)
    hit_indices = self.kdt.get_indices()
    all_atoms = self.atom_list
    neighbors = [all_atoms[index] for index in hit_indices]
    return neighbors if level == "A" else unfold_entities(neighbors, level)
def search(self, center, radius, level="A"):
    """Neighbor search.

    Return all atoms/residues/chains/models/structures
    that have at least one atom within radius of center.
    What entity level is returned (e.g. atoms or residues)
    is determined by level (A=atoms, R=residues, C=chains,
    M=models, S=structures).

    Arguments:
     - center - Numeric array (must end up with shape (3,))
     - radius - float
     - level - char (A, R, C, M, S)

    Raises PDBException for an unknown level and Exception when center
    does not have shape (3,).
    """
    if level not in entity_levels:
        raise PDBException("%s: Unknown level" % level)

    center = numpy.require(center, dtype='d', requirements='C')
    if center.shape != (3,):
        raise Exception("Expected a 3-dimensional NumPy array")

    # Map every KD-tree hit back to the atom stored at the same index.
    atoms_in_range = [
        self.atom_list[hit.index] for hit in self.kdt.search(center, radius)
    ]
    if level != "A":
        # Any level above atoms: collapse the hits into their parents.
        return unfold_entities(atoms_in_range, level)
    return atoms_in_range
def search(self, center, radius, level="A"):
    """Neighbor search.

    Return all atoms/residues/chains/models/structures
    that have at least one atom within radius of center.
    What entity level is returned (e.g. atoms or residues)
    is determined by level (A=atoms, R=residues, C=chains,
    M=models, S=structures).

    Arguments:
     - center - Numeric array
     - radius - float
     - level - char (A, R, C, M, S)

    Raises PDBException for an unknown level.
    """
    if level not in entity_levels:
        raise PDBException("%s: Unknown level" % level)

    # Run the query first, then read the resulting atom indices back.
    self.kdt.search(center, radius)
    found_indices = self.kdt.get_indices()
    candidates = self.atom_list
    found_atoms = [candidates[position] for position in found_indices]

    if level != "A":
        return unfold_entities(found_atoms, level)
    return found_atoms
def CheckClashes(structure, chain, radius=2, max_clashes=25):
    """
    Checks for clashes between a PDB structure and all the atoms of the
    provided chain.

    For every atom of the chain, the residues of model 0 of the structure
    that have at least one atom within `radius` of that atom are collected
    (the atom's own residue is dropped when it shows up in the result).
    Every distinct (chain residue, neighboring residue) pair counts as one
    clash. Note that pairs are counted, not individual residues.

    Returns True as soon as the number of distinct clashing pairs exceeds
    `max_clashes`, False otherwise.

    Arguments:
        -structure: PDB structure; only its model 0 is searched.
        -chain: PDB chain to check for clashes with the main structure.
        -radius: search radius around each atom, in the units of the atom
                 coordinates (default 2).
        -max_clashes: number of clashing pairs tolerated before the chain
                 is reported as clashing (default 25).
    """
    # Build the search structure once from all atoms of model 0, i.e. from
    # every chain of that model.
    ns = NeighborSearch(unfold_entities(structure[0], 'A'))

    clashing_residues = set()
    for atom in chain.get_atoms():
        own_residue = atom.get_parent()
        close_res = ns.search(atom.get_coord(), radius=radius, level="R")
        # An atom is trivially close to its own residue; ignore that hit.
        try:
            close_res.remove(own_residue)
        except ValueError:
            pass
        for res in close_res:
            clashing_residues.add((own_residue, res))
        # The set only ever grows, so stopping here gives the same answer
        # as counting after the loop while skipping the remaining searches.
        if len(clashing_residues) > max_clashes:
            return True
    return False
def test_from_structure_level(self):
    """Unfold from highest level to all levels."""
    structure = self.structure

    # Structure and model targets: the first entity returned must expose
    # the same residues, in the same order, as the original structure.
    for target_level in ("S", "M"):
        first_entity = unfold_entities(structure, target_level)[0]
        pairs = zip(structure.get_residues(), first_entity.get_residues())
        for expected, unfolded in pairs:
            assert res_full_id(expected) == res_full_id(unfolded)

    # Residue target: the returned list itself is compared residue by residue.
    unfolded_residues = unfold_entities(structure, "R")
    for expected, unfolded in zip(structure.get_residues(), unfolded_residues):
        assert res_full_id(expected) == res_full_id(unfolded)

    # Atom target: the very same atom objects must come back.
    unfolded_atoms = unfold_entities(structure, "A")
    for expected, unfolded in zip(structure.get_atoms(), unfolded_atoms):
        assert expected is unfolded
def get_residues(file, mod, ch, first_to_remove) -> list:
    '''Residues from pdb file

    Only residues whose name is one of the proteinogenic amino acids listed
    below are kept; everything else in the chain (e.g. water molecules and
    ions) is dropped.

    Args:
        file (str): absolute/relative path for pdb file
        mod (int): selects the wanted model (must be >= 0)
        ch (int): selects the wanted chain (must be >= 0)
        first_to_remove (int): number of residues to remove from the
            beginning of the chain (e.g. because they are added
            artificially to make the protein crystallize)

    Returns:
        list: list of proteinogenic residues of the selected chain, without
            the first `first_to_remove` of them
    '''
    parser = pdb.PDBParser()
    # The structure is named after the 4 characters that precede the last
    # 4 characters of the path, i.e. this assumes a file name of the form
    # "<4-character protein ID><4-character extension such as .pdb>".
    name_protein = file[-8:-4]
    structure = parser.get_structure(name_protein, file)

    # Walk down structure -> model -> chain -> residues.
    selected_model = unfold_entities(structure, 'M')[mod]
    selected_chain = unfold_entities(selected_model, 'C')[ch]
    chain_residues = unfold_entities(selected_chain, 'R')

    # Three-letter codes accepted as proteinogenic residues.
    proteinogenic_res = (
        'ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS', 'ILE', 'LYS', 'LEU',
        'MET', 'ASN', 'PYL', 'PRO', 'GLN', 'ARG', 'SER', 'THR', 'SEC', 'VAL',
        'TRP', 'TYR',
    )
    kept = [res for res in chain_residues
            if res.get_resname() in proteinogenic_res]
    return kept[first_to_remove:]
def get_atoms_of_res_sidechain(residue):
    """
    Return the atoms of a residue without the atoms named C, O and N.

    Only atoms whose name is exactly 'C', 'O' or 'N' are dropped; every
    other atom of the residue (including CA) is returned.

    The result is built as a new filtered list. Removing items from the
    list while iterating over it skips the element that follows each
    removed one, so adjacent C/O/N atoms could survive the filter.

    :param residue: residue whose atoms are wanted
    :return: list of the residue's atoms not named 'C', 'O' or 'N'
    """
    excluded_names = ('C', 'O', 'N')
    return [atom for atom in unfold_entities(residue, 'A')
            if atom.get_name() not in excluded_names]
def test_entities_not_homogenous(self):
    """Unfolding a list that mixes an atom and a chain raises PDBException."""
    first_atom = next(self.structure.get_atoms())
    first_chain = next(self.structure.get_chains())
    mixed_entities = [first_atom, first_chain]
    self.assertRaises(PDBException, unfold_entities, mixed_entities, "A")
def test_invalid_level(self):
    """Unfolding to the unknown target level "Z" raises PDBException."""
    self.assertRaises(PDBException, unfold_entities, self.structure, "Z")
def build_align_file(input_pdb, pdbcode, output_align_file="protein.ali"):
    """
    Function that takes a PDB filepath, detects missing residues and builds a
    MODELLER align file with this information, to be later used for
    completing residues.

    Two entries are written: the sequence found in the structure with a gap
    for every missing residue, and the same sequence with the gaps filled in
    with the missing residues taken from the PDB header. Chains are
    separated by "/" and each sequence ends with "*". An existing file at
    `output_align_file` is removed first.

    :param input_pdb: PDB filepath
    :param pdbcode: Code identifier for the PDB structure. Ex: 2y8d
    :param output_align_file: Filepath for output align file.
    :return: None
    """
    # Read structure and extract present and missing residues
    pdbparser = PDBParser()
    structure = pdbparser.get_structure(pdbcode, input_pdb)
    chains = unfold_entities(structure, "C")  # Get chains
    # Missing residues of the whole structure, as parsed from the header
    missing_residues = structure.header["missing_residues"]

    # Remove alignment file if exists
    try:
        os.remove(output_align_file)
    except FileNotFoundError:
        pass

    # Where to store the sequences from structure separated by chains/index
    whole_gapped = []
    whole_full = []

    for chain in chains:
        chain_id = chain.get_id()
        residues = unfold_entities(chain, "R")  # Get residues of chain
        missing_res_chain = get_chain_missing_res(missing_residues, chain_id)

        # Residues with empty id[0] are the 'real' residues, others are
        # solvent or different.
        residues_list = [(residue.id[1], seq1(residue.resname))
                         for residue in residues if residue.id[0] == " "]
        for mis_res in missing_res_chain:
            insert_gap(mis_res["ssseq"], residues_list)

        # Sequence with gaps
        try:
            gapped_seq = "".join(np.array(residues_list)[:, 1])
        except IndexError:
            # An empty residues list cannot be indexed on a second axis.
            # Warn the user (probably a chain made only of HETATOMS).
            msg = "Residues list for chain {} is empty. Check PDB, probably chain is " \
                  "full of HETATOM type atoms. \nLeaving chain empty in align " \
                  "file.".format(chain)
            warnings.warn(msg)
            gapped_seq = ""  # Empty seq for chain full of HETATOM or non-standard

        # Full sequence without gaps: replace the gaps, left to right, with
        # the residues missing from THIS chain. Using the list of the whole
        # structure here would fill the gaps of later chains with residues
        # that are missing from earlier chains.
        # NOTE(review): relies on get_chain_missing_res returning the
        # chain's entries in sequence order - confirm against that helper.
        full_seq = gapped_seq
        for mis_res in missing_res_chain:
            full_seq = full_seq.replace("-", seq1(mis_res["res_name"]), 1)

        whole_gapped.append(gapped_seq)
        whole_full.append(full_seq)

    # Building whole strings to write to file. "/" char separates chains.
    whole_gapped_str = "/".join(whole_gapped)
    whole_full_str = "/".join(whole_full)

    # Writing to file
    # Remember sequences have to end with the * character
    with open(output_align_file, "a+") as handle:
        # Writing structure/gapped section
        handle.write(">P1;" + structure.id + "\n")
        handle.write("structureX:" + structure.id + ":FIRST:@ END:@" + 5 * ":." + "\n")
        # Lines are wrapped to the width used for the align file
        for line in textwrap.wrap(whole_gapped_str + "*", width=75,
                                  break_on_hyphens=False):
            handle.write("%s\n" % line)

        # Writing full sequence section
        handle.write(">P1;" + structure.id + "_fill\n")
        handle.write("sequence:" + structure.id + ":FIRST:@ END:@" + 5 * ":." + "\n")
        for line in textwrap.wrap(whole_full_str + "*", width=75,
                                  break_on_hyphens=False):
            handle.write("%s\n" % line)
def view_in_pymol(id, predicted_voxels=None, truth_voxels=None, voxel_atom_ratio=.2):
    """Write a PyMOL command script that highlights binding-site residues.

    The script is written to "<id>_pymol.cmd". It fetches the entry, shows
    it as a gray surface and then, depending on the arguments:

    - colors the residues selected from `truth_voxels` orange,
    - colors the residues selected from `predicted_voxels` magenta,
    - when both are given, colors the residues that are predicted but not
      in the truth set blue.

    Arguments:
        id: identifier of the form "<pdb>.<chain>".
        predicted_voxels: iterable of predicted voxels, or None to skip.
        truth_voxels: iterable of ground-truth voxels, or None to skip.
        voxel_atom_ratio: minimum ratio between the number of voxels
            assigned to an atom and atom_volume(structure, atom) for the
            atom to be selected.
    """
    pdb, chain = id.split(".")
    structure = Structure.from_pdb(pdb, chain, rotate=False)

    # One PyMOL command per line.
    cmd = (
        "fetch {id}\n"
        "remove hetatm\n"
        "hide everything, {id}\n"
        "show surface, {id}\n"
        "color gray90, {id}\n"
    ).format(id=id)

    if truth_voxels is not None:
        truth_residues = _voxels_to_residue_numbers(
            structure, truth_voxels, voxel_atom_ratio)
        cmd += (
            "select true_binding_site, resi {true_resi}\n"
            "color orange, true_binding_site\n"
        ).format(true_resi="+".join(truth_residues))

    if predicted_voxels is not None:
        predicted_residues = _voxels_to_residue_numbers(
            structure, predicted_voxels, voxel_atom_ratio)
        # The selection must be built from the predicted residues; building
        # it from truth_residues would color the wrong residues and fail
        # with a NameError when only predictions are supplied.
        cmd += (
            "select predicted_binding_site, resi {predicted_resi}\n"
            "color magenta, predicted_binding_site\n"
        ).format(predicted_resi="+".join(predicted_residues))

    if truth_voxels is not None and predicted_voxels is not None:
        false_positive_residues = set(predicted_residues) - set(truth_residues)
        cmd += (
            "select false_positive_binding_site, resi {fp_resi}\n"
            "color blue, false_positive_binding_site\n"
        ).format(fp_resi="+".join(false_positive_residues))

    with open("{}_pymol.cmd".format(id), "w") as f:
        # Writes the script followed by a newline; unlike `print >> f`,
        # this form also works on Python 3.
        f.write(cmd + "\n")


def _voxels_to_residue_numbers(structure, voxels, voxel_atom_ratio):
    """Return the residue numbers (as strings) selected by a set of voxels."""
    # Count, per atom, how many voxels have that atom as their first match.
    voxels_per_atom = Counter()
    for v in voxels:
        atoms = structure.convert_voxels(v, level="A")
        if len(atoms) > 0:
            voxels_per_atom[atoms[0]] += 1

    # Keep the atoms whose voxel count is large enough relative to the
    # value atom_volume() reports for them.
    selected_atoms = [
        atom for atom, count in voxels_per_atom.items()
        if float(count) / atom_volume(structure, atom) >= voxel_atom_ratio
    ]
    return [str(r.get_id()[1]) for r in unfold_entities(selected_atoms, "R")]
def SuperimposeStructures(object_list, complex, RMSD_threshold):
    """
    Superimposes chains from objects in object_list to chains in complex.

    A core chain is chosen with FindCoreChain. The first structure that
    contains it is deep-copied and its model 0 is added to the complex as
    the reference. Every structure containing the core chain is then
    superimposed onto the reference using the atoms of the core chain.
    When the RMSD of that superposition is below RMSD_threshold, each of
    its other chains that does not clash with the complex (CheckClashes) is
    deep-copied, renamed if its id is already used in the complex, and
    added to model 0 of the complex. Processed structures are removed from
    object_list.

    Returns a tuple (complex, object_list): the complex with the newly
    added chains and the updated object_list.

    Arguments:
        -object_list : list of PDB objects that have to be superimposed and
                       added to the complex. It is modified in place.
        -complex: main structure to which individual chains from the
                  object_list have to be added after superimposition.
        -RMSD_threshold: threshold for the RMSD value of the superposition
                  between a chain of an object and the same chain on the
                  complex. Default value for the program is 0.5.
    """
    # Get core chain to start reconstruction
    core = FindCoreChain(object_list)
    if options.verbose:
        sys.stderr.write("Chain defined as core to superimpose: %s\n" % (core))
        sys.stderr.write("Added to the final complex:\n")

    # Declare Superimpose object
    sup = Superimposer()
    ref_struct = None

    # Iterate over a copy because structures are removed from object_list
    # inside the loop.
    for structure in list(object_list):
        # select the first structure with the core chain to be the reference
        # NOTE(review): the bare except silently ignores any error raised
        # while picking or adding the reference (e.g. by complex.add).
        try:
            if core in structure[0] and not ref_struct:
                ref_struct = copy.deepcopy(structure)
                complex.add(ref_struct[0])
        except:
            pass

        # if the structure contains the core chain, superimpose that to the
        # chain with same name in ref structure set before
        # NOTE(review): ref_struct is a deep copy, so `structure is not
        # ref_struct` looks always true for members of object_list - confirm.
        if core in structure[0] and (structure is not ref_struct):
            sup.set_atoms(unfold_entities(ref_struct[0][core], 'A'),
                          unfold_entities(structure[0][core], 'A'))
            sup.apply(structure[0])
            RMSD = float(sup.rms)
            print(RMSD)

            # check for clashes before adding new chain to complex
            if RMSD < RMSD_threshold:
                for chain in structure[0]:
                    if chain.get_id() != core:
                        if not CheckClashes(complex, chain):
                            chain_copy = copy.deepcopy(chain)
                            # Find an id that is not used in the complex
                            # yet, trying characters from chr(65) == 'A' on.
                            N = 65
                            while chain_copy.get_id() in [ a.get_id() for a in complex.get_chains() ]:
                                # A ValueError raised while assigning the id
                                # is skipped and the next character is tried.
                                try:
                                    chain_copy.id = chr(N)
                                except ValueError:
                                    pass
                                N += 1
                            complex[0].add(chain_copy)
                            if options.verbose:
                                sys.stderr.write("\tChain %s\n" % (chain.id))
            # NOTE(review): the nesting level of this statement could not be
            # recovered from the source; it is assumed to run for every
            # structure that was superimposed, whatever its RMSD - confirm.
            object_list.remove(structure)
    return (complex, object_list)