class TransformTests(unittest.TestCase):
    """Rotate and translate entities and check the resulting mean position."""

    def setUp(self):
        """Parse the test structure and keep the first child at every level."""
        parser = PDBParser(PERMISSIVE=True)
        self.s = parser.get_structure("X", "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def get_total_pos(self, o):
        """Return the summed atom coordinates of an entity and its atom count.

        An object exposing ``get_coord`` is treated as a single atom; any
        other object is walked recursively through ``get_list()``.
        """
        if hasattr(o, "get_coord"):
            return o.get_coord(), 1

        coord_sum = numpy.array((0.0, 0.0, 0.0))
        atom_count = 0
        for child in o.get_list():
            child_sum, child_count = self.get_total_pos(child)
            coord_sum += child_sum
            atom_count += child_count
        return coord_sum, atom_count

    def get_pos(self, o):
        """Return the average atom position of an entity."""
        coord_sum, atom_count = self.get_total_pos(o)
        return 1.0 * coord_sum / atom_count

    def test_transform(self):
        """Transform entities (rotation and translation)."""
        for entity in (self.s, self.m, self.c, self.r, self.a):
            rotation = rotmat(Vector(1, 3, 5), Vector(1, 0, 0))
            translation = numpy.array((2.4, 0, 1), "f")

            before = self.get_pos(entity)
            entity.transform(rotation, translation)
            after = self.get_pos(entity)

            # The mean position must move exactly like a single point would.
            expected = numpy.dot(before, rotation) + translation
            for observed, wanted in zip(after[:3], expected[:3]):
                self.assertAlmostEqual(observed, wanted)
class CopyTests(unittest.TestCase):
    """Check that copying entities yields new, independent objects."""

    def setUp(self):
        """Parse the test structure and keep the first child at every level."""
        self.s = PDBParser(PERMISSIVE=True).get_structure(
            'X', "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def test_atom_copy(self):
        """Copy an atom; the copy and its coordinates must be new objects."""
        aa = self.a.copy()
        # assertIsNot reports both operands on failure, unlike
        # assertFalse(x is y) which only reports "True is not false".
        self.assertIsNot(self.a, aa)
        self.assertIsNot(self.a.get_coord(), aa.get_coord())

    # NOTE: the method name is misspelt ("entitity"); it is kept unchanged so
    # that existing test IDs / test selections keep working.
    def test_entitity_copy(self):
        """Copy a structure, model, chain and residue.

        Both the copied entity and its first child must be new objects.
        """
        for e in (self.s, self.m, self.c, self.r):
            ee = e.copy()
            self.assertIsNot(e, ee)
            self.assertIsNot(e.get_list()[0], ee.get_list()[0])
class CopyTests(unittest.TestCase):
    """Tests copying SMCRA objects."""

    def setUp(self):
        """Parse the test structure quietly and keep one entity per level."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            parser = PDBParser(PERMISSIVE=True)
            structure = parser.get_structure("X", "PDB/a_structure.pdb")
            model = structure.get_list()[0]
            chain = model.get_list()[0]
            residue = chain.get_list()[0]
            atom = residue.get_list()[0]
        self.s = structure
        self.m = model
        self.c = chain
        self.r = residue
        self.a = atom

    def test_atom_copy(self):
        """Copy an atom; neither it nor its coordinates may be shared."""
        original = self.a
        duplicate = original.copy()
        self.assertIsNot(original, duplicate)
        self.assertIsNot(original.get_coord(), duplicate.get_coord())

    def test_entity_copy(self):
        """Copy a structure, model, chain and residue."""
        for original in (self.s, self.m, self.c, self.r):
            duplicate = original.copy()
            self.assertIsNot(original, duplicate)
            first_original_child = original.get_list()[0]
            first_duplicate_child = duplicate.get_list()[0]
            self.assertIsNot(first_original_child, first_duplicate_child)
class Parsed_pdb_file:
    """
    Class to represent a parsed PDB file, whose information can be used to add
    'PyMod_sequence_element' objects with structures to PyMod.

    Building an instance:
        1. optionally copies the original file into the output directory;
        2. parses the original file with Biopython;
        3. builds one PyMod element (and one 'PyMod_structure') for every chain of the
           first model which has at least one standard residue;
        4. optionally saves one PDB file per chain, reordering its atoms if needed;
        5. assigns the disulfide bridges to the elements;
        6. increments 'pymod.pdb_counter'.
    """

    # Code passed to Biopython as the id of the parsed structure.
    parsed_file_code = "parsed_by_pymod"
    # Chain id assigned to chains having a blank (" ") id in the structure file.
    blank_chain_character = "X"
    # Two atoms closer than this distance are considered to form a peptide bond
    # (presumably Angstroms, the unit of PDB coordinates - TODO confirm).
    peptide_bond_distance = 1.8

    def __init__(self, pymod, pdb_file_path, output_directory="", new_file_name=None, copy_original_file=True, save_chains_files=True):
        """
        # Arguments
            pymod: object providing the 'pdb_counter' attribute and the 'build_pymod_element'
                method used here.
            pdb_file_path: path of the original structure file on the user's system.
            output_directory: directory where the output files (such as the split chains
                files) are going to be built.
            new_file_name: if provided, its basename (without extension) is used to name the
                structure instead of the basename of 'pdb_file_path'.
            copy_original_file: if 'True', copies the full original file in the output
                directory, unless a file with the same temporary name is already there.
            save_chains_files: if 'True', saves a PDB file for each parsed chain.
        """

        self.pymod = pymod

        #------------------------------------------------------------------------------------
        # Defines the name of the files which will be built from the parsed structure file. -
        #------------------------------------------------------------------------------------

        self.output_directory = output_directory
        self.original_pdb_file_path = pdb_file_path
        self.original_base_name = os.path.splitext(os.path.basename(self.original_pdb_file_path))[0]

        # Define the name of the structures files derived from the original file.
        if not new_file_name:
            self.structure_file_name = self.original_base_name
        else:
            self.structure_file_name = os.path.splitext(os.path.basename(new_file_name))[0]

        #-------------------------------------------------------------------------------------
        # Initially copies the full PDB file in the output directory using a temporary name. -
        #-------------------------------------------------------------------------------------

        copied_full_file_path = os.path.join(self.output_directory, self._get_full_structure_file_name())
        if copy_original_file and not os.path.isfile(copied_full_file_path):
            shutil.copy(self.original_pdb_file_path, copied_full_file_path)

        #--------------------------------------------------------------------------------
        # Split the sequence in chains, get the sequences and residues using Biopython. -
        #--------------------------------------------------------------------------------

        # Silences every warning while parsing and saving the structure. The 'finally' clause
        # makes sure that warnings are switched on again even if something fails in between.
        # NOTE: the filter is deliberately reset to "always" (and not to its previous state)
        # in order to keep the behaviour callers are used to.
        warnings.simplefilter("ignore")
        try:
            # Actually parses the original structure file on the user's system and creates a
            # biopython 'Structure' object.
            with open(self.original_pdb_file_path, "r") as parsed_file_handle:
                self.parsed_biopython_structure = PDBParser(PERMISSIVE=1, QUIET=True).get_structure(self.parsed_file_code, parsed_file_handle)

            list_of_parsed_chains = self._parse_chains()

            # Build 'PyMod_elements' object for each chain of the structure file.
            self._build_pymod_elements(list_of_parsed_chains, copied_full_file_path)

            # Saves a PDB file with only the current chain of the first model of the structure.
            if save_chains_files:
                self._save_chains_files()
        finally:
            warnings.simplefilter("always")

        #-------------------------------
        # Finds the disulfide bridges. -
        #-------------------------------

        self._assign_disulfide_bridges()

        #-------------
        # Completes. -
        #-------------

        self.pymod.pdb_counter += 1


    def _parse_chains(self):
        """
        Returns a list of dictionaries, one for each chain of the first model of the parsed
        structure having at least one standard residue.
        """
        list_of_parsed_chains = []
        for model in self.parsed_biopython_structure.get_list():
            for chain in model.get_list():
                parsed_chain = self._parse_chain(chain)
                # Only adds the chain to PyMod if it has at least one standard residue.
                if parsed_chain["has_standard_residues"]:
                    list_of_parsed_chains.append(parsed_chain)
            # Stops after having parsed the first "model" in the biopython "Structure". This is
            # needed to import only the first model of multimodel files (such as NMR files).
            break
        return list_of_parsed_chains


    def _parse_chain(self, chain):
        """
        Builds the dictionary describing a Biopython chain. Chains with a blank id are renamed
        (in the Biopython object too) with the 'blank_chain_character'.
        """
        # Assigns a blank "X" chain id for PDB structures that do not specify chains id.
        found_blank_chain = chain.id == " "
        if found_blank_chain:
            chain.id = self.blank_chain_character
            pymod_id = self.blank_chain_character
        else:
            pymod_id = chain.id

        parsed_chain = {"original_id": None, # Chain ID in the PDB file.
                        "pymod_id": pymod_id, # The ID assigned in PyMod.
                        "residues": [],
                        "file_name": None,
                        "file_path": None,
                        "found_blank_chain": found_blank_chain,
                        "has_standard_residues": False}

        # Starts to build the sequences by parsing through every residue of the chain.
        for residue in chain:
            # Gets the 3 letter name of the current residue.
            resname = residue.get_resname()
            # get_id() returns something like: ('H_SCN', 1101, ' '). The first item is
            # the hetfield: 'H_SCN' for an HETRES, while ' ' for a normal residue. The
            # second item is the id of the residue according to the PDB file.
            hetfield, pdb_position = residue.get_id()[0:2]

            # For HETATM residues.
            if hetfield[0] == "H":
                # Check if the current HETRES is a modified residue. Modified residues will
                # be added to the primary sequence.
                if self._check_modified_residue(residue, chain):
                    new_residue = pymod_residue.PyMod_modified_residue(three_letter_code=resname, one_letter_code=pymod_vars.modified_residue_one_letter, db_index=pdb_position)
                else:
                    new_residue = pymod_residue.PyMod_ligand(three_letter_code=resname, one_letter_code=pymod_vars.ligand_one_letter, db_index=pdb_position)
            # For water molecules.
            elif hetfield == "W":
                new_residue = pymod_residue.PyMod_water_molecule(three_letter_code=resname, one_letter_code=pymod_vars.water_one_letter, db_index=pdb_position)
            # For standard aminoacidic residues. Adds them to the primary sequence.
            else:
                new_residue = pymod_residue.PyMod_standard_residue(three_letter_code=resname, one_letter_code=seq_manipulation.three2one(resname), db_index=pdb_position)
                parsed_chain["has_standard_residues"] = True

            parsed_chain["residues"].append(new_residue)

        return parsed_chain


    def _build_pymod_elements(self, list_of_parsed_chains, copied_full_file_path):
        """
        Builds a PyMod element and its 'PyMod_structure' for each parsed chain, storing them in
        'list_of_pymod_elements' and 'list_of_chains_structure_args' respectively.
        """
        self.list_of_pymod_elements = []
        self.list_of_chains_structure_args = []

        for numeric_chain_id, parsed_chain in enumerate(list_of_parsed_chains):
            # Defines the path of the element chain structure file. Initially uses a temporary
            # name for the file names. When the structures will be loaded in PyMod/PyMOL they
            # will be renamed using the header of the PyMod element.
            parsed_chain["file_name"] = self._get_structure_chain_file_name(parsed_chain["pymod_id"])
            parsed_chain["file_path"] = os.path.join(self.output_directory, parsed_chain["file_name"])

            # Builds the new 'PyMod_element'. The header will be used to rename the chains once
            # they are loaded in PyMod/PyMOL.
            new_element_header = self._get_new_pymod_element_header(parsed_chain["pymod_id"], parsed_chain["found_blank_chain"])
            new_element = self._build_pymod_element(residues=parsed_chain["residues"], element_header=new_element_header, color=self._get_chain_color(numeric_chain_id))

            # Builds the new structure for the PyMod element.
            new_structure = PyMod_structure(file_name_root=self.structure_file_name,
                                            full_file_path=copied_full_file_path,
                                            chain_file_path=parsed_chain["file_path"],
                                            chain_id=parsed_chain["pymod_id"],
                                            numeric_chain_id=numeric_chain_id,
                                            original_structure_file_path=self.original_pdb_file_path,
                                            original_structure_id=self.pymod.pdb_counter)
            new_element.set_structure(new_structure)
            self.list_of_pymod_elements.append(new_element)
            self.list_of_chains_structure_args.append(new_structure)


    def _save_chains_files(self):
        """
        Saves a PDB file for each PyMod element, containing only its chain of the first model,
        and fixes the order of the atoms in the file if necessary.
        """
        pdb_io = PDBIO()
        pdb_io.set_structure(self.parsed_biopython_structure)

        for element in self.list_of_pymod_elements:
            saved_structure_filepath = element.get_structure_file(basename_only=False)
            pdb_io.save(saved_structure_filepath, Select_chain_and_first_model(element.get_chain_id()))

            # Fix the chain's PDB file if necessary.
            if self._has_ligands_before_polymer(element):
                self._reorder_chain_file(element, saved_structure_filepath)


    def _has_ligands_before_polymer(self, element):
        """
        Checks if there are any ligands or water molecules before the polypeptide chain atoms.
        """
        found_lig = False
        for residue in element.get_residues(standard=True, modified_residues=True, ligands=True, water=True):
            if residue.is_polymer_residue():
                if found_lig:
                    return True
            else:
                found_lig = True
        return False


    def _reorder_chain_file(self, element, saved_structure_filepath):
        """
        Reorders the residues in the structure file, so that the polymeric residues are put
        first and the ligands and water molecules for last. Atoms are renumbered from 1.
        """
        np_hetatms = element.get_residues(standard=False, modified_residues=False, ligands=True, water=True)
        # A set, since a membership test is performed for every atom line of the file.
        np_hetatms_ids = set(h.db_index for h in np_hetatms)

        pol_lines = [] # This will be populated with the polymer chain lines.
        np_hetatms_lines = [] # This will be populated with the non-polymer heteroatom lines.
        with open(saved_structure_filepath, "r") as str_fh:
            for line in str_fh:
                if line.startswith(("HETATM", "ATOM")):
                    # Columns 23-26 of an atom line store the residue number.
                    if int(line[22:26]) in np_hetatms_ids:
                        np_hetatms_lines.append(line)
                    else:
                        pol_lines.append(line)

        atom_lines = pol_lines + np_hetatms_lines
        # Without atom lines there is nothing to reorder: leaves the file untouched rather
        # than truncating it.
        if not atom_lines:
            return

        # Rewrite the structure file with the new atom order.
        with open(saved_structure_filepath, "w") as str_fh:
            atm_count = 1
            for atom_line in atom_lines:
                str_fh.write(atom_line[0:6] + str(atm_count).rjust(5, " ") + atom_line[11:])
                atm_count += 1
            # The "TER" record reuses the residue name, chain and number of the last atom line.
            str_fh.write("TER" + str(atm_count).rjust(8, " ") + atom_lines[-1][17:26].rjust(15, " ") + "\n")
            str_fh.write("END\n")


    def _correct_chain_id(self, chain_id):
        """
        Returns the 'blank_chain_character' for blank chain ids, the id itself otherwise.
        """
        if chain_id != " ":
            return chain_id
        else:
            return self.blank_chain_character


    def get_pymod_elements(self):
        """
        Returns the list of PyMod elements built from the chains of the structure.
        """
        return self.list_of_pymod_elements

    def get_chains_ids(self):
        """
        Returns the list of the chain ids of the structures built from the parsed file.
        """
        return [struct.chain_id for struct in self.list_of_chains_structure_args]

    def get_pymod_element_by_chain(self, chain_id):
        """
        Returns the first PyMod element having the chain id specified in 'chain_id'. Raises a
        'KeyError' if there is no such element.
        """
        for e in self.list_of_pymod_elements:
            if e.get_chain_id() == chain_id:
                return e
        raise KeyError("No element with chain '%s' was built from the parsed PDB file." % chain_id)


    #################################################################
    # Check if a residue is part of a molecule.                     #
    #################################################################

    def _check_modified_residue(self, residue, chain):
        """
        Returns 'True' if the residue is a modified residue, 'False' if is a ligand.
        """
        if not self._check_polypetide_atoms(residue):
            return False
        # Checks if the heteroresidue is forming a peptide bond with its C carboxy atom.
        if self._find_links(residue, chain, self._get_carboxy_atom, self._get_amino_atom, "N"):
            return True
        # Checks if the heteroresidue is forming a peptide bond with its N amino atom.
        return self._find_links(residue, chain, self._get_amino_atom, self._get_carboxy_atom, "C")


    def _check_polypetide_atoms(self, residue):
        """
        Checks if a residue has the necessary atoms to make peptide bonds.
        """
        atom_names = set(residue.child_dict.keys())
        # NOTE: assuming the 'pymod_vars' values are sets, '<' is a proper subset test: the
        # residue must have at least one atom in addition to the backbone ones.
        return pymod_vars.std_amino_acid_backbone_atoms < atom_names or pymod_vars.mod_amino_acid_backbone_atoms < atom_names


    def _find_links(self, residue, chain, get_residue_atom, get_neighbour_atom, link="N"):
        """
        Returns 'True' if an atom of 'residue' is closer than 'peptide_bond_distance' to an
        atom of any other residue of 'chain', 'False' otherwise (also when 'residue' lacks the
        needed atom).

        # Arguments
            residue: the residue to check.
            chain: iterable with the residues among which to look for neighbours.
            get_residue_atom: callable taking a residue and returning the atom of 'residue' to
                use (or 'None').
            get_neighbour_atom: callable taking a residue and returning the atom of a
                neighbour residue to use (or 'None').
            link: currently not used, kept for backward compatibility.
        """
        # Get either the N amino or C carboxy atom of the residue.
        residue_atom = get_residue_atom(residue)
        if residue_atom is None:
            return False

        # Find other residues in the chain having an atom which can make a peptide bond with
        # the 'residue' provided in the argument. If such atom is close enough, the residue is
        # a part of a polypeptide chain.
        for res in chain:
            if res == residue:
                continue
            neighbour_atom = get_neighbour_atom(res)
            if neighbour_atom is None:
                continue
            # Subtracting two atoms yields their distance.
            if neighbour_atom - residue_atom < self.peptide_bond_distance:
                return True
        return False


    def _get_carboxy_atom(self, residue):
        """
        Returns the first non-disordered atom named "C" or "C1" of the residue, or 'None'.
        """
        return self._get_atom_by_type(residue, ("C", "C1"))

    def _get_amino_atom(self, residue):
        """
        Returns the first non-disordered atom named "N" or "N2" of the residue, or 'None'.
        """
        return self._get_atom_by_type(residue, ("N", "N2"))

    def _get_atom_by_type(self, residue, atom_types_tuple):
        """
        Returns the first atom of the residue whose name is in 'atom_types_tuple' (names are
        tried in order) and which is not disordered. Returns 'None' if there is not any.
        """
        for atom_type in atom_types_tuple:
            if atom_type in residue.child_dict:
                atom = residue.child_dict[atom_type]
                if not atom.is_disordered():
                    return atom
        return None


    #################################################################
    # Build files and PyMod elements from the parsed structure.     #
    #################################################################

    def _get_new_pymod_element_header(self, chain_id, found_blank_chain=False, compact_names=False):
        """
        Returns the header of the PyMod element of a chain: "<name>_chain_<id>" by default. If
        'compact_names' is 'True', returns "<name>_<id>", or just "<name>" for blank chains.
        """
        if not compact_names:
            parsed_chain_name = "%s_chain_%s.pdb" % (self.structure_file_name, chain_id)
        elif not found_blank_chain:
            parsed_chain_name = "%s_%s.pdb" % (self.structure_file_name, chain_id)
        else:
            parsed_chain_name = "%s.pdb" % (self.structure_file_name)
        return os.path.splitext(parsed_chain_name)[0]

    def _get_structure_chain_file_name(self, chain_id):
        """
        Returns the temporary name of the structure file of a chain.
        """
        return pymod_vars.structure_chain_temp_name % (self.pymod.pdb_counter, chain_id)

    def _get_full_structure_file_name(self):
        """
        Returns the temporary name of the copy of the full structure file.
        """
        return pymod_vars.structure_temp_name % self.pymod.pdb_counter

    def _build_pymod_element(self, residues, element_header, color):
        """
        Builds a 'PyMod_sequence_element' through the 'build_pymod_element' method of PyMod.
        """
        return self.pymod.build_pymod_element(pymod_element.PyMod_sequence_element, residues=residues, header=element_header, color=color)

    def _get_chain_color(self, chain_number):
        """
        Returns the color of a chain, cycling through the regular colors of PyMOL.
        """
        list_of_model_chains_colors = pymod_vars.pymol_regular_colors_list
        return list_of_model_chains_colors[chain_number % len(list_of_model_chains_colors)]


    #################################################################
    # Analyze structural features.                                  #
    #################################################################

    def _assign_disulfide_bridges(self):
        """
        Assigns disulfide bridges to the PyMod elements built from the parsed structure file.
        A 'KeyError' is raised if a bridge involves a chain for which no element was built.
        """
        list_of_disulfides = get_disulfide_bridges_of_structure(self.parsed_biopython_structure)
        for dsb in list_of_disulfides:
            # Get the chain of the first SG atom.
            dsb_chain_i = self._correct_chain_id(dsb["chain_i"])
            # Get the chain of the second SG.
            dsb_chain_j = self._correct_chain_id(dsb["chain_j"])
            # Looks up the elements only once for each bridge.
            element_i = self.get_pymod_element_by_chain(dsb_chain_i)
            element_j = self.get_pymod_element_by_chain(dsb_chain_j)

            cys1_db_index = dsb["residue_i"][1]
            cys2_db_index = dsb["residue_j"][1]
            new_dsb = Disulfide_bridge(cys1=cys1_db_index, cys2=cys2_db_index,
                                       cys1_seq_index=element_i.get_residue_by_db_index(cys1_db_index).seq_index,
                                       cys2_seq_index=element_j.get_residue_by_db_index(cys2_db_index).seq_index,
                                       cys1_chain=dsb_chain_i, cys2_chain=dsb_chain_j,
                                       distance=dsb["distance"], chi3_dihedral=dsb["chi3_dihedral"])

            # For intrachain residues.
            if dsb_chain_i == dsb_chain_j:
                element_i.add_disulfide(disulfide=new_dsb)
            # For interchain residues.
            else:
                element_j.add_disulfide(disulfide=new_dsb)
                element_i.add_disulfide(disulfide=new_dsb)