Exemple #1
0
class TransformTests(unittest.TestCase):

    def setUp(self):
        self.s = PDBParser(PERMISSIVE=True).get_structure(
            'X', "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def get_total_pos(self, o):
        """
        Returns the sum of the positions of atoms in an entity along
        with the number of atoms.
        """
        if hasattr(o, "get_coord"):
            return o.get_coord(), 1
        total_pos = numpy.array((0.0,0.0,0.0))
        total_count = 0
        for p in o.get_list():
            pos, count = self.get_total_pos(p)
            total_pos += pos
            total_count += count
        return total_pos, total_count

    def get_pos(self, o):
        """
        Returns the average atom position in an entity.
        """
        pos, count = self.get_total_pos(o)
        return 1.0*pos/count

    def test_transform(self):
        """Transform entities (rotation and translation)."""
        for o in (self.s, self.m, self.c, self.r, self.a):
            rotation = rotmat(Vector(1,3,5), Vector(1,0,0))
            translation=numpy.array((2.4,0,1), 'f')
            oldpos = self.get_pos(o)
            o.transform(rotation, translation)
            newpos = self.get_pos(o)
            newpos_check = numpy.dot(oldpos, rotation) + translation
            for i in range(0, 3):
                self.assertAlmostEqual(newpos[i], newpos_check[i])
Exemple #2
0
class TransformTests(unittest.TestCase):

    def setUp(self):
        self.s = PDBParser(PERMISSIVE=True).get_structure(
            'X', "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def get_total_pos(self, o):
        """
        Returns the sum of the positions of atoms in an entity along
        with the number of atoms.
        """
        if hasattr(o, "get_coord"):
            return o.get_coord(), 1
        total_pos = numpy.array((0.0,0.0,0.0))
        total_count = 0
        for p in o.get_list():
            pos, count = self.get_total_pos(p)
            total_pos += pos
            total_count += count
        return total_pos, total_count

    def get_pos(self, o):
        """
        Returns the average atom position in an entity.
        """
        pos, count = self.get_total_pos(o)
        return 1.0*pos/count

    def test_transform(self):
        """Transform entities (rotation and translation)."""
        for o in (self.s, self.m, self.c, self.r, self.a):
            rotation = rotmat(Vector(1,3,5), Vector(1,0,0))
            translation=numpy.array((2.4,0,1), 'f')
            oldpos = self.get_pos(o)
            o.transform(rotation, translation)
            newpos = self.get_pos(o)
            newpos_check = numpy.dot(oldpos, rotation) + translation
            for i in range(0, 3):
                self.assertAlmostEqual(newpos[i], newpos_check[i])
Exemple #3
0
class CopyTests(unittest.TestCase):
    def setUp(self):
        self.s = PDBParser(PERMISSIVE=True).get_structure(
            'X', "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def test_atom_copy(self):
        aa = self.a.copy()
        self.assertFalse(self.a is aa)
        self.assertFalse(self.a.get_coord() is aa.get_coord())

    def test_entitity_copy(self):
        """Make a copy of a residue."""
        for e in (self.s, self.m, self.c, self.r):
            ee = e.copy()
            self.assertFalse(e is ee)
            self.assertFalse(e.get_list()[0] is ee.get_list()[0])
Exemple #4
0
class CopyTests(unittest.TestCase):

    def setUp(self):
        self.s = PDBParser(PERMISSIVE=True).get_structure(
            'X', "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def test_atom_copy(self):
        aa = self.a.copy()
        self.assertFalse(self.a is aa)
        self.assertFalse(self.a.get_coord() is aa.get_coord())

    def test_entitity_copy(self):
        """Make a copy of a residue."""
        for e in (self.s, self.m, self.c, self.r):
            ee = e.copy()
            self.assertFalse(e is ee)
            self.assertFalse(e.get_list()[0] is ee.get_list()[0])
class CopyTests(unittest.TestCase):
    """Tests copying SMCRA objects."""
    def setUp(self):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            self.s = PDBParser(PERMISSIVE=True).get_structure(
                "X", "PDB/a_structure.pdb")
        self.m = self.s.get_list()[0]
        self.c = self.m.get_list()[0]
        self.r = self.c.get_list()[0]
        self.a = self.r.get_list()[0]

    def test_atom_copy(self):
        aa = self.a.copy()
        self.assertIsNot(self.a, aa)
        self.assertIsNot(self.a.get_coord(), aa.get_coord())

    def test_entity_copy(self):
        """Make a copy of a residue."""
        for e in (self.s, self.m, self.c, self.r):
            ee = e.copy()
            self.assertIsNot(e, ee)
            self.assertIsNot(e.get_list()[0], ee.get_list()[0])
Exemple #6
0
class Parsed_pdb_file:
    """
    Class to represent a parsed PDB file, which information can be used to add
    'PyMod_sequence_element' with structures to PyMod.
    """

    # counter = 0
    parsed_file_code = "parsed_by_pymod"
    blank_chain_character = "X"

    def __init__(self, pymod, pdb_file_path, output_directory="", new_file_name=None, copy_original_file=True, save_chains_files=True):
        """
        Parses a structure file and builds a PyMod element for each chain of its first model.

        Arguments:
            pymod: object whose 'pdb_counter' attribute is read (and incremented once parsing
                completes) and whose 'build_pymod_element' method is used to build the elements.
            pdb_file_path: path of the original structure file on the user's system.
            output_directory: directory where the copied full file and the chain files are built.
            new_file_name: if provided, its basename (without extension) is used as the root name
                of the structure instead of the basename of 'pdb_file_path'.
            copy_original_file: if 'True', copies the original file in the output directory,
                unless a file with the same temporary name is already there.
            save_chains_files: if 'True', writes one PDB file for each parsed chain.
        """

        self.pymod = pymod

        #------------------------------------------------------------------------------------
        # Defines the name of the files which will be built from the parsed structure file. -
        #------------------------------------------------------------------------------------

        self.output_directory = output_directory # Directory where the output files (such as the split chains files) are going to be built.
        self.original_pdb_file_path = pdb_file_path # Path of the original structure file on the user's system.
        self.original_base_name = os.path.splitext(os.path.basename(self.original_pdb_file_path))[0] # Original basename.
        # Define the name of the structures files derived from the original file.
        if not new_file_name:
            self.structure_file_name = self.original_base_name
        else:
            self.structure_file_name = os.path.splitext(os.path.basename(new_file_name))[0]

        #-------------------------------------------------------------------------------------
        # Initially copies the full PDB file in the output directory using a temporary name. -
        #-------------------------------------------------------------------------------------

        copied_full_file_path = os.path.join(self.output_directory, self._get_full_structure_file_name())
        if copy_original_file and not os.path.isfile(copied_full_file_path):
            shutil.copy(self.original_pdb_file_path, copied_full_file_path)

        #--------------------------------------------------------------------------------
        # Split the sequence in chains, get the sequences and residues using Biopython. -
        #--------------------------------------------------------------------------------

        # Warnings are silenced only for the duration of the parsing: the context manager restores
        # the previous filters afterwards, even when an exception is raised, instead of leaving
        # the interpreter-wide filters modified.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            # Actually parses the original structure file on the user's system. The 'with'
            # statement closes the file also when the parser raises an exception.
            with open(self.original_pdb_file_path, "r") as parsed_file_handle:
                # Creates a biopython 'Structure' object.
                self.parsed_biopython_structure = PDBParser(PERMISSIVE=1, QUIET=True).get_structure(self.parsed_file_code, parsed_file_handle)

            list_of_parsed_chains = self._parse_first_model_chains()
            self._build_elements_from_chains(list_of_parsed_chains, copied_full_file_path)
            if save_chains_files:
                self._save_chains_files()

        #-------------------------------
        # Finds the disulfide bridges. -
        #-------------------------------

        self._assign_disulfide_bridges()

        #-------------
        # Completes. -
        #-------------

        self.pymod.pdb_counter += 1


    def _parse_first_model_chains(self):
        """
        Returns a list with a dictionary for each chain of the first model having at least one
        standard residue. Chains with a blank id are renamed using 'blank_chain_character'.
        """
        # The items of this list will contain information used to build the elements to be loaded
        # in PyMod.
        list_of_parsed_chains = []

        # Only the first "model" of the biopython "Structure" is parsed. This is needed to import
        # only the first model of multimodel files (such as NMR files).
        for model in self.parsed_biopython_structure.get_list()[:1]:
            for chain in model.get_list():
                found_blank_chain = chain.id == " "
                parsed_chain = {"original_id": chain.id, # Chain ID in the PDB file.
                                "pymod_id": None, # The ID assigned in PyMod.
                                "residues": [],
                                "file_name": None,
                                "file_path": None,
                                "found_blank_chain": found_blank_chain,
                                "has_standard_residues": False}

                # Assigns a blank "X" chain id for PDB structures that do not specify chains id.
                if found_blank_chain:
                    chain.id = self.blank_chain_character
                    parsed_chain["pymod_id"] = self.blank_chain_character
                else:
                    parsed_chain["pymod_id"] = chain.id

                # Starts to build the sequences by parsing through every residue of the chain.
                for residue in chain:
                    # Gets the 3 letter name of the current residue.
                    resname = residue.get_resname()
                    # get_id() returns something like: ('H_SCN', 1101, ' '). The first item is
                    # the hetfield: 'H_SCN' for an HETRES, while ' ' for a normal residue. The
                    # second item is the id of the residue according to the PDB file.
                    hetfield, pdb_position = residue.get_id()[0:2]
                    # For HETATM residues.
                    if hetfield[0] == "H":
                        # Check if the current HETRES is a modified residue. Modified residues will
                        # be added to the primary sequence.
                        if self._check_modified_residue(residue, chain):
                            new_residue = pymod_residue.PyMod_modified_residue(three_letter_code=resname, one_letter_code=pymod_vars.modified_residue_one_letter, db_index=pdb_position)
                        else:
                            new_residue = pymod_residue.PyMod_ligand(three_letter_code=resname, one_letter_code=pymod_vars.ligand_one_letter, db_index=pdb_position)
                    # For water molecules.
                    elif hetfield == "W":
                        new_residue = pymod_residue.PyMod_water_molecule(three_letter_code=resname, one_letter_code=pymod_vars.water_one_letter, db_index=pdb_position)
                    # For standard amino acid residues. Adds them to the primary sequence.
                    else:
                        new_residue = pymod_residue.PyMod_standard_residue(three_letter_code=resname, one_letter_code=seq_manipulation.three2one(resname), db_index=pdb_position)
                        parsed_chain["has_standard_residues"] = True
                    parsed_chain["residues"].append(new_residue)

                # Only adds the chain to PyMod if it has at least one standard residue.
                if parsed_chain["has_standard_residues"]:
                    list_of_parsed_chains.append(parsed_chain)

        return list_of_parsed_chains


    def _build_elements_from_chains(self, list_of_parsed_chains, copied_full_file_path):
        """
        Builds a PyMod element and a 'PyMod_structure' for each parsed chain and stores them in
        'list_of_pymod_elements' and 'list_of_chains_structure_args'.
        """
        self.list_of_pymod_elements = []
        self.list_of_chains_structure_args = []

        for numeric_chain_id, parsed_chain in enumerate(list_of_parsed_chains):
            # Defines the path of the element chain structure file. Initially uses a temporary name
            # for the file names. When the structures will be loaded in PyMod/PyMOL they will be
            # renamed using the header of the PyMod element.
            parsed_chain["file_name"] = self._get_structure_chain_file_name(parsed_chain["pymod_id"])
            parsed_chain["file_path"] = os.path.join(self.output_directory, parsed_chain["file_name"])
            # Builds the new 'PyMod_element'. The header will be used to rename the chains once they
            # are loaded in PyMod/PyMOL.
            new_element_header = self._get_new_pymod_element_header(parsed_chain["pymod_id"], parsed_chain["found_blank_chain"])
            new_element = self._build_pymod_element(residues=parsed_chain["residues"], element_header=new_element_header, color=self._get_chain_color(numeric_chain_id))
            # Builds the new structure for the PyMod element.
            new_structure = PyMod_structure(file_name_root=self.structure_file_name,
                             full_file_path=copied_full_file_path,
                             chain_file_path=parsed_chain["file_path"],
                             chain_id=parsed_chain["pymod_id"], numeric_chain_id=numeric_chain_id,
                             original_structure_file_path=self.original_pdb_file_path,
                             original_structure_id=self.pymod.pdb_counter)
            new_element.set_structure(new_structure)

            self.list_of_pymod_elements.append(new_element)
            self.list_of_chains_structure_args.append(new_structure)


    def _save_chains_files(self):
        """
        Saves a PDB file with only the atoms of the first model for each chain having a PyMod
        element, reordering the file when non-polymer residues precede polymer ones.
        """
        io = PDBIO()
        io.set_structure(self.parsed_biopython_structure)

        for element in self.list_of_pymod_elements:
            saved_structure_filepath = element.get_structure_file(basename_only=False)
            io.save(saved_structure_filepath, Select_chain_and_first_model(element.get_chain_id()))
            # Fix the chain's PDB file if necessary.
            if self._has_ligands_before_polymer(element):
                self._reorder_chain_file(element, saved_structure_filepath)


    def _has_ligands_before_polymer(self, element):
        """
        Returns 'True' if a polymer residue of the element comes after a non-polymer one (a ligand
        or a water molecule).
        """
        found_lig = False
        for residue in element.get_residues(standard=True, modified_residues=True, ligands=True, water=True):
            if not residue.is_polymer_residue():
                found_lig = True
            elif found_lig:
                return True
        return False


    def _reorder_chain_file(self, element, structure_filepath):
        """
        Rewrites a chain structure file, so that the polymeric residues are put first and the
        ligands and water molecules for last. Atoms are renumbered starting from 1.
        """
        np_hetatms = element.get_residues(standard=False, modified_residues=False, ligands=True, water=True)
        # A set makes the membership test performed for each atom line a constant-time operation.
        np_hetatms_ids = set(h.db_index for h in np_hetatms)

        pol_lines = [] # This will be populated with the polymer chain lines.
        np_hetatms_lines = [] # This will be populated with the non-polymer heteroatom lines.
        with open(structure_filepath, "r") as str_fh:
            for line in str_fh:
                if line.startswith(("HETATM", "ATOM")):
                    # Columns 23-26 of a PDB atom line contain the residue number.
                    if int(line[22:26]) in np_hetatms_ids:
                        np_hetatms_lines.append(line)
                    else:
                        pol_lines.append(line)

        ordered_lines = pol_lines + np_hetatms_lines
        # With no atom lines there is nothing to reorder and no last line from which to build the
        # TER record: leaves the file untouched.
        if not ordered_lines:
            return

        # Rewrite the structure file with the new atom order.
        with open(structure_filepath, "w") as str_fh:
            for atm_count, line in enumerate(ordered_lines, 1):
                str_fh.write(line[0:6] + str(atm_count).rjust(5, " ") + line[11:])
            # The TER record takes the next serial number and the residue fields of the last atom.
            str_fh.write("TER" + str(len(ordered_lines) + 1).rjust(8, " ") + ordered_lines[-1][17:26].rjust(15, " ") + "\n")
            str_fh.write("END\n")


    def _correct_chain_id(self, chain_id):
        """
        Returns the chain id, replacing a blank (" ") id with 'blank_chain_character'.
        """
        return self.blank_chain_character if chain_id == " " else chain_id


    def get_pymod_elements(self):
        """
        Returns the list of PyMod elements built from the parsed structure file (the list
        itself, not a copy).
        """
        built_elements = self.list_of_pymod_elements
        return built_elements

    def get_chains_ids(self):
        return [struct.chain_id for struct in self.list_of_chains_structure_args]

    def get_pymod_element_by_chain(self, chain_id):
        for e in self.list_of_pymod_elements:
            if e.get_chain_id() == chain_id:
                return e
        raise KeyError("No element with chain '%s' was built from the parsed PDB file." % chain_id)


    #################################################################
    # Check if a residue is part of a molecule.                     #
    #################################################################

    peptide_bond_distance = 1.8

    def _check_modified_residue(self, residue, chain):
        """
        Returns 'True' if the residue is a modified residue, 'False' if is a ligand.

        A heteroresidue is considered a modified residue when it has the backbone atoms needed
        to make peptide bonds and at least one of them is linked to another residue of 'chain'.
        The returned value is always a boolean, also when '_find_links' returns 'None'.
        """
        if not self._check_polypetide_atoms(residue):
            return False
        # Checks if the heteroresidue is forming a peptide bond with its C carboxy atom. If it
        # does, there is no need to scan the chain a second time.
        if self._find_links(residue, chain, self._get_carboxy_atom, self._get_amino_atom, "N"):
            return True
        # Checks if the heteroresidue is forming a peptide bond with its N amino atom.
        return bool(self._find_links(residue, chain, self._get_amino_atom, self._get_carboxy_atom, "C"))

    def _check_polypetide_atoms(self, residue):
        """
        Checks if the atoms of a residue form a proper superset of either the standard or the
        modified amino acid backbone atoms defined in 'pymod_vars'.
        """
        if pymod_vars.std_amino_acid_backbone_atoms < set(residue.child_dict.keys()):
            return True
        return pymod_vars.mod_amino_acid_backbone_atoms < set(residue.child_dict.keys())

    def _find_links(self, residue, chain, get_residue_atom, get_neighbour_atom, link="N"):
        """
        Checks if an atom of 'residue' is close enough to an atom of another residue of 'chain'
        to make a peptide bond with it.

        Arguments:
            residue: the residue to check.
            chain: iterable of the residues among which the neighbours are searched.
            get_residue_atom: callable returning the atom of 'residue' to check (or 'None').
            get_neighbour_atom: callable returning the candidate atom of a neighbour residue
                (or 'None').
            link: currently not used, kept for compatibility with existing callers.

        Returns 'None' if 'residue' does not have the requested atom, 'True' if a neighbour atom
        is found at a distance lower than 'peptide_bond_distance', 'False' otherwise.
        """
        # Get either the N amino or C carboxy atom of the residue.
        residue_atom = get_residue_atom(residue)
        if residue_atom is None:
            return None

        # Scans the other residues of the chain and stops at the first one having an atom close
        # enough to make a peptide bond with the 'residue' provided in the argument. If such atom
        # is found, the residue is a part of a polypeptide chain.
        for res in chain:
            if res == residue:
                continue
            neighbour_atom = get_neighbour_atom(res)
            if neighbour_atom is None:
                continue
            # Subtracting the two atoms is expected to give their distance.
            if neighbour_atom - residue_atom < self.peptide_bond_distance:
                return True
        return False

    def _get_carboxy_atom(self, residue):
        return self._get_atom_by_type(residue, ("C", "C1"))

    def _get_amino_atom(self, residue):
        return self._get_atom_by_type(residue, ("N", "N2"))

    def _get_atom_by_type(self, residue, atom_types_tuple):
        for atom_type in atom_types_tuple:
            if atom_type in residue.child_dict:
                if not residue.child_dict[atom_type].is_disordered():
                    return residue.child_dict[atom_type]
        return None


    #################################################################
    # Build files and PyMod elements from the parsed structure.     #
    #################################################################

    def _get_new_pymod_element_header(self, chain_id, found_blank_chain=False, compact_names=False):
        """
        Returns the header of the PyMod element of a chain, built from the name of the structure
        file. With 'compact_names' the "chain" word is omitted and the chain id is also left out
        when the chain had a blank id in the original file.
        """
        if compact_names and found_blank_chain:
            chain_file_name = "%s.pdb" % (self.structure_file_name)
        elif compact_names:
            chain_file_name = "%s_%s.pdb" % (self.structure_file_name, chain_id)
        else:
            chain_file_name = "%s_chain_%s.pdb" % (self.structure_file_name, chain_id)
        # The header is the file name without its extension.
        header, _extension = os.path.splitext(chain_file_name)
        return header

    def _get_structure_chain_file_name(self, chain_id):
        """
        Returns the temporary name of the structure file of a chain, obtained by filling the
        'pymod_vars.structure_chain_temp_name' template with the current PDB counter of PyMod
        and the chain id.
        """
        name_template = pymod_vars.structure_chain_temp_name
        return name_template % (self.pymod.pdb_counter, chain_id)

    def _get_full_structure_file_name(self):
        """
        Returns the temporary name of the full structure file, obtained by filling the
        'pymod_vars.structure_temp_name' template with the current PDB counter of PyMod.
        """
        name_template = pymod_vars.structure_temp_name
        return name_template % self.pymod.pdb_counter

    def _build_pymod_element(self, residues, element_header, color):
        """
        Asks PyMod to build a 'PyMod_sequence_element' with the provided residues, header and
        color and returns what 'build_pymod_element' returns.
        """
        build_element = self.pymod.build_pymod_element
        return build_element(pymod_element.PyMod_sequence_element,
                             residues=residues,
                             header=element_header,
                             color=color)

    def _get_chain_color(self, chain_number):
        """
        Returns the color of a chain, picked from 'pymod_vars.pymol_regular_colors_list'. The
        list is cycled when there are more chains than colors.
        """
        available_colors = pymod_vars.pymol_regular_colors_list
        color_index = chain_number % len(available_colors)
        return available_colors[color_index]


    #################################################################
    # Analyze structural features.                                  #
    #################################################################

    def _assign_disulfide_bridges(self):
        """
        Builds a 'Disulfide_bridge' for each bridge found in the parsed structure and adds it to
        the PyMod elements of the chains involved.
        """
        for bridge in get_disulfide_bridges_of_structure(self.parsed_biopython_structure):
            # Chains of the first and of the second SG atom (blank ids are corrected).
            chain_i = self._correct_chain_id(bridge["chain_i"])
            chain_j = self._correct_chain_id(bridge["chain_j"])
            # The second item of each residue entry is used as its index in the PDB file.
            db_index_i = bridge["residue_i"][1]
            db_index_j = bridge["residue_j"][1]
            seq_index_i = self.get_pymod_element_by_chain(chain_i).get_residue_by_db_index(db_index_i).seq_index
            seq_index_j = self.get_pymod_element_by_chain(chain_j).get_residue_by_db_index(db_index_j).seq_index
            new_dsb = Disulfide_bridge(cys1=db_index_i, cys2=db_index_j,
                                       cys1_seq_index=seq_index_i,
                                       cys2_seq_index=seq_index_j,
                                       cys1_chain=chain_i, cys2_chain=chain_j,
                                       distance=bridge["distance"], chi3_dihedral=bridge["chi3_dihedral"])
            # Interchain bridges are also added to the element of the second chain.
            if chain_i != chain_j:
                self.get_pymod_element_by_chain(chain_j).add_disulfide(disulfide=new_dsb)
            self.get_pymod_element_by_chain(chain_i).add_disulfide(disulfide=new_dsb)