Python MMCIFParser Examples, Bio.PDB.MMCIFParser.MMCIFParser Python Examples

Example #1

0

Show file

File: test_MMCIF.py Project: vincentdavis/biopython

    def test_insertions(self):
        """Test file with residue insertion codes.

        Parses PDB/4ZHL.cif and, for each polypeptide builder, checks the
        number of polypeptides found, the residue numbers at both ends of the
        first polypeptide, and that polypeptide's sequence.
        """
        refseq = (
            "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
            "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
            "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
            "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
        )
        parser = MMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            # Construction warnings are expected here and are not under test.
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/4ZHL.cif")
        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # Build with non-standard amino acids allowed
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 2)
            pp = polypeptides[0]
            # Check the start and end positions (first segment only)
            self.assertEqual(pp[0].get_id()[1], 16)
            self.assertEqual(pp[-1].get_id()[1], 244)
            # Check the sequence
            s = pp.get_sequence()
            # assertIsInstance reports the offending type when it fails,
            # unlike assertTrue(isinstance(...)).
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(refseq, str(s))

Example #2

0

Show file

File: test_PDB_MMCIFParser.py Project: wwydmanski/biopython

 def test_insertions(self):
     """Test file with residue insertion codes.

     Parses PDB/4ZHL.cif and, for each polypeptide builder, checks the number
     of polypeptides found, the residue numbers at both ends of the first
     polypeptide, and that polypeptide's sequence.
     """
     refseq = (
         "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
         "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
         "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
         "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
     )
     parser = MMCIFParser(QUIET=1)
     with warnings.catch_warnings():
         # Construction warnings are expected here and are not under test.
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = parser.get_structure("example", "PDB/4ZHL.cif")
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # Build with non-standard amino acids allowed
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 2)
         pp = polypeptides[0]
         # Check the start and end positions (first segment only)
         self.assertEqual(pp[0].get_id()[1], 16)
         self.assertEqual(pp[-1].get_id()[1], 244)
         # Check the sequence
         s = pp.get_sequence()
         # assertIsInstance reports the offending type when it fails,
         # unlike assertTrue(isinstance(...)).
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual(refseq, str(s))

Example #3

0

Show file

File: test_microhet.py Project: abradle/biopython

 def test_mmtf(self):
     """Parse the mmCIF file PDB/1EJG.cif and print the resulting structure.

     PDBConstructionWarning is silenced for the duration of the parse.
     """
     with warnings.catch_warnings():
         cif_reader = MMCIFParser()
         warnings.simplefilter('ignore', PDBConstructionWarning)
         parsed = cif_reader.get_structure("MICR", "PDB/1EJG.cif")
         print(parsed)

Example #4

0

Show file

File: test_PDB_MMCIFParser.py Project: anntzer/biopython

    def test_conversion(self):
        """Parse 1LCD.cif, write it out as PDB, parse that again and compare.

        The number of models, the atom names and the atom elements must be the
        same in the structure read from mmCIF and the one read back from the
        temporary PDB file.
        """
        import os  # local import: only needed to manage the temporary file

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # The writer and parser below are given the path, not the descriptor,
        # so close the descriptor immediately instead of leaking it.
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)
        finally:
            # Do not leave the temporary file behind, even if a step failed.
            os.remove(filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)

Example #5

0

Show file

File: test_PDB_MMCIFParser.py Project: anntzer/biopython

    def test_with_anisotrop(self):
        """Check atom data and anisotropic B factors read from 4CUP.cif.

        The file is parsed with both MMCIFParser and FastMMCIFParser and the
        same assertions are run against each resulting structure.
        """
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = fast_parser.get_structure("example", "PDB/4CUP.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expected values are the same for both parsers, so build them once.
        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        coord = numpy.array([50.346, 19.287, 17.288], dtype=numpy.float32)
        first_anisou = numpy.array([0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904], dtype=numpy.float32)
        cb_anisou = numpy.array([1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221], dtype=numpy.float32)

        for parsed, atoms in [(structure, s_atoms), (f_structure, f_atoms)]:
            first_five = atoms[:5]
            self.assertSequenceEqual([a.get_name() for a in first_five], atom_names)
            self.assertSequenceEqual([a.get_id() for a in first_five], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in first_five], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in first_five], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)
            self.assertEqual(atoms[0].get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(atoms[0].get_anisou(), first_anisou)
            # Look the atom up in the structure under test. This lookup used
            # to be hard-wired to f_structure, so the MMCIFParser result was
            # never checked here.
            atom_937 = list(parsed[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(), cb_anisou)

Example #6

0

Show file

File: test_PDB_MMCIFParser.py Project: wangdang511/biopython

    def test_conversion(self):
        """Parse 1LCD.cif, write it out as PDB, parse that again and compare.

        The number of models, the atom names and the atom elements must be the
        same in the structure read from mmCIF and the one read back from the
        temporary PDB file.
        """
        import os  # local import: only needed to manage the temporary file

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # The writer and parser below are given the path, not the descriptor,
        # so close the descriptor immediately instead of leaking it.
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)
        finally:
            # Do not leave the temporary file behind, even if a step failed.
            os.remove(filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        # The cif_* lists must come from cif_struct; reading them from
        # pdb_struct would compare the PDB structure with itself.
        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)

Example #7

0

Show file

File: myPDBParser.py Project: minghao2016/BIPSPI

 def __init__(self, structure_builder=None, QUIET=False):
     """Initialise both parser bases with the same arguments.

     structure_builder and QUIET are forwarded unchanged, first to
     PDBParser.__init__ and then to MMCIFParser.__init__.
     """
     for parser_base in (PDBParser, MMCIFParser):
         parser_base.__init__(
             self, structure_builder=structure_builder, QUIET=QUIET
         )

Example #8

0

Show file

def get_info_mmcif(file):
    """Return per-chain barycentres and residue-to-barycentre distances.

    Each residue of the first model is represented by one point: the
    coordinates of its CA atom when it has one, otherwise the mean position
    of all of its atoms.

    Args:
        file: path of the mmCIF file. The text before the first "." is used
            as the structure id.

    Returns:
        A ``(bary, enf)`` tuple of dicts keyed by chain object.
        ``bary[chain]`` is the mean of the chain's residue points (NaN for a
        chain without residues) and ``enf[chain]`` is the list of distances
        from each residue point to that mean.
    """
    parser = MMCIFParser()
    structure = parser.get_structure(file.split('.')[0], file)
    coord_ca = {}
    bary = {}
    for chain in structure[0]:
        points = []
        for residue in chain:
            if residue.has_id('CA'):
                points.append(residue['CA'].get_coord())
                continue
            atom_coords = [atom.get_coord() for atom in residue]
            # Average over every atom. The previous code overwrote the
            # running value on each iteration, so only the last atom
            # (divided by the atom count) contributed.
            if atom_coords:
                points.append(np.mean(atom_coords, axis=0))
            else:
                points.append(np.zeros(3))
        # reshape keeps an (n_residues, 3) layout even for an empty chain.
        coord_ca[chain] = np.asarray(points).reshape(-1, 3)
        # Average along the residue axis to get one x/y/z triple. Indexing
        # rows 0..2 (as before) averaged the components of the first three
        # residues and raised IndexError on shorter chains.
        if len(coord_ca[chain]):
            bary[chain] = coord_ca[chain].mean(axis=0)
        else:
            bary[chain] = np.full(3, np.nan)
    enf = {
        chain: [np.linalg.norm(coord - bary[chain]) for coord in coord_ca[chain]]
        for chain in structure[0]
    }
    return bary, enf

Example #9

0

Show file

File: CIF2PDB.py Project: zhujianwei31415/RaptorX-3DModeling

def CIF2PDB(ciffile, pdbfile, verbose=False):
    """Convert the mmCIF file ``ciffile`` into a PDB file at ``pdbfile``.

    Chains are renamed through ``rename_chains`` before writing; with
    ``verbose`` set, each chain whose name changed is logged. The process
    exits with status 1 when ``rename_chains`` raises ``OutOfChainsError``.

    Returns:
        ``pdbfile``, unchanged.
    """
    # The parser wants a structure id: use the first four characters of the
    # input name, or a placeholder when the name is too short.
    strucid = "1xxx"
    if len(ciffile) > 4:
        strucid = ciffile[:4]

    structure = MMCIFParser().get_structure(strucid, ciffile)

    # Rename long chains
    try:
        chainmap = rename_chains(structure)
    except OutOfChainsError:
        logging.error("Too many chains to represent in PDB format")
        sys.exit(1)

    if verbose:
        changed = [(new, old) for new, old in chainmap.items() if new != old]
        for new, old in changed:
            logging.info("Renaming chain {0} to {1}".format(old, new))

    # Write PDB
    writer = PDBIO()
    writer.set_structure(structure)
    # TODO What happens with large structures?
    writer.save(pdbfile)

    return pdbfile

Example #10

0

Show file

 def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
     """Construct a site object directly from a residue list.

     The structure is read from ``cif_path`` with MMCIFParser when
     ``annotate`` is true (its mmCIF dictionary is kept as well) and with
     FastMMCIFParser otherwise. Returns the new site, or None (after a
     RuntimeWarning) when parsing raises TypeError or
     PDBConstructionException.
     """
     mmcif_dict = {}
     # Reduce redundant residues with multiple function locations first
     reslist = PdbSite._cleanup_list(reslist)
     site = cls()
     site.parent_entry = parent_entry
     parser_class = MMCIFParser if annotate else FastMMCIFParser
     try:
         parser = parser_class(QUIET=True)
         structure = parser.get_structure('', cif_path)
         if annotate:
             mmcif_dict = parser._mmcif_dict
     except (TypeError, PDBConstructionException):
         warnings.warn(
             'Could not build site from residue list. Check entry',
             RuntimeWarning)
         return None
     for residue in reslist:
         if structure:
             residue.add_structure(structure)
         site.add(residue)
     if not annotate:
         return site
     site.parent_structure = structure
     site.mmcif_dict = mmcif_dict
     site.find_ligands()
     return site

Example #11

0

Show file

File: test_PDB_MMCIFParser.py Project: anntzer/biopython

    def test_filehandle(self):
        """Test if the parser can handle a file handle as well as a filename."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # Open the handle in a with-block so it is closed once parsing is
        # done instead of being left to the garbage collector.
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)

Example #12

0

Show file

File: test_PDB_MMCIFParser.py Project: yuanzhw/biopython

    def test_filehandle(self):
        """Test if the parser can handle a file handle as well as a filename."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # Open the handle in a with-block so it is closed once parsing is
        # done instead of being left to the garbage collector.
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)

Example #13

0

Show file

File: ge82sey.py Project: annareithmeir/CodePlagiarismDetectionTool

 def __init__(self, path):
     """Parse the CIF structure file at ``path`` once and keep the result.

     The parsed structure is stored in ``self.structure`` so it does not
     have to be parsed again. An example file (7ahl.cif) is included in the
     repository.
     """
     self.structure = MMCIFParser().get_structure('PHA-L', path)

Example #14

0

Show file

File: ge56sen.py Project: annareithmeir/CodePlagiarismDetectionTool

 def __init__(self, path):
     """Parse the CIF structure file at ``path`` once and keep the result.

     The parsed structure is stored in ``self.structure`` so it does not
     have to be parsed again. An example file (7ahl.cif) is included in the
     repository.
     """
     cif_reader = MMCIFParser()
     parsed = cif_reader.get_structure('PHA-L', path)
     self.structure = parsed

Example #15

0

Show file

File: test_PDB_MMCIFParser.py Project: ttung/biopython

    def test_conversion_not_preserve_numbering(self):
        """Convert mmCIF to PDB and renumber atom serials."""
        import os  # local import: only needed to manage the temporary file

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # save() is given the path, not the descriptor, so close the
        # descriptor immediately instead of leaking it.
        os.close(filenumber)
        try:
            pdb_writer.save(filename, preserve_atom_numbering=False)
        finally:
            # Do not leave the temporary file behind, even if save() failed.
            os.remove(filename)

Example #16

0

Show file

 def __init__(self, path):
     """Parse the CIF structure file at ``path`` once and keep the result.

     The parsed structure is stored in ``self.structure`` so it does not
     have to be parsed again. An example file (7ahl.cif) is included in the
     repository.
     """
     structure_id = 'some structure string here, e.g. 7AHL'
     self.structure = MMCIFParser().get_structure(structure_id, path)

Example #17

0

Show file

File: test_PDB_MMCIFParser.py Project: ttung/biopython

    def test_conversion_preserve_numbering(self):
        """Convert mmCIF to PDB and preserve original serial numbering.

        Saving with preserve_atom_numbering=True is expected to raise
        ValueError for this structure.
        """
        import os  # local import: only needed to manage the temporary file

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # save() is given the path, not the descriptor, so close the
        # descriptor immediately instead of leaking it.
        os.close(filenumber)
        try:
            with self.assertRaises(ValueError):
                pdb_writer.save(filename, preserve_atom_numbering=True)
        finally:
            # Do not leave the temporary file behind, whatever the outcome.
            os.remove(filename)

Example #18

0

Show file

File: test_mmtf.py Project: KamyNz/biopython

    def test_compare_to_mmcif(self):
        """Compare the MMTF and mmCIF parsed structures."""

        def check_atoms(case):
            """Assert that case.mmcif_atoms and case.mmtf_atoms are equivalent."""
            case.assertEqual(len(case.mmcif_atoms), len(case.mmtf_atoms))
            # Lengths were just asserted equal, so pairing with zip visits
            # every atom of both lists.
            for mmcif_atom, mmtf_atom in zip(case.mmcif_atoms, case.mmtf_atoms):
                # e.g. "CA": spaces are removed from the atom name
                case.assertEqual(mmtf_atom.name, mmcif_atom.name)
                # e.g. " CA ": spaces included
                case.assertEqual(mmtf_atom.fullname, mmcif_atom.fullname)
                for axis in range(3):
                    case.assertAlmostEqual(mmtf_atom.coord[axis], mmcif_atom.coord[axis], places=3)
                case.assertEqual(mmtf_atom.bfactor, mmcif_atom.bfactor)
                case.assertEqual(mmtf_atom.occupancy, mmcif_atom.occupancy)
                case.assertEqual(mmtf_atom.altloc, mmcif_atom.altloc)
                # (structure id, model id, chain id, residue id, atom id)
                case.assertEqual(mmtf_atom.full_id, mmcif_atom.full_id)
                # The id of an atom is the atom name (e.g. "CA")
                case.assertEqual(mmtf_atom.id, mmcif_atom.name)
                # serial_number is not compared: mmCIF serial number is none

        def check_residues(case):
            """Assert that case.mmcif_res and case.mmtf_res are equivalent."""
            case.assertEqual(len(case.mmcif_res), len(case.mmtf_res))
            for mmcif_r, mmtf_r in zip(case.mmcif_res, case.mmtf_res):
                case.assertEqual(mmtf_r.level, mmcif_r.level)
                case.assertEqual(mmtf_r.disordered, mmcif_r.disordered)
                case.assertEqual(mmtf_r.resname, mmcif_r.resname)
                case.assertEqual(mmtf_r.segid, mmcif_r.segid)
                case.mmcif_atoms = list(mmcif_r.get_atom())
                case.mmtf_atoms = list(mmtf_r.get_atom())
                check_atoms(case)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure("PDB/4CUP.mmtf")
        mmcif_struct = MMCIFParser().get_structure("example", "PDB/4CUP.cif")

        # Whole-structure atom comparison
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        check_atoms(self)

        # Chain-by-chain residue comparison
        mmcif_chains = list(mmcif_struct.get_chains())
        mmtf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(mmcif_chains), len(mmtf_chains))
        for mmcif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
            self.mmcif_res = list(mmcif_chain.get_residues())
            self.mmtf_res = list(mmtf_chain.get_residues())
            check_residues(self)

        # Whole-structure residue comparison, then the model counts
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        check_residues(self)
        mmcif_model_count = len(list(mmcif_struct.get_models()))
        mmtf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(mmcif_model_count, mmtf_model_count)

Example #19

0

Show file

def clean_pdb(file_input, file_output, chain_to_keep, parameters):
    """Filter the mmCIF structure in ``file_input`` and save it to ``file_output``.

    The structure is passed through ``remove_chains`` (with ``chain_to_keep``)
    and then ``remove_extra_atoms`` (with ``parameters``) before being written
    with MMCIFIO. The structure id is ``file_input`` without its last four
    characters, upper-cased.
    """
    cif_reader = MMCIFParser()
    structure_id = file_input[:-4].upper()
    parsed = cif_reader.get_structure(structure_id, file_input)

    kept = remove_chains(parsed, chain_to_keep)
    cleaned = remove_extra_atoms(kept, parameters)

    writer = MMCIFIO()
    writer.set_structure(cleaned)
    writer.save(file_output)

Example #20

0

Show file

 def __init__(self, path):
     """Parse the CIF structure file at ``path`` once and keep the result.

     The parsed structure is stored in ``self.structure`` so it does not
     have to be parsed again. An example file (7ahl.cif) is included in the
     repository.
     """
     # Parser object for reading in a structure in CIF format
     cif_parser = MMCIFParser()
     # Parse the structure once and re-use it in the other methods
     self.structure = cif_parser.get_structure("Structure", path)
     # NOTE(review): this prints from the constructor and looks like leftover
     # debugging output; kept so existing output is unchanged. Confirm it can
     # be removed.
     print(self.get_number_of_water_molecules("D"))

Example #21

0

Show file

 def __init__(self, path):
     """Parse the CIF structure file at ``path`` once and keep the result.

     Sets ``self.structure`` (the parsed structure), ``self.model`` (its
     entry 0) and ``self.residue_dict`` (upper-cased keys of
     ``protein_letters_3to1`` with their values, plus 'HOH' mapped to '').
     An example file (7ahl.cif) is included in the repository.
     """
     self.structure = MMCIFParser(QUIET=True).get_structure('structure', path)
     self.model = self.structure[0]
     residue_dict = {}
     for name, letter in protein_letters_3to1.items():
         residue_dict[name.upper()] = letter
     # Added last, so it wins over any equally named entry above
     residue_dict['HOH'] = ''
     self.residue_dict = residue_dict

Example #22

0

Show file

 def __init__(self,
              structure_builder=None,
              QUIET=False,
              removeHeteroDuplicated=True):
     """Store the duplicate-hetero flag and initialise both parser bases.

     ``removeHeteroDuplicated`` is saved on the instance; structure_builder
     and QUIET are forwarded unchanged, first to PDBParser.__init__ and then
     to MMCIFParser.__init__.
     """
     self.removeHeteroDuplicated = removeHeteroDuplicated
     for parser_base in (PDBParser, MMCIFParser):
         parser_base.__init__(
             self, structure_builder=structure_builder, QUIET=QUIET
         )

Example #23

0

Show file

 def test_parser(self):
     """Extract polypeptides from 1A8O.

     For each polypeptide builder the file is split once with non-standard
     amino acids allowed and once in strict mode; the end positions and
     sequence of every resulting fragment are checked.
     """
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     # Check that serial_num (model column) is stored properly. This does
     # not depend on the builder, so it is checked once up front.
     self.assertEqual(structure[0].serial_num, 1)

     def check_fragment(pp, start, end, sequence):
         """Assert the residue numbers at both ends of pp and its sequence."""
         self.assertEqual(pp[0].get_id()[1], start)
         self.assertEqual(pp[-1].get_id()[1], end)
         s = pp.get_sequence()
         # assertIsInstance reports the offending type when it fails.
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual(sequence, str(s))

     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # ==========================================================
         # First try allowing non-standard amino acids,
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 1)
         # Here non-standard MSE are shown as M
         check_fragment(
             polypeptides[0], 151, 220,
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
             "NANPDCKTILKALGPGATLEEMMTACQG")
         # ==========================================================
         # Now try strict version with only standard amino acids
         # Should ignore MSE 151 at start, and then break the chain
         # at MSE 185, and MSE 214,215
         polypeptides = ppbuild.build_peptides(structure[0], True)
         self.assertEqual(len(polypeptides), 3)
         # First fragment
         check_fragment(polypeptides[0], 152, 184,
                        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW")
         # Second fragment
         check_fragment(polypeptides[1], 186, 213,
                        "TETLLVQNANPDCKTILKALGPGATLEE")
         # Third fragment
         check_fragment(polypeptides[2], 216, 220, "TACQG")

Example #24

0

Show file

File: test_MMCIF.py Project: davidmam/biopython

 def test_parser(self):
     """Extract polypeptides from 1A8O.

     For each polypeptide builder the file is split once with non-standard
     amino acids allowed and once in strict mode; the end positions and
     sequence of every resulting fragment are checked.
     """
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     # Check that serial_num (model column) is stored properly. This does
     # not depend on the builder, so it is checked once up front.
     self.assertEqual(structure[0].serial_num, 1)

     def check_fragment(pp, start, end, sequence):
         """Assert the residue numbers at both ends of pp and its sequence."""
         self.assertEqual(pp[0].get_id()[1], start)
         self.assertEqual(pp[-1].get_id()[1], end)
         s = pp.get_sequence()
         # assertIsInstance reports the offending type when it fails.
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual(sequence, str(s))

     # (start, end, sequence) of each fragment expected in strict mode:
     # MSE 151 at the start is ignored and the chain breaks at MSE 185 and
     # at MSE 214,215.
     strict_fragments = [
         (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
         (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
         (216, 220, "TACQG"),
     ]

     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # First try allowing non-standard amino acids,
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 1)
         # Here non-standard MSE are shown as M
         check_fragment(
             polypeptides[0], 151, 220,
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
             "NANPDCKTILKALGPGATLEEMMTACQG")
         # Now try strict version with only standard amino acids
         polypeptides = ppbuild.build_peptides(structure[0], True)
         self.assertEqual(len(polypeptides), 3)
         for pp, (start, end, sequence) in zip(polypeptides, strict_fragments):
             check_fragment(pp, start, end, sequence)

Example #25

0

Show file

File: test_PDB_MMCIFParser.py Project: ttung/biopython

    def testModels(self):
        """Test file with multiple models."""
        expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 3)

        for builder in [PPBuilder(), CaPPBuilder()]:
            # Check that serial_num (model column) is stored properly
            for model_index in range(3):
                self.assertEqual(structure[model_index].serial_num, model_index + 1)
            # First allow non-standard amino acids (False), then use the
            # strict version with only standard amino acids (True). Both
            # runs are expected to give the same single polypeptide.
            for strict in (False, True):
                peptides = builder.build_peptides(structure[0], strict)
                self.assertEqual(len(peptides), 1)
                peptide = peptides[0]
                # Check the start and end positions
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Check the sequence
                sequence = peptide.get_sequence()
                self.assertIsInstance(sequence, Seq)
                self.assertEqual(sequence.alphabet, generic_protein)
                self.assertEqual(expected_sequence, str(sequence))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)

Example #26

0

Show file

File: test_PDB_MMCIFParser.py Project: anntzer/biopython

    def testModels(self):
        """Test file with multiple models.

        Parses PDB/1LCD.cif with both parsers, checks the model count and
        serial numbers, and builds polypeptides from the first model with and
        without non-standard amino acids. PDB/2OFG.cif is then parsed to check
        its model count.
        """
        refseq = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        self.assertEqual(len(structure), 3)
        self.assertEqual(len(f_structure), 3)

        # Check that serial_num (model column) is stored properly. This does
        # not depend on the builder, so it is checked once up front.
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # First try allowing non-standard amino acids (False), then the
            # strict version with only standard amino acids (True). Both are
            # expected to give the same single polypeptide.
            for aa_only in (False, True):
                polypeptides = ppbuild.build_peptides(structure[0], aa_only)
                self.assertEqual(len(polypeptides), 1)
                pp = polypeptides[0]
                # Check the start and end positions
                self.assertEqual(pp[0].get_id()[1], 1)
                self.assertEqual(pp[-1].get_id()[1], 51)
                # Check the sequence
                s = pp.get_sequence()
                # assertIsInstance reports the offending type when it fails.
                self.assertIsInstance(s, Seq)
                self.assertEqual(s.alphabet, generic_protein)
                self.assertEqual(refseq, str(s))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)

Example #27

0

Show file

File: Structural_alignment.py Project: josephineyates/EA_BIOINF

def get_atoms(file):
    """Collect the C-alpha position of every residue, grouped by chain.

    ``file`` is parsed with MMCIFParser; the text before the first "." in
    ``file`` is used as the structure id.  Only ``structure[0]`` (the first
    model) is inspected.

    Returns a list with one entry per chain.  Each entry is a list of
    ``(residue name, CA vector)`` tuples; residues without a "CA" atom
    are left out.
    """
    parser = MMCIFParser()
    first_model = parser.get_structure(file.split('.')[0], file)[0]
    return [
        [
            (res.get_resname(), res['CA'].get_vector())
            for res in chain
            if res.has_id('CA')
        ]
        for chain in first_model
    ]

Example #28

0

Show file

File: test_RCSBFormats.py Project: iriziotis/biopython_bugfixes

    def setUp(self):
        """Parse the 1LCD entry from both its PDB and its mmCIF file.

        The resulting structures are stored as ``self.pdbo`` and
        ``self.cifo``.
        """
        # Keep structure-construction warnings out of the test output.
        warnings.simplefilter("ignore", PDBConstructionWarning)

        pdbparser, cifparser = PDBParser(QUIET=1), MMCIFParser(QUIET=1)

        # Test data lives in the "PDB" directory next to this module.
        data_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), "PDB")

        self.pdbo = pdbparser.get_structure(
            "pdb", os.path.join(data_dir, "1LCD.pdb"))
        self.cifo = cifparser.get_structure(
            "pdb", os.path.join(data_dir, "1LCD.cif"))

Example #29

0

Show file

File: test_RCSBFormats.py Project: BioGeek/biopython

    def setUp(self):
        """Load 1LCD twice: once via PDBParser, once via MMCIFParser.

        Stores the parsed structures on ``self.pdbo`` and ``self.cifo``.
        """
        # Silence construction warnings for the duration of the tests.
        warnings.simplefilter('ignore', PDBConstructionWarning)

        here = os.path.abspath(os.path.dirname(__file__))

        def data_path(name):
            # Resolve a file inside the "PDB" folder beside this module.
            return os.path.join(here, "PDB", name)

        pdbparser = PDBParser(QUIET=1)
        cifparser = MMCIFParser(QUIET=1)
        self.pdbo = pdbparser.get_structure('pdb', data_path("1LCD.pdb"))
        self.cifo = cifparser.get_structure('pdb', data_path("1LCD.cif"))

Example #30

0

Show file

    def get_structure(self, *args):
        """Parse a structure, trying the PDB format first and mmCIF second.

        Accepted call forms:
            get_structure(pdbId, fileName)
            get_structure(fileName)   # str(fileName) is used as the id

        ``fileName`` may be a local path, a path ending in ".gz", an
        http(s) URL (downloaded with ``requests``) or an already open
        handle.

        Returns the parsed structure, passed through
        ``self.filterOutDuplicated`` when ``self.removeHeteroDuplicated``
        is set.

        Raises:
            ValueError: if called with zero or more than two arguments.
            Exception: if the URL download fails, or if the mmCIF
                fallback cannot parse the input either.
        """
        if len(args) == 2:
            pdbId, fileName = args
        elif len(args) == 1:
            fileName = args[0]
            pdbId = str(fileName)
        else:
            raise ValueError(
                "Error, input should be (id, fileName) or (fileName))")

        # Only strings can be URLs; re.match raises TypeError on handles.
        if isinstance(fileName, str) and re.match("http(s?)://", fileName):
            r = requests.get(fileName)
            if r.ok:
                fileName = StringIO(r.text)
            else:
                raise Exception("Error downloading pdb")

        isGzipped = isinstance(fileName, str) and fileName.endswith(".gz")
        try:
            if isGzipped:
                # Text mode: the parsers work on str lines, not bytes.
                with gzip.open(fileName, "rt") as f:
                    structure = PDBParser.get_structure(self, pdbId, f)
            else:
                structure = PDBParser.get_structure(self, pdbId, fileName)
        except Exception as e:
            # Best-effort fallback: report the PDB failure and retry as mmCIF.
            print(e)
            if isGzipped:
                # The fallback needs the decompressed text as well.
                with gzip.open(fileName, "rt") as f:
                    structure = MMCIFParser.get_structure(self, pdbId, f)
            else:
                # A handle was (partly) consumed by the first attempt;
                # rewind it so the second parser sees the whole content.
                if not isinstance(fileName, str) and hasattr(fileName, "seek"):
                    fileName.seek(0)
                structure = MMCIFParser.get_structure(self, pdbId, fileName)
        if self.removeHeteroDuplicated:
            structure = self.filterOutDuplicated(structure)
        return structure

Example #31

0

Show file

File: test_mmtf.py Project: iriziotis/biopython_bugfixes

 def test_write(self):
     """Test a simple structure object is written out correctly to MMTF."""
     struc = MMCIFParser().get_structure("1A8O", "PDB/1A8O.cif")
     io = MMTFIO()
     io.set_structure(struc)
     # Only the path is needed; release the descriptor mkstemp opened.
     descriptor, filename = tempfile.mkstemp()
     os.close(descriptor)
     try:
         io.save(filename)
         # The written file must load as a structure as well as a raw record.
         MMTFParser.get_structure(filename)
         dict_back = mmtf.parse(filename)

         # Top-level counts
         for attr, expected in (
             ("structure_id", "1A8O"),
             ("num_models", 1),
             ("num_chains", 2),
             ("num_groups", 158),
             ("num_atoms", 644),
         ):
             self.assertEqual(getattr(dict_back, attr), expected)

         # One entry per atom in every per-atom list
         for attr in (
             "x_coord_list",
             "y_coord_list",
             "z_coord_list",
             "b_factor_list",
             "occupancy_list",
         ):
             self.assertEqual(len(getattr(dict_back, attr)), 644)

         self.assertEqual(dict_back.x_coord_list[5], 20.022)
         self.assertEqual(set(dict_back.ins_code_list), {"\x00"})
         self.assertEqual(set(dict_back.alt_loc_list), {"\x00"})
         self.assertEqual(list(dict_back.atom_id_list), list(range(1, 645)))
         self.assertEqual(list(dict_back.sequence_index_list),
                          list(range(70)) + [-1] * 88)

         # Chain and group bookkeeping
         self.assertEqual(dict_back.chain_id_list, ["A", "B"])
         self.assertEqual(dict_back.chain_name_list, ["A", "A"])
         self.assertEqual(dict_back.chains_per_model, [2])
         self.assertEqual(len(dict_back.group_list), 21)
         for attr in ("group_id_list", "group_type_list"):
             self.assertEqual(len(getattr(dict_back, attr)), 158)
         self.assertEqual(dict_back.groups_per_chain, [70, 88])

         # Entities: the polymer first, then the water
         self.assertEqual(len(dict_back.entity_list), 2)
         polymer = dict_back.entity_list[0]
         self.assertEqual(polymer["type"], "polymer")
         self.assertEqual(polymer["chainIndexList"], [0])
         self.assertEqual(
             polymer["sequence"],
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
         )
         water = dict_back.entity_list[1]
         self.assertEqual(water["type"], "water")
         self.assertEqual(water["chainIndexList"], [1])
         self.assertEqual(water["sequence"], "")
     finally:
         os.remove(filename)

Example #32

0

Show file

 def build_all(cls,
               reslist,
               reference_site,
               parent_entry,
               cif_path,
               annotate=True,
               redundancy_cutoff=None):
     """Build all sites using a list of catalytic residues as input.

     The structure at ``cif_path`` is parsed with MMCIFParser when
     ``annotate`` is true, so that the parser's mmCIF dictionary can be
     attached to each site, and with FastMMCIFParser otherwise.

     ``reslist`` is cleaned up, extended with the equivalent residues
     found through ``_get_assembly_residues`` and split into seeds; one
     site is built per seed with ``cls.build``.  Redundant sites are then
     removed using ``redundancy_cutoff`` and unclustered sites flagged.

     Returns a list of PdbSite objects.  If the structure cannot be
     parsed, a RuntimeWarning is issued and an empty list is returned.
     """
     sites = []
     mmcif_dict = {}
     try:
         if annotate:
             parser = MMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
             mmcif_dict = parser._mmcif_dict
         else:
             parser = FastMMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
     except (TypeError, PDBConstructionException):
         # The category belongs to warn(), not to str.format().
         warnings.warn('Could not parse structure {}'.format(cif_path),
                       RuntimeWarning)
         return sites
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     # We want all equivalent residues from identical assembly chains
     reslist = PdbSite._get_assembly_residues(reslist, structure)
     # Get seeds to build active sites
     seeds = PdbSite._get_seeds(reslist)
     # Build a site from each seed
     sites = [cls.build(seed, reslist, reference_site, parent_entry)
              for seed in seeds]
     # Reduce redundancy
     sites = PdbSite._remove_redundant_sites(sites,
                                             cutoff=redundancy_cutoff)
     # Add ligands and annotations
     if annotate and structure:
         for site in sites:
             site.parent_structure = structure
             site.mmcif_dict = mmcif_dict
             site.find_ligands()
     # Flag unclustered sites
     PdbSite._mark_unclustered(sites)
     return sites

Example #33

0

Show file

File: test_PDB_MMCIFParser.py Project: ttung/biopython

    def test_header(self):
        """Test if the parser populates header data."""
        parser = MMCIFParser()

        # (file, expected header fields) -- checked in the listed order.
        cases = (
            ("PDB/a_structure.cif", (
                ("idcode", ""),
                ("head", ""),
                ("deposition_date", ""),
                ("structure_method", ""),
                ("resolution", 0.0),
            )),
            ("PDB/1A8O.cif", (
                ("idcode", "1A8O"),
                ("head", "Viral protein"),
                ("deposition_date", ""),
                ("structure_method", "X-RAY DIFFRACTION"),
                ("resolution", 1.7),
            )),
        )
        for cif_file, expected_fields in cases:
            structure = parser.get_structure("example", cif_file)
            for key, expected in expected_fields:
                self.assertEqual(expected, structure.header[key])

Example #34

0

Show file

File: PdbIO.py Project: pandora-iu/Biopython

def CifAtomIterator(source):
    """Yield a SeqRecord for each chain found in an mmCIF file.

    ``source`` may be a file-like object or the path to a file.

    Sequences are taken from the 3D structure itself (the _atom_site.*
    fields of the mmCIF file), not from any declared sequence.

    Three letter amino acid codes from HETATM entries that are not
    recognised (e.g. "CSD") appear as "X" in the sequence.

    Besides the header information, which is identical for every record,
    each record carries chain specific annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    Gaps in the residue numbering are treated as missing amino acids: the
    sequence is padded with one 'X' per missing residue, and None is stored
    at the matching positions of record.annotations["residues"].

    The Bio.PDB module does most of the hard work here.  The annotation
    could be richer, but any extra parsing belongs in parse_pdb_header
    rather than in this module.

    Bio.SeqIO calls this function internally for the "cif-atom" format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # Imported lazily so that SeqIO does not depend on NumPy until needed
    from Bio.PDB.MMCIFParser import MMCIFParser

    parsed = MMCIFParser().get_structure(None, source)
    entry_id = parsed.header["idcode"]
    if entry_id:
        yield from AtomIterator(entry_id, parsed)
        return

    # No usable id in the header: warn and fall back to a placeholder.
    warnings.warn("Could not determine the PDB ID.",
                  BiopythonParserWarning)
    yield from AtomIterator("????", parsed)

Example #35

0

Show file

File: Structural_alignment.py Project: josephineyates/EA_BIOINF

def get_descriptors(file):
    """Describe every residue of the first model as a small dictionary.

    ``file`` is parsed with MMCIFParser; the text before the first "." in
    ``file`` is used as the structure id.

    Returns one flat list covering all chains in iteration order.  Every
    entry has a "name" key (the residue name).  Residues that own a "CA"
    atom additionally get "coord" (the CA vector) and "hse" (the first two
    values HSExposureCB holds for that residue).
    """
    parser = MMCIFParser()
    model = parser.get_structure(file.split('.')[0], file)[0]
    exposure = HSExposureCB(model)

    descriptors = []
    for chain in model:
        for residue in chain:
            entry = {"name": residue.get_resname()}
            if residue.has_id('CA'):
                entry["coord"] = residue['CA'].get_vector()
                values = exposure[(chain.id, residue.id)]
                entry["hse"] = (values[0], values[1])
            descriptors.append(entry)
    return descriptors

Example #36

0

Show file

File: script1.py Project: Cysteine18/Genetic_variants

def func1():
    """Extract one chain from a gzipped mmCIF entry and save it as PDB.

    Command-line inputs:
        sys.argv[2]: entry id; its characters [1:3] name the sub-folder.
        sys.argv[3]: id of the chain to extract.

    The archive ``<pathmmcif>/<folder>/<id>.cif.gz`` is decompressed to
    ``pdbprocess.cif``, parsed, and the requested chain of the first
    model is written to ``chain1.pdb``.

    Any failure (missing arguments, missing file, unknown chain, parse
    error) prints ``FILE NOT FOUND`` to stdout; the underlying reason is
    written to stderr.
    """
    import gzip
    import shutil
    import sys
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.PDBIO import PDBIO

    parser = MMCIFParser(QUIET=True)

    # Root of the local mmCIF mirror.
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    filename = "pdbprocess.cif"

    try:
        pdb1 = "{}".format(sys.argv[2])
        fol = pdb1[1:3]
        c1 = "{}".format(sys.argv[3])
        pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)

        # Context managers close both files even when the copy fails;
        # copyfileobj streams instead of loading the archive in memory.
        with gzip.open(pdbfile, "rb") as tar, open(filename, "wb") as out:
            shutil.copyfileobj(tar, out)

        structure = parser.get_structure(pdb1, filename)
        model = structure[0]
        chain = model[c1]

        io = PDBIO()
        io.set_structure(chain)
        io.save("chain1.pdb")
    except Exception as err:
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        print("FILE NOT FOUND")
        sys.stderr.write("{}: {}\n".format(type(err).__name__, err))

Example #37

0

Show file

def get_STR(filePath, fileType='pdb'):
    """Load ``filePath`` in the representation selected by ``fileType``.

    "pdb" returns the structure produced by
    ``MMCIFParser(QUIET=True).get_structure("pdb", filePath)``.
    "cif" returns ``MMCIF2Dict(filePath)``.
    Any other value raises TypeError.

    NOTE(review): the "pdb" branch uses the mmCIF parser -- confirm that
    this is intended.
    """
    if fileType == "pdb":
        return MMCIFParser(QUIET=True).get_structure("pdb", filePath)

    if fileType == "cif":
        return MMCIF2Dict(filePath)

    raise TypeError("%s is not a valid fileType" % fileType)

Example #38

0

Show file

File: test_mmtf.py Project: ezequieljsosa/biopython

    def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
        """Compare parsed structures for MMTF and CIF files.

        Atoms and residues of both structures are stored on ``self``
        (``mmcif_atoms``/``mmtf_atoms``, ``mmcif_res``/``mmtf_res``) before
        ``check_atoms`` / ``check_residues`` run -- presumably those
        helpers compare the stored lists; see their definitions.
        """
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure(mmtf_filename)
        mmcif_struct = MMCIFParser().get_structure("example", cif_filename)

        # Atoms of the whole structure
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        self.check_atoms()

        # Residues, chain by chain
        mmcif_chains = list(mmcif_struct.get_chains())
        mmtf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(mmcif_chains), len(mmtf_chains))
        for cif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
            self.mmcif_res = list(cif_chain.get_residues())
            self.mmtf_res = list(mmtf_chain.get_residues())
            self.check_residues()

        # Residues of the whole structure
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        self.check_residues()

        # Same number of models on both sides
        cif_model_count = len(list(mmcif_struct.get_models()))
        mmtf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(cif_model_count, mmtf_model_count)

Example #39

0

Show file

File: test_PDB_MMCIFParser.py Project: yuanzhw/biopython

    def test_with_anisotrop(self):
        """Parse 4CUP with both mmCIF parsers and check atom and ANISOU data."""
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        cif_file = "PDB/4CUP.cif"
        structure = parser.get_structure("example", cif_file)
        f_structure = fast_parser.get_structure("example", cif_file)

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expected values, shared by both parsers
        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        first_coord = numpy.array([50.346, 19.287, 17.288],
                                  dtype=numpy.float32)
        first_anisou = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
            dtype=numpy.float32)
        atom_937_anisou = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
            dtype=numpy.float32)

        for atoms in [s_atoms, f_atoms]:
            leading = atoms[:5]
            first = atoms[0]

            self.assertSequenceEqual([a.get_name() for a in leading],
                                     atom_names)
            self.assertSequenceEqual([a.get_id() for a in leading],
                                     atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in leading],
                                     atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in leading],
                                     [1.0] * 5)
            self.assertIsInstance(first.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(first.get_coord(), first_coord)
            self.assertEqual(first.get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(first.get_anisou(), first_anisou)
            # This atom is always taken from the fast parser's structure,
            # on both passes of the loop.
            atom_937 = list(f_structure[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(),
                                             atom_937_anisou)

Example #40

0

Show file

File: test_MMCIF.py Project: Pfiver/RNA-Seqlyze

 def testModels(self):
     """Test file with multiple models"""
     structure = MMCIFParser().get_structure("example", "PDB/1LCD.cif")
     self.assertEqual(len(structure), 3)

     # Both build modes must give this sequence
     # (non-standard MSE are shown as M).
     expected_seq = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # Check that serial_num (model column) is stored properly
         for index in range(3):
             self.assertEqual(structure[index].serial_num, index + 1)

         # First pass allows non-standard amino acids, second pass is
         # the strict version with only standard amino acids.
         for standard_only in (False, True):
             polypeptides = ppbuild.build_peptides(structure[0],
                                                   standard_only)
             self.assertEqual(len(polypeptides), 1)
             pp = polypeptides[0]
             # Check the start and end positions
             self.assertEqual(pp[0].get_id()[1], 1)
             self.assertEqual(pp[-1].get_id()[1], 51)
             # Check the sequence
             s = pp.get_sequence()
             self.assertTrue(isinstance(s, Seq))
             self.assertEqual(s.alphabet, generic_protein)
             self.assertEqual(expected_seq, str(s))

Example #41

0

Show file

File: test_PDB_MMCIFParser.py Project: anntzer/biopython

    def test_parsers(self):
        """Extract polypeptides from 1A80."""

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            # First fragment
            pp = polypeptides[0]
            self.assertEqual(pp[0].get_id()[1], 152)
            self.assertEqual(pp[-1].get_id()[1], 184)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))

            # Second fragment
            pp = polypeptides[1]
            self.assertEqual(pp[0].get_id()[1], 186)
            self.assertEqual(pp[-1].get_id()[1], 213)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))

            # Third fragment
            pp = polypeptides[2]
            self.assertEqual(pp[0].get_id()[1], 216)
            self.assertEqual(pp[-1].get_id()[1], 220)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TACQG", str(s))

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        for atoms in [s_atoms, f_atoms]:
            self.assertEqual(len(atoms), 644)
            atom_names = ['N', 'CA', 'C', 'O', 'CB']
            self.assertSequenceEqual([a.get_name() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_id() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)

            self.assertEqual(atoms[0].get_bfactor(), 18.03)
            for atom in atoms:
                self.assertIsNone(atom.get_anisou())