Example #1
0
    def test_insertions(self):
        """Test file with residue insertion codes.

        Parses PDB/4ZHL.cif and, for each peptide builder, checks that two
        polypeptides are built and that the first one runs from residue 16
        to residue 244 with the expected sequence.
        """
        parser = MMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/4ZHL.cif")
        # Reference sequence of the first segment; the same for both builders.
        refseq = (
            "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
            "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
            "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
            "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
        )
        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # Build peptides allowing non-standard amino acids.
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 2)
            pp = polypeptides[0]
            # Check the start and end positions (first segment only)
            self.assertEqual(pp[0].get_id()[1], 16)
            self.assertEqual(pp[-1].get_id()[1], 244)
            # Check the sequence
            s = pp.get_sequence()
            # assertIsInstance reports the offending type on failure.
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(refseq, str(s))
 def test_insertions(self):
     """Test file with residue insertion codes.

     Parses PDB/4ZHL.cif and, for each peptide builder, checks that two
     polypeptides are built and that the first one runs from residue 16
     to residue 244 with the expected sequence.
     """
     parser = MMCIFParser(QUIET=1)
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = parser.get_structure("example", "PDB/4ZHL.cif")
     # Reference sequence of the first segment; the same for both builders.
     refseq = (
         "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
         "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
         "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
         "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
     )
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # Build peptides allowing non-standard amino acids.
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 2)
         pp = polypeptides[0]
         # Check the start and end positions (first segment only)
         self.assertEqual(pp[0].get_id()[1], 16)
         self.assertEqual(pp[-1].get_id()[1], 244)
         # Check the sequence
         s = pp.get_sequence()
         # assertIsInstance reports the offending type on failure.
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual(refseq, str(s))
Example #3
0
 def test_mmtf(self):
     """Parse PDB/1EJG.cif with MMCIFParser and print the structure.

     PDBConstructionWarning is ignored while the file is parsed.
     """
     with warnings.catch_warnings():
         cif_reader = MMCIFParser()
         warnings.simplefilter('ignore', PDBConstructionWarning)
         parsed = cif_reader.get_structure("MICR", "PDB/1EJG.cif")
         print(parsed)
    def test_conversion(self):
        """Parse 1LCD.cif, write it out as PDB, parse again and compare.

        Compares the number of models, the atom names and the atom elements
        of the structure read from mmCIF with the one read back from PDB.
        """
        import os  # local import: only needed for temp-file housekeeping

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp hands back an open descriptor; close it so it is not
        # leaked (the writer opens the file by name).
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)
        finally:
            # The temporary file is no longer needed once it has been parsed.
            os.remove(filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
    def test_with_anisotrop(self):
        """Check atom data, including get_anisou() values, for 4CUP.cif.

        The file is read with both MMCIFParser and FastMMCIFParser and the
        same expectations are applied to each resulting structure.
        """
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = fast_parser.get_structure("example", "PDB/4CUP.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expected values are the same for both parsers; build them once.
        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        coord = numpy.array([50.346, 19.287, 17.288], dtype=numpy.float32)
        first_anisou = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904], dtype=numpy.float32)
        atom_937_anisou = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221], dtype=numpy.float32)

        for struct, atoms in [(structure, s_atoms), (f_structure, f_atoms)]:
            self.assertSequenceEqual([a.get_name() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_id() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)
            self.assertEqual(atoms[0].get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(atoms[0].get_anisou(), first_anisou)
            # Look the atom up in the structure under test; previously it was
            # always taken from f_structure, so the MMCIFParser result was
            # never checked here.
            atom_937 = list(struct[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(), atom_937_anisou)
    def test_conversion(self):
        """Parse 1LCD.cif, write it out as PDB, parse again and compare.

        Compares the number of models, the atom names and the atom elements
        of the structure read from mmCIF with the one read back from PDB.
        """
        import os  # local import: only needed for temp-file housekeeping

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp hands back an open descriptor; close it so it is not
        # leaked (the writer opens the file by name).
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)
        finally:
            # The temporary file is no longer needed once it has been parsed.
            os.remove(filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        # The cif_* lists must come from cif_struct; building them from
        # pdb_struct would compare the PDB structure with itself.
        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
Example #7
0
 def __init__(self, structure_builder=None, QUIET=False):
     """Run the PDBParser and MMCIFParser initialisers on this instance.

     :param structure_builder: passed unchanged to both initialisers
     :param QUIET: passed unchanged to both initialisers
     """
     shared_kwargs = {"structure_builder": structure_builder, "QUIET": QUIET}
     # PDBParser is initialised first, then MMCIFParser.
     for parser_cls in (PDBParser, MMCIFParser):
         parser_cls.__init__(self, **shared_kwargs)
Example #8
0
def get_info_mmcif(file):
    """Compute per-chain barycenters and residue distances to them.

    The first model of the mmCIF file is used. Each residue is represented
    by the coordinates of its 'CA' atom or, when it has none, by the mean
    position of all its atoms.

    :param file: path of the mmCIF file; the text before the first '.' is
        used as the structure id
    :return: tuple ``(bary, enf)`` of dicts keyed by chain object.
        ``bary[chain]`` is the mean of the chain's representative points
        (length-3 array) and ``enf[chain]`` is the list of distances of each
        representative point to that mean, in residue order.
    """
    parser = MMCIFParser()
    structure = parser.get_structure(file.split('.')[0], file)
    bary = {}
    enf = {}
    for chain in structure[0]:
        points = []
        for residue in chain:
            if residue.has_id('CA'):
                points.append(residue['CA'].get_coord())
                continue
            # No CA atom: fall back to the centroid of all atoms. The
            # previous code overwrote the value on every atom, ending up
            # with (last atom coordinate / atom count) instead of the mean.
            atom_coords = [atom.get_coord() for atom in residue]
            if atom_coords:
                points.append(np.mean(atom_coords, axis=0))
            else:
                # Keep one entry per residue, as before, for atom-less residues.
                points.append(np.zeros(3))
        # reshape keeps a (0, 3) shape for chains without residues
        coords = np.asarray(points).reshape(-1, 3)
        if len(coords):
            # Average over residues (axis 0), i.e. one mean per coordinate.
            # The previous code averaged the first three rows instead and
            # failed on chains with fewer than three residues.
            center = coords.mean(axis=0)
        else:
            center = np.full(3, np.nan)
        bary[chain] = center
        enf[chain] = list(np.linalg.norm(coords - center, axis=1))
    return bary, enf
def CIF2PDB(ciffile, pdbfile, verbose=False):
    """Read an mmCIF file, rename its chains and write it in PDB format.

    :param ciffile: path of the mmCIF file to read
    :param pdbfile: path of the PDB file to write
    :param verbose: when true, log every chain whose name changed
    :return: ``pdbfile``

    Exits the process with status 1 when ``rename_chains`` raises
    ``OutOfChainsError``.
    """
    # Structure id given to the parser: the first four characters of the
    # input name, or a placeholder when the name has four characters or fewer.
    if len(ciffile) > 4:
        strucid = ciffile[:4]
    else:
        strucid = "1xxx"

    # Read file
    structure = MMCIFParser().get_structure(strucid, ciffile)

    # rename long chains
    try:
        chainmap = rename_chains(structure)
    except OutOfChainsError:
        logging.error("Too many chains to represent in PDB format")
        sys.exit(1)

    if verbose:
        changed = ((new, old) for new, old in chainmap.items() if new != old)
        for new, old in changed:
            logging.info("Renaming chain {0} to {1}".format(old, new))

    # Write PDB
    writer = PDBIO()
    writer.set_structure(structure)
    # TODO What happens with large structures?
    writer.save(pdbfile)

    return pdbfile
Example #10
0
 def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
     """Construct a site object directly from a residue list.

     :param reslist: residues to add; first passed through
         ``PdbSite._cleanup_list``
     :param cif_path: path of the mmCIF file to parse
     :param parent_entry: stored on the site as ``parent_entry``
     :param annotate: when true, parse with MMCIFParser, keep the parser's
         mmCIF dictionary and the structure on the site and call
         ``find_ligands``; otherwise parse with FastMMCIFParser
     :return: the populated site, or None (after a RuntimeWarning) when
         parsing raises TypeError or PDBConstructionException
     """
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     site = cls()
     site.parent_entry = parent_entry
     mmcif_dict = dict()
     parser_cls = MMCIFParser if annotate else FastMMCIFParser
     try:
         parser = parser_cls(QUIET=True)
         structure = parser.get_structure('', cif_path)
         if annotate:
             mmcif_dict = parser._mmcif_dict
     except (TypeError, PDBConstructionException):
         warnings.warn(
             'Could not build site from residue list. Check entry',
             RuntimeWarning)
         return
     for residue in reslist:
         if structure:
             residue.add_structure(structure)
         site.add(residue)
     if not annotate:
         return site
     site.parent_structure = structure
     site.mmcif_dict = mmcif_dict
     site.find_ligands()
     return site
Example #11
0
    def test_filehandle(self):
        """Test if the parser can handle file handle as well as filename."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # Use a context manager so the handle is closed even if parsing fails.
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)
Example #12
0
    def test_filehandle(self):
        """Test if the parser can handle file handle as well as filename."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # Use a context manager so the handle is closed even if parsing fails.
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)
 def __init__(self, path):
     '''
         Parse the CIF structure file at ``path`` once and keep the result.

         The parsed structure is stored in ``self.structure`` under the id
         'PHA-L' so that it does not have to be parsed again.
         An example file is included in the repository (7ahl.cif).
     '''
     self.structure = MMCIFParser().get_structure('PHA-L', path)
 def __init__(self, path):
     '''
         Parse the CIF structure file at ``path`` once and keep the result.

         The parsed structure is stored in ``self.structure`` under the id
         'PHA-L' so that it does not have to be parsed again.
         An example file is included in the repository (7ahl.cif).
     '''
     cif_reader = MMCIFParser()
     # Parsed a single time here; other methods re-use self.structure.
     parsed = cif_reader.get_structure('PHA-L', path)
     self.structure = parsed
Example #15
0
    def test_conversion_not_preserve_numbering(self):
        """Convert mmCIF to PDB and renumber atom serials.

        Passes when saving with ``preserve_atom_numbering=False`` does not
        raise.
        """
        import os  # local import: only needed for temp-file housekeeping

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp hands back an open descriptor; close it so it is not
        # leaked (the writer opens the file by name).
        os.close(filenumber)

        try:
            pdb_writer.save(filename, preserve_atom_numbering=False)
        finally:
            # Do not leave the temporary file behind.
            os.remove(filename)
Example #16
0
 def __init__(self, path):
     """
         Parse the CIF structure file at ``path`` once and keep the result.

         The parsed structure is stored in ``self.structure`` so that it
         does not have to be parsed again.
         An example file is included in the repository (7ahl.cif).
     """
     # Reader for structures in CIF format; the first argument of
     # get_structure is the id given to the parsed structure.
     cif_reader = MMCIFParser()
     structure_id = 'some structure string here, e.g. 7AHL'
     self.structure = cif_reader.get_structure(structure_id, path)
Example #17
0
    def test_conversion_preserve_numbering(self):
        """Convert mmCIF to PDB and preserve original serial numbering.

        Saving with ``preserve_atom_numbering=True`` is expected to raise
        ValueError for this structure.
        """
        import os  # local import: only needed for temp-file housekeeping

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp hands back an open descriptor; close it so it is not
        # leaked (the writer opens the file by name).
        os.close(filenumber)

        try:
            with self.assertRaises(ValueError):
                pdb_writer.save(filename, preserve_atom_numbering=True)
        finally:
            # Do not leave the temporary file behind.
            os.remove(filename)
Example #18
0
    def test_compare_to_mmcif(self):
        """Compare the MMTF and mmCIF parsed structures."""
        def test_atoms(parse_mmtf):
            """Test that all atoms in self.mmtf_atoms and self.mmcif_atoms are equivalent."""
            parse_mmtf.assertEqual(len(parse_mmtf.mmcif_atoms), len(parse_mmtf.mmtf_atoms))
            # Lengths are equal at this point, so pairing by position is safe.
            for mmtf_atom, mmcif_atom in zip(parse_mmtf.mmtf_atoms, parse_mmtf.mmcif_atoms):
                # e.g. "CA"; spaces are removed from the atom name
                parse_mmtf.assertEqual(mmtf_atom.name, mmcif_atom.name)
                # e.g. " CA "; spaces included
                parse_mmtf.assertEqual(mmtf_atom.fullname, mmcif_atom.fullname)
                for axis in range(3):
                    parse_mmtf.assertAlmostEqual(
                        mmtf_atom.coord[axis], mmcif_atom.coord[axis], places=3)
                parse_mmtf.assertEqual(mmtf_atom.bfactor, mmcif_atom.bfactor)
                parse_mmtf.assertEqual(mmtf_atom.occupancy, mmcif_atom.occupancy)
                parse_mmtf.assertEqual(mmtf_atom.altloc, mmcif_atom.altloc)
                # (structure id, model id, chain id, residue id, atom id)
                parse_mmtf.assertEqual(mmtf_atom.full_id, mmcif_atom.full_id)
                # id of atom is the atom name (e.g. "CA")
                parse_mmtf.assertEqual(mmtf_atom.id, mmcif_atom.name)
                # serial_number is not compared: mmCIF serial number is none

        def test_residues(parse_mmtf):
            """Test that all residues in self.mmcif_res and self.mmtf_res are equivalent."""
            parse_mmtf.assertEqual(len(parse_mmtf.mmcif_res), len(parse_mmtf.mmtf_res))
            for mmcif_r, mmtf_r in zip(parse_mmtf.mmcif_res, parse_mmtf.mmtf_res):
                parse_mmtf.assertEqual(mmtf_r.level, mmcif_r.level)
                parse_mmtf.assertEqual(mmtf_r.disordered, mmcif_r.disordered)
                parse_mmtf.assertEqual(mmtf_r.resname, mmcif_r.resname)
                parse_mmtf.assertEqual(mmtf_r.segid, mmcif_r.segid)
                # Atom lists are handed to test_atoms through the test case.
                parse_mmtf.mmcif_atoms = list(mmcif_r.get_atom())
                parse_mmtf.mmtf_atoms = list(mmtf_r.get_atom())
                test_atoms(parse_mmtf=parse_mmtf)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure("PDB/4CUP.mmtf")
        mmcif_parser = MMCIFParser()
        mmcif_struct = mmcif_parser.get_structure("example", "PDB/4CUP.cif")

        # Whole-structure atom comparison.
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        test_atoms(self)

        # Chain-by-chain residue comparison.
        mmcif_chains = list(mmcif_struct.get_chains())
        mmtf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(mmcif_chains), len(mmtf_chains))
        for mmcif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
            self.mmcif_res = list(mmcif_chain.get_residues())
            self.mmtf_res = list(mmtf_chain.get_residues())
            test_residues(self)

        # Whole-structure residue comparison.
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        test_residues(self)

        mmcif_model_count = len(list(mmcif_struct.get_models()))
        mmtf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(mmcif_model_count, mmtf_model_count)
Example #19
0
def clean_pdb(file_input, file_output, chain_to_keep, parameters):
    """Parse an mmCIF file, filter it and write the result as mmCIF.

    :param file_input: path of the mmCIF file to read; its name without the
        last four characters, upper-cased, is used as the structure id
    :param file_output: path of the mmCIF file to write
    :param chain_to_keep: forwarded to ``remove_chains``
    :param parameters: forwarded to ``remove_extra_atoms``
    """
    cif_reader = MMCIFParser()
    structure_id = file_input[:-4].upper()
    parsed = cif_reader.get_structure(structure_id, file_input)

    # Chains are filtered first, then extra atoms are removed.
    filtered = remove_chains(parsed, chain_to_keep)
    cleaned = remove_extra_atoms(filtered, parameters)

    writer = MMCIFIO()
    writer.set_structure(cleaned)
    writer.save(file_output)
Example #20
0
 def __init__(self, path):
     '''
         Initialize every PDB_Parser with a path to a structure-file in CIF format.
         An example file is included in the repository (7ahl.cif).
         Tip: Store the parsed structure in an object variable instead of parsing it
         again & again ...
     '''
     # parser object for reading in structure in CIF format
     cif_parser = MMCIFParser()
     # Parse the structure once and re-use it in the functions below
     self.structure = cif_parser.get_structure("Structure", path)
     # NOTE(review): this looks like a leftover debug call - it prints on every
     # construction and depends on a chain named "D"; confirm and remove.
     print(self.get_number_of_water_molecules("D"))
Example #21
0
 def __init__(self, path):
     '''
         Parse the CIF structure file at ``path`` once and keep the result.

         Sets ``self.structure`` (the parsed structure), ``self.model`` (its
         first model) and ``self.residue_dict`` (upper-cased keys of
         protein_letters_3to1 plus 'HOH', which maps to an empty string).
         An example file is included in the repository (7ahl.cif).
     '''
     # QUIET parser object for reading in structure in CIF format
     reader = MMCIFParser(QUIET=True)
     self.structure = reader.get_structure('structure', path)
     self.model = self.structure[0]

     lookup = {}
     for mapping in (protein_letters_3to1, {'HOH': ''}):
         for code, letter in mapping.items():
             lookup[code.upper()] = letter
     self.residue_dict = lookup
Example #22
0
 def __init__(self,
              structure_builder=None,
              QUIET=False,
              removeHeteroDuplicated=True):
     """Store the duplicate-removal flag and run both parser initialisers.

     :param structure_builder: passed unchanged to both initialisers
     :param QUIET: passed unchanged to both initialisers
     :param removeHeteroDuplicated: stored as ``self.removeHeteroDuplicated``
     """
     self.removeHeteroDuplicated = removeHeteroDuplicated
     shared_kwargs = {"structure_builder": structure_builder, "QUIET": QUIET}
     # PDBParser is initialised first, then MMCIFParser.
     for parser_cls in (PDBParser, MMCIFParser):
         parser_cls.__init__(self, **shared_kwargs)
Example #23
0
 def test_parser(self):
     """Extract polypeptides from 1A8O.

     For each peptide builder, checks the single polypeptide obtained when
     non-standard amino acids are allowed, then the three fragments
     obtained in strict mode.
     """
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # ==========================================================
         # Check that serial_num (model column) is stored properly
         self.assertEqual(structure[0].serial_num, 1)
         # First try allowing non-standard amino acids,
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 1)
         pp = polypeptides[0]
         # Check the start and end positions
         self.assertEqual(pp[0].get_id()[1], 151)
         self.assertEqual(pp[-1].get_id()[1], 220)
         # Check the sequence
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         # Here non-standard MSE are shown as M
         self.assertEqual(
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))
         # ==========================================================
         # Now try strict version with only standard amino acids
         # Should ignore MSE 151 at start, and then break the chain
         # at MSE 185, and MSE 214,215
         polypeptides = ppbuild.build_peptides(structure[0], True)
         self.assertEqual(len(polypeptides), 3)
         # First fragment
         pp = polypeptides[0]
         self.assertEqual(pp[0].get_id()[1], 152)
         self.assertEqual(pp[-1].get_id()[1], 184)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))
         # Second fragment
         pp = polypeptides[1]
         self.assertEqual(pp[0].get_id()[1], 186)
         self.assertEqual(pp[-1].get_id()[1], 213)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))
         # Third fragment
         pp = polypeptides[2]
         self.assertEqual(pp[0].get_id()[1], 216)
         self.assertEqual(pp[-1].get_id()[1], 220)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("TACQG", str(s))
Example #24
0
 def test_parser(self):
     """Extract polypeptides from 1A8O.

     For each peptide builder, checks the single polypeptide obtained when
     non-standard amino acids are allowed, then the three fragments
     obtained in strict mode.
     """
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # ==========================================================
         # Check that serial_num (model column) is stored properly
         self.assertEqual(structure[0].serial_num, 1)
         # First try allowing non-standard amino acids,
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 1)
         pp = polypeptides[0]
         # Check the start and end positions
         self.assertEqual(pp[0].get_id()[1], 151)
         self.assertEqual(pp[-1].get_id()[1], 220)
         # Check the sequence
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         # Here non-standard MSE are shown as M
         self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                          "NANPDCKTILKALGPGATLEEMMTACQG", str(s))
         # ==========================================================
         # Now try strict version with only standard amino acids
         # Should ignore MSE 151 at start, and then break the chain
         # at MSE 185, and MSE 214,215
         polypeptides = ppbuild.build_peptides(structure[0], True)
         self.assertEqual(len(polypeptides), 3)
         # First fragment
         pp = polypeptides[0]
         self.assertEqual(pp[0].get_id()[1], 152)
         self.assertEqual(pp[-1].get_id()[1], 184)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))
         # Second fragment
         pp = polypeptides[1]
         self.assertEqual(pp[0].get_id()[1], 186)
         self.assertEqual(pp[-1].get_id()[1], 213)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))
         # Third fragment
         pp = polypeptides[2]
         self.assertEqual(pp[0].get_id()[1], 216)
         self.assertEqual(pp[-1].get_id()[1], 220)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("TACQG", str(s))
Example #25
0
    def testModels(self):
        """Test file with multiple models.

        Parses PDB/1LCD.cif with both parsers, checks the model count and
        serial numbers, then the polypeptide built from the first model
        with and without non-standard amino acids. Finally checks the model
        count of PDB/2OFG.cif.
        """
        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        self.assertEqual(len(structure), 3)
        self.assertEqual(len(f_structure), 3)

        # Same sequence is expected in both modes.
        expected = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

        for builder in [PPBuilder(), CaPPBuilder()]:
            # Check that serial_num (model column) is stored properly
            for index, serial in enumerate((1, 2, 3)):
                self.assertEqual(structure[index].serial_num, serial)
            # False: allow non-standard amino acids (MSE would be shown
            # as M); True: strict version with only standard amino acids.
            for strict in (False, True):
                peptides = builder.build_peptides(structure[0], strict)
                self.assertEqual(len(peptides), 1)
                peptide = peptides[0]
                # Check the start and end positions
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Check the sequence
                sequence = peptide.get_sequence()
                self.assertIsInstance(sequence, Seq)
                self.assertEqual(sequence.alphabet, generic_protein)
                self.assertEqual(expected, str(sequence))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
Example #26
0
    def testModels(self):
        """Test file with multiple models.

        Parses PDB/1LCD.cif with both parsers, checks the model count and
        serial numbers, then the polypeptide built from the first model
        with and without non-standard amino acids. Finally checks the model
        count of PDB/2OFG.cif.
        """
        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        self.assertEqual(len(structure), 3)
        self.assertEqual(len(f_structure), 3)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(structure[1].serial_num, 2)
            self.assertEqual(structure[2].serial_num, 3)
            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 1)
            pp = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 1)
            self.assertEqual(pp[-1].get_id()[1], 51)
            # Check the sequence
            s = pp.get_sequence()
            # assertIsInstance reports the offending type on failure.
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            # Here non-standard MSE are shown as M
            self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR",
                             str(s))
            # ==========================================================
            # Now try strict version with only standard amino acids
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 1)
            pp = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 1)
            self.assertEqual(pp[-1].get_id()[1], 51)
            # Check the sequence
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR",
                             str(s))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
def get_atoms(file):
    """Collect the C-alpha positions of every chain in the first model.

    :param file: path string of an mmCIF file; the text before its first
        "." is used as the structure id.
    :return: a list with one entry per chain, each entry being a list of
        ``(residue name, CA vector)`` tuples for the residues that carry a
        ``CA`` atom.
    """
    cif_parser = MMCIFParser()
    first_model = cif_parser.get_structure(file.split('.')[0], file)[0]
    # Residues without a CA atom are skipped, so a chain may yield [].
    return [
        [
            (res.get_resname(), res['CA'].get_vector())
            for res in chain
            if res.has_id('CA')
        ]
        for chain in first_model
    ]
    def setUp(self):
        """Parse 1LCD from its PDB and mmCIF files into self.pdbo / self.cifo."""
        # Ignore PDBConstructionWarning (this edits the global warning filters).
        warnings.simplefilter("ignore", PDBConstructionWarning)

        # The data files live in the "PDB" directory next to this module.
        data_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), "PDB")

        self.pdbo = PDBParser(QUIET=1).get_structure(
            "pdb", os.path.join(data_dir, "1LCD.pdb")
        )
        self.cifo = MMCIFParser(QUIET=1).get_structure(
            "pdb", os.path.join(data_dir, "1LCD.cif")
        )
Example #29
0
    def setUp(self):
        """Load the 1LCD structure twice: once from PDB, once from mmCIF.

        The parsed structures are stored as ``self.pdbo`` and ``self.cifo``.
        """
        # Silence PDBConstructionWarning (changes the global warning filters).
        warnings.simplefilter('ignore', PDBConstructionWarning)

        here = os.path.abspath(os.path.dirname(__file__))
        paths = {
            ext: os.path.join(here, "PDB", "1LCD." + ext) for ext in ("pdb", "cif")
        }

        self.pdbo = PDBParser(QUIET=1).get_structure('pdb', paths["pdb"])
        self.cifo = MMCIFParser(QUIET=1).get_structure('pdb', paths["cif"])
Example #30
0
    def get_structure(self, *args):
        """Parse a structure from a path, URL or handle, trying PDB then mmCIF.

        Accepted call forms are ``get_structure(id, fileName)`` and
        ``get_structure(fileName)``; in the second form ``str(fileName)`` is
        used as the structure id.

        ``fileName`` may be:

        - an ``http://`` / ``https://`` URL, whose text is downloaded first,
        - a path string (a ``.gz`` suffix is opened through ``gzip``),
        - any non-string object, which is handed to the parsers unchanged.

        The input is first parsed with ``PDBParser``; if that raises, the
        error is printed and ``MMCIFParser`` is tried instead. When
        ``self.removeHeteroDuplicated`` is true the result is passed through
        ``self.filterOutDuplicated``.

        :raises ValueError: if called with zero or more than two arguments.
        :raises Exception: if the URL download does not succeed.
        """
        if len(args) == 2:
            pdbId, fileName = args
        elif len(args) == 1:
            fileName = args[0]
            pdbId = str(fileName)
        else:
            raise ValueError(
                "Error, input should be (id, fileName) or (fileName))")

        # Only strings can be URLs. re.match() raises TypeError on handles,
        # which the code below explicitly supports, so guard the check.
        if isinstance(fileName, str) and re.match("http(s?)://", fileName):
            r = requests.get(fileName)
            if r.ok:
                fileName = StringIO(r.text)
            else:
                raise Exception("Error downloading pdb")

        try:
            if not isinstance(fileName, str) or not fileName.endswith(".gz"):
                structure = PDBParser.get_structure(self, pdbId, fileName)
            else:
                with gzip.open(fileName) as f:
                    structure = PDBParser.get_structure(self, pdbId, f)
        except Exception as e:
            # Best-effort fallback: anything the PDB parser rejects is
            # retried as mmCIF.
            print(e)
            # A handle may have been (partly) consumed by the failed PDB
            # attempt; rewind it so the mmCIF parser sees the whole content.
            rewind = getattr(fileName, "seek", None)
            if callable(rewind):
                try:
                    rewind(0)
                except (OSError, ValueError):
                    # Non-seekable or closed handle: keep the old behaviour.
                    pass
            structure = MMCIFParser.get_structure(self, pdbId, fileName)
        if self.removeHeteroDuplicated:
            structure = self.filterOutDuplicated(structure)
        return structure
Example #31
0
 def test_write(self):
     """Write 1A8O with MMTFIO and check the fields decoded from the file."""
     structure = MMCIFParser().get_structure("1A8O", "PDB/1A8O.cif")
     writer = MMTFIO()
     writer.set_structure(structure)
     # mkstemp hands back an open descriptor; only the path is needed.
     descriptor, path = tempfile.mkstemp()
     os.close(descriptor)
     try:
         writer.save(path)
         # The file must also load through MMTFParser; the result is unused.
         MMTFParser.get_structure(path)
         decoded = mmtf.parse(path)
         self.assertEqual(decoded.structure_id, "1A8O")
         # Overall counts.
         for attribute, expected in (
             ("num_models", 1),
             ("num_chains", 2),
             ("num_groups", 158),
             ("num_atoms", 644),
         ):
             self.assertEqual(getattr(decoded, attribute), expected)
         # Every per-atom list holds one value per atom.
         for attribute in (
             "x_coord_list",
             "y_coord_list",
             "z_coord_list",
             "b_factor_list",
             "occupancy_list",
         ):
             self.assertEqual(len(getattr(decoded, attribute)), 644)
         self.assertEqual(decoded.x_coord_list[5], 20.022)
         self.assertEqual(set(decoded.ins_code_list), {"\x00"})
         self.assertEqual(set(decoded.alt_loc_list), {"\x00"})
         self.assertEqual(list(decoded.atom_id_list), list(range(1, 645)))
         self.assertEqual(
             list(decoded.sequence_index_list), list(range(70)) + [-1] * 88
         )
         # Chain and group bookkeeping.
         self.assertEqual(decoded.chain_id_list, ["A", "B"])
         self.assertEqual(decoded.chain_name_list, ["A", "A"])
         self.assertEqual(decoded.chains_per_model, [2])
         self.assertEqual(len(decoded.group_list), 21)
         self.assertEqual(len(decoded.group_id_list), 158)
         self.assertEqual(len(decoded.group_type_list), 158)
         self.assertEqual(decoded.groups_per_chain, [70, 88])
         # Entities: one polymer followed by one water entity.
         self.assertEqual(len(decoded.entity_list), 2)
         polymer, water = decoded.entity_list[0], decoded.entity_list[1]
         self.assertEqual(polymer["type"], "polymer")
         self.assertEqual(polymer["chainIndexList"], [0])
         self.assertEqual(
             polymer["sequence"],
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
         )
         self.assertEqual(water["type"], "water")
         self.assertEqual(water["chainIndexList"], [1])
         self.assertEqual(water["sequence"], "")
     finally:
         os.remove(path)
Example #32
0
 def build_all(cls,
               reslist,
               reference_site,
               parent_entry,
               cif_path,
               annotate=True,
               redundancy_cutoff=None):
     """Build all sites, using a list of catalytic residues as input.

     :param reslist: list of catalytic residues to build the sites from.
     :param reference_site: forwarded unchanged to ``cls.build``.
     :param parent_entry: forwarded unchanged to ``cls.build``.
     :param cif_path: mmCIF file handed to the structure parser.
     :param annotate: if true, parse with ``MMCIFParser``, keep its mmCIF
         dictionary, attach both to each site and call ``find_ligands()``;
         otherwise parse with ``FastMMCIFParser`` and skip that step.
     :param redundancy_cutoff: passed as ``cutoff`` to
         ``PdbSite._remove_redundant_sites``.
     :return: a list of PdbSite objects. The list is empty (and a
         ``RuntimeWarning`` is issued) when the structure cannot be parsed.
     """
     # Map structure objects in every residue
     sites = []
     mmcif_dict = {}
     try:
         if annotate:
             parser = MMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
             mmcif_dict = parser._mmcif_dict
         else:
             parser = FastMMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
     except (TypeError, PDBConstructionException):
         # The category is an argument of warn(), not of format(); it used
         # to be swallowed by format() and the warning lost its category.
         warnings.warn('Could not parse structure {}'.format(cif_path),
                       RuntimeWarning)
         return sites
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     # We want all equivalent residues from identical assembly chains
     reslist = PdbSite._get_assembly_residues(reslist, structure)
     # Get seeds to build active sites
     seeds = PdbSite._get_seeds(reslist)
     # Build a site from each seed
     for seed in seeds:
         sites.append(cls.build(seed, reslist, reference_site,
                                parent_entry))
     # Reduce redundancy
     sites = PdbSite._remove_redundant_sites(sites,
                                             cutoff=redundancy_cutoff)
     # Add ligands and annotations
     if annotate and structure:
         for site in sites:
             site.parent_structure = structure
             site.mmcif_dict = mmcif_dict
             site.find_ligands()
     # Flag unclustered sites
     PdbSite._mark_unclustered(sites)
     return sites
Example #33
0
    def test_header(self):
        """Check the header dictionary MMCIFParser builds for two files."""
        cif_parser = MMCIFParser()

        # For each file: the header keys and the values expected for them.
        cases = [
            (
                "PDB/a_structure.cif",
                [
                    ("idcode", ""),
                    ("head", ""),
                    ("deposition_date", ""),
                    ("structure_method", ""),
                    ("resolution", 0.0),
                ],
            ),
            (
                "PDB/1A8O.cif",
                [
                    ("idcode", "1A8O"),
                    ("head", "Viral protein"),
                    ("deposition_date", ""),
                    ("structure_method", "X-RAY DIFFRACTION"),
                    ("resolution", 1.7),
                ],
            ),
        ]
        for cif_path, expected_fields in cases:
            parsed = cif_parser.get_structure("example", cif_path)
            for key, expected in expected_fields:
                self.assertEqual(expected, parsed.header[key])
Example #34
0
def CifAtomIterator(source):
    """Iterate over the chains of an mmCIF file as SeqRecord objects.

    Argument source is a file-like object or a path to a file.

    Each sequence is built from the 3D structure (the _atom_site.* fields)
    of the mmCIF file.

    Three letter amino acid codes from HETATM entries that are not recognised
    (e.g. "CSD") appear as "X" in the sequence.

    Besides the information from the PDB header (identical for all records),
    these chain specific entries are stored in the annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    When the residue numbering shows that amino acids are missing from the
    structure, the sequence is padded with 'X' characters for the length of
    the missing region, and None takes the corresponding place in the list
    record.annotations["residues"].

    Most of the hard work is delegated to the Bio.PDB module. The annotation
    information could be improved, but that extra parsing belongs in
    parse_pdb_header rather than in this module.

    Bio.SeqIO calls this internally for the atom based interpretation of the
    PDB file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # Only import parser when needed, to avoid/delay NumPy dependency in SeqIO
    from Bio.PDB.MMCIFParser import MMCIFParser

    cif_structure = MMCIFParser().get_structure(None, source)
    idcode = cif_structure.header["idcode"]
    if idcode:
        record_prefix = idcode
    else:
        # No id in the header: warn and fall back to a placeholder id.
        warnings.warn("Could not determine the PDB ID.",
                      BiopythonParserWarning)
        record_prefix = "????"
    yield from AtomIterator(record_prefix, cif_structure)
def get_descriptors(file):
    """Describe every residue of the first model of an mmCIF file.

    :param file: path string of an mmCIF file; the text before its first
        "." is used as the structure id.
    :return: a flat list (all chains, in iteration order) with one dict per
        residue. Each dict has a ``"name"`` key; residues with a ``CA`` atom
        also get ``"coord"`` (the CA vector) and ``"hse"`` (the first two
        values of the residue's HSExposureCB entry).
    """
    cif_parser = MMCIFParser()
    first_model = cif_parser.get_structure(file.split('.')[0], file)[0]
    exposure = HSExposureCB(first_model)

    descriptors = []
    for chain in first_model:
        for res in chain:
            entry = {"name": res.get_resname()}
            if res.has_id('CA'):
                entry["coord"] = res['CA'].get_vector()
                # NOTE(review): this lookup raises if the residue has no
                # HSE entry - confirm that cannot happen for CA residues.
                values = exposure[(chain.id, res.id)]
                entry["hse"] = (values[0], values[1])
            descriptors.append(entry)
    return descriptors
Example #36
0
def func1():
    """Extract one chain from a gzipped mmCIF entry and save it as a PDB file.

    The PDB id is read from ``sys.argv[2]`` and the chain id from
    ``sys.argv[3]``. The entry is looked up under the hard-coded mirror
    directory as ``<root>/<id[1:3]>/<id>.cif.gz``, decompressed to
    ``pdbprocess.cif`` in the current directory and parsed; the requested
    chain of the first model is then written to ``chain1.pdb``.

    Any error (missing argument, missing file, parse failure, unknown chain)
    is reported by printing "FILE NOT FOUND"; nothing is raised.
    """
    import gzip
    import shutil
    import sys
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.PDBIO import PDBIO

    parser = MMCIFParser(QUIET=True)

    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    filename = "pdbprocess.cif"

    try:
        pdb1 = "{}".format(sys.argv[2])
        fol = pdb1[1:3]
        c1 = "{}".format(sys.argv[3])
        pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)
        # Stream the decompressed data to disk; the context managers close
        # both files even when reading or writing fails part-way.
        with gzip.open(pdbfile, "rb") as tar, open(filename, "wb") as out:
            shutil.copyfileobj(tar, out)
        structure = parser.get_structure(pdb1, filename)
        model = structure[0]
        chain = model[c1]

        io = PDBIO()
        io.set_structure(chain)
        io.save("chain1.pdb")
    except Exception:
        # Deliberate best-effort: every failure is reported the same way.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit pass.
        print("FILE NOT FOUND")
Example #37
0
def get_STR(filePath, fileType='pdb'):
    """Load ``filePath`` as a structure object or as an mmCIF dictionary.

    :param filePath: file handed to the parser.
    :param fileType: ``"pdb"`` returns the structure parsed by
        ``MMCIFParser``; ``"cif"`` returns the ``MMCIF2Dict`` mapping.
    :raises TypeError: for any other ``fileType``.
    """
    if fileType not in ("pdb", "cif"):
        raise TypeError("%s is not a valid fileType" % fileType)

    if fileType == "cif":
        return MMCIF2Dict(filePath)

    # NOTE(review): the "pdb" type is parsed with MMCIFParser, not a PDB
    # parser - confirm this is intended.
    return MMCIFParser(QUIET=True).get_structure("pdb", filePath)
Example #38
0
    def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
        """Parse one entry from MMTF and from mmCIF and compare the results.

        Atoms and residues are stored on ``self`` (``mmcif_atoms``,
        ``mmtf_atoms``, ``mmcif_res``, ``mmtf_res``) before the
        ``check_atoms`` / ``check_residues`` helpers are called.
        """
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure(mmtf_filename)
        mmcif_struct = MMCIFParser().get_structure("example", cif_filename)

        # All atoms of both structures.
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        self.check_atoms()

        # Residues chain by chain; the chain counts must agree first.
        cif_chains = list(mmcif_struct.get_chains())
        tf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(cif_chains), len(tf_chains))
        for cif_chain, tf_chain in zip(cif_chains, tf_chains):
            self.mmcif_res = list(cif_chain.get_residues())
            self.mmtf_res = list(tf_chain.get_residues())
            self.check_residues()

        # All residues of both structures, then the number of models.
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        self.check_residues()
        cif_model_count = len(list(mmcif_struct.get_models()))
        self.assertEqual(cif_model_count, len(list(mmtf_struct.get_models())))
Example #39
0
    def test_with_anisotrop(self):
        """Parse 4CUP with both mmCIF parsers and check atoms and ANISOU data."""
        structure = MMCIFParser().get_structure("example", "PDB/4CUP.cif")
        f_structure = FastMMCIFParser().get_structure("example", "PDB/4CUP.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())
        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expected values, identical for both parsers.
        expected_names = ['N', 'CA', 'C', 'O', 'CB']
        first_coord = numpy.array([50.346, 19.287, 17.288], dtype=numpy.float32)
        first_anisou = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
            dtype=numpy.float32)
        atom_937_anisou = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
            dtype=numpy.float32)

        for atom_list in (s_atoms, f_atoms):
            leading = atom_list[:5]
            self.assertSequenceEqual([a.get_name() for a in leading],
                                     expected_names)
            self.assertSequenceEqual([a.get_id() for a in leading],
                                     expected_names)
            self.assertSequenceEqual([a.get_fullname() for a in leading],
                                     expected_names)
            self.assertSequenceEqual([a.get_occupancy() for a in leading],
                                     [1.0] * 5)

            first_atom = atom_list[0]
            self.assertIsInstance(first_atom.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(first_atom.get_coord(), first_coord)
            self.assertEqual(first_atom.get_bfactor(), 32.02)
            numpy.testing.assert_array_equal(first_atom.get_anisou(),
                                             first_anisou)

            # NOTE(review): this lookup uses f_structure on both passes of
            # the loop - confirm that is intended.
            atom_937 = list(f_structure[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(),
                                             atom_937_anisou)
Example #40
0
 def testModels(self):
     """Parse the three-model 1LCD file and build peptides from model 0."""
     structure = MMCIFParser().get_structure("example", "PDB/1LCD.cif")
     self.assertEqual(len(structure), 3)
     expected_seq = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # Check that serial_num (model column) is stored properly
         for index, serial in enumerate((1, 2, 3)):
             self.assertEqual(structure[index].serial_num, serial)
         # First allow non-standard amino acids (non-standard MSE are shown
         # as M), then repeat with the strict, standard-only setting. Both
         # runs are expected to give the same single polypeptide.
         for standard_only in (False, True):
             polypeptides = ppbuild.build_peptides(structure[0], standard_only)
             self.assertEqual(len(polypeptides), 1)
             peptide = polypeptides[0]
             # Check the start and end positions
             self.assertEqual(peptide[0].get_id()[1], 1)
             self.assertEqual(peptide[-1].get_id()[1], 51)
             # Check the sequence
             sequence = peptide.get_sequence()
             self.assertTrue(isinstance(sequence, Seq))
             self.assertEqual(sequence.alphabet, generic_protein)
             self.assertEqual(expected_seq, str(sequence))
Example #41
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O with both mmCIF parsers.

        The file is parsed with MMCIFParser and FastMMCIFParser. Peptides
        are built with PPBuilder and CaPPBuilder, first allowing
        non-standard amino acids and then in strict mode, and the first
        atoms of both structures are compared against fixed values.
        """

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        # Both parsers must find exactly one model.
        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            # (only the MMCIFParser structure is checked in strict mode)
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            # First fragment
            pp = polypeptides[0]
            self.assertEqual(pp[0].get_id()[1], 152)
            self.assertEqual(pp[-1].get_id()[1], 184)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))

            # Second fragment
            pp = polypeptides[1]
            self.assertEqual(pp[0].get_id()[1], 186)
            self.assertEqual(pp[-1].get_id()[1], 213)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))

            # Third fragment
            pp = polypeptides[2]
            self.assertEqual(pp[0].get_id()[1], 216)
            self.assertEqual(pp[-1].get_id()[1], 220)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TACQG", str(s))

        # Atom-level checks, applied to the output of both parsers.
        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        for atoms in [s_atoms, f_atoms]:
            self.assertEqual(len(atoms), 644)
            atom_names = ['N', 'CA', 'C', 'O', 'CB']
            self.assertSequenceEqual([a.get_name() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_id() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)

            self.assertEqual(atoms[0].get_bfactor(), 18.03)
            # No atom of this file is expected to carry ANISOU data.
            for atom in atoms:
                self.assertIsNone(atom.get_anisou())