def pdb_contacts(pdb, chain, dist):
    """Write the sequence and the residue contact map of one PDB chain.

    All polypeptide fragments built by ``CaPPBuilder`` for the chain are
    concatenated into one polypeptide.  The sequence is written to the
    module-level ``file_seq`` handle; the residue count followed by one
    ``"i j"`` line (0-based indices) per residue pair closer than *dist* is
    written to the module-level ``file_map`` handle.

    Args:
        pdb: forwarded unchanged to ``pdb_getchain``.
        chain: forwarded unchanged to ``pdb_getchain``.
        dist: distance cutoff; a pair is reported when ``norm`` of the two
            representative atom coordinates is strictly below it.

    Raises:
        IndexError: if no polypeptide can be built from the chain.
        Exception: whatever the atom lookup raises (a residue with neither
            CB nor CA); ``"ERROR"`` is written first so unsupervised runs
            can grep for it.
    """

    def contact_atom(residue):
        # Quite frequently residues do not have a resolved CB, in which case
        # the CA is used instead.
        return residue["CB"] if residue.has_id("CB") else residue["CA"]

    pdb_chain = pdb_getchain(pdb, chain)

    # If a mutated residue is present in a chain it is classed as a hetatm.
    # However, not all hetatms in a chain are part of the sequence. The
    # CaPPBuilder makes sequences by requiring CA-CA distances to be <4.3A.
    # Common hetatms are identified such that an MSE hetatm will be replaced
    # by an M in the sequence.
    # Build the fragments once and reuse them for both the first fragment
    # and the remaining ones.
    fragments = CaPPBuilder().build_peptides(pdb_chain, aa_only=False)
    polypep_tot = fragments[0]
    sequen = polypep_tot.get_sequence()
    for fragment in fragments[1:]:
        sequen += fragment.get_sequence()
        polypep_tot += fragment

    # Sometimes the terminal residue in a protein isn't fully resolved.
    last_res = polypep_tot[-1]
    if last_res.has_id("CA") or last_res.has_id("CB"):
        polypep = polypep_tot  # If resolved take the whole polypeptide
        file_seq.write(">sequence\n%s\n" % sequen)
        file_seq.write("%s" % sequen)
    else:
        polypep = polypep_tot[:-1]  # Otherwise take all but the last AA
        file_seq.write(">sequence\n%s\n" % sequen[:-1])
        file_seq.write("%s" % sequen[:-1])

    file_map.write(str(len(polypep)) + "\n")

    for i, residue1 in enumerate(polypep):
        try:
            atom1 = contact_atom(residue1)
        except Exception:
            sys.stdout.write("ERROR")
            raise
        for j, residue2 in enumerate(polypep):
            try:
                atom2 = contact_atom(residue2)
            except Exception:
                file_map.write("ERROR")
                raise
            if norm(atom1.get_coord(), atom2.get_coord()) < dist:
                file_map.write("%d %d\n" % (i, j))
def test_insertions(self):
    """Check polypeptide building on a file with residue insertion codes."""
    expected_sequence = (
        "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
        "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
        "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
        "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
    )
    cif_parser = MMCIFParser(QUIET=1)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        structure = cif_parser.get_structure("example", "PDB/4ZHL.cif")

    builders = [PPBuilder(), CaPPBuilder()]
    for builder in builders:
        # Non-standard amino acids are allowed (aa_only=False).
        segments = builder.build_peptides(structure[0], False)
        self.assertEqual(len(segments), 2)

        # Only the first segment is inspected: start and end positions.
        first_segment = segments[0]
        self.assertEqual(first_segment[0].get_id()[1], 16)
        self.assertEqual(first_segment[-1].get_id()[1], 244)

        # Sequence type, alphabet and content.
        observed = first_segment.get_sequence()
        self.assertIsInstance(observed, Seq)
        self.assertEqual(observed.alphabet, generic_protein)
        self.assertEqual(expected_sequence, str(observed))
def pdb_polypep(pdb, chain, trim):
    """Return the residues of one PDB chain as a single polypeptide.

    All polypeptide fragments built by ``CaPPBuilder`` for the chain are
    concatenated, optionally trimmed with ``pp_trim``, and the last residue
    is dropped when it has neither a CA nor a CB atom.

    Args:
        pdb: forwarded unchanged to ``pdb_getchain``.
        chain: forwarded unchanged to ``pdb_getchain``.
        trim: when truthy, the concatenated polypeptide is passed through
            ``pp_trim`` (described in the original as removing unstructured
            terminal ends).

    Returns:
        The concatenated polypeptide, or all but its last residue when that
        residue is unresolved.

    Raises:
        IndexError: if no polypeptide can be built from the chain.
    """
    pdb_chain = pdb_getchain(pdb, chain)

    # If a mutated residue is present in a chain it is classed as a hetatm.
    # However, not all hetatms in a chain are part of the sequence. The
    # CaPPBuilder makes sequences by requiring CA-CA distances to be <4.3A.
    # Build the fragments once and reuse them for both the first fragment
    # and the remaining ones.
    fragments = CaPPBuilder().build_peptides(pdb_chain, aa_only=False)
    polypep_tot = fragments[0]
    for fragment in fragments[1:]:
        polypep_tot += fragment

    # Remove unstructured terminal ends
    if trim:
        polypep_tot = pp_trim(polypep_tot)

    # Sometimes the terminal residue in a protein isn't fully resolved.
    last_res = polypep_tot[-1]
    if last_res.has_id("CA") or last_res.has_id("CB"):
        return polypep_tot
    return polypep_tot[:-1]
def read_pdb_file(file_name, name=None):
    """Parse a PDB file and collect its residues, sequence and atoms.

    Polypeptides are built with ``PPBuilder``; when that yields nothing,
    ``CaPPBuilder`` is used instead.

    Args:
        file_name: path of the PDB file.
        name: identifier given to the parsed structure; defaults to
            *file_name* with its extension removed.

    Returns:
        A tuple ``(protein_name, structure, residues, sequence, atoms)``:

        * protein_name: base name of *file_name* with ".pdb" removed.
        * structure: the parsed structure object.
        * residues: residues of all polypeptides, in polypeptide order.
        * sequence: concatenated sequence string of all polypeptides,
          built from the same polypeptides as *residues*.
        * atoms: all atoms of the structure.

    Raises:
        ValueError: if ``len(structure)`` is not exactly 1.
    """
    if name is None:
        name = splitext(file_name)[0]

    structure = PDBParser().get_structure(name, file_name)
    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " + name)

    atoms = Selection.unfold_entities(structure, 'A')

    polypeptides = PPBuilder().build_peptides(structure)
    if not polypeptides:
        # Fall back to the CA-CA distance criterion.
        polypeptides = CaPPBuilder().build_peptides(structure)

    sequence = ''.join(str(p.get_sequence()) for p in polypeptides)
    residues = [
        residue for polypeptide in polypeptides for residue in polypeptide
    ]
    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residues, sequence, atoms
def test_parser(self):
    """Extract polypeptides from 1A8O."""
    # Expected (first residue id, last residue id, sequence) of each fragment
    # in strict mode: MSE 151 at the start is ignored and the chain breaks
    # at MSE 185 and MSE 214,215.
    strict_fragments = [
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    ]

    parser = MMCIFParser()
    structure = parser.get_structure("example", "PDB/1A8O.cif")
    self.assertEqual(len(structure), 1)

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)

        # First try allowing non-standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], False)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 151)
        self.assertEqual(pp[-1].get_id()[1], 220)
        # Check the sequence
        s = pp.get_sequence()
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
            "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

        # Now try strict version with only standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 3)
        for index, (first_id, last_id, sequence) in enumerate(strict_fragments):
            pp = polypeptides[index]
            self.assertEqual(pp[0].get_id()[1], first_id)
            self.assertEqual(pp[-1].get_id()[1], last_id)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(sequence, str(s))
def testModels(self):
    """Check parsing and polypeptide building on multi-model files."""
    expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

    parser = MMCIFParser(QUIET=1)
    f_parser = FastMMCIFParser(QUIET=1)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        structure = parser.get_structure("example", "PDB/1LCD.cif")
        f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

    self.assertEqual(len(structure), 3)
    self.assertEqual(len(f_structure), 3)

    for builder in [PPBuilder(), CaPPBuilder()]:
        # The model column must be stored as serial_num on each model.
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)

        # First allow non-standard amino acids (aa_only=False), then the
        # strict mode (aa_only=True); both give the same single peptide.
        for aa_only in (False, True):
            peptides = builder.build_peptides(structure[0], aa_only)
            self.assertEqual(len(peptides), 1)
            peptide = peptides[0]
            # Start and end positions
            self.assertEqual(peptide[0].get_id()[1], 1)
            self.assertEqual(peptide[-1].get_id()[1], 51)
            # Sequence type, alphabet and content
            sequence = peptide.get_sequence()
            self.assertIsInstance(sequence, Seq)
            self.assertEqual(sequence.alphabet, generic_protein)
            self.assertEqual(expected_sequence, str(sequence))

    # This structure contains several models with multiple lengths.
    # The tests were failing.
    structure = parser.get_structure("example", "PDB/2OFG.cif")
    self.assertEqual(len(structure), 3)
def test_polypeptide(self):
    """Tests on polypeptide class and methods."""
    expected_sequences = [
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    ]
    # Expected (phi, psi) of the first three residues of each polypeptide;
    # the first residue of a polypeptide has no phi angle.
    expected_phi_psi = [
        [
            (None, -0.46297171497725553),
            (-1.0873937604007962, 2.1337707832637109),
            (-2.4052232743651878, 2.3807316946081554),
        ],
        [
            (None, -0.6810077089092923),
            (-1.2654003477656888, -0.58689987042756309),
            (-1.7467679151684763, -1.5655066256698336),
        ],
        [
            (None, -0.73222884210889716),
            (-1.1044740234566259, -0.69681334592782884),
            (-1.8497413300164958, 0.34762889834809058),
        ],
    ]

    p = PDBParser(PERMISSIVE=True)
    pdb1 = "PDB/1A8O.pdb"
    s = p.get_structure("scr", pdb1)

    # Polypeptides built with the C-N distance criterion
    ppb = PPBuilder()
    pp = ppb.build_peptides(s)
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(str(pp[index].get_sequence()), sequence)
    for index, expected_rows in enumerate(expected_phi_psi):
        phi_psi = pp[index].get_phi_psi_list()
        for row, (phi, psi) in enumerate(expected_rows):
            if phi is None:
                self.assertIsNone(phi_psi[row][0])
            else:
                self.assertAlmostEqual(phi_psi[row][0], phi, places=3)
            self.assertAlmostEqual(phi_psi[row][1], psi, places=3)

    # Polypeptides built with the CA-CA distance criterion
    ppb = CaPPBuilder()
    pp = ppb.build_peptides(s)
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(str(pp[index].get_sequence()), sequence)
    self.assertEqual([ca.serial_number for ca in pp[0].get_ca_list()], [
        10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131, 139,
        150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242, 251, 260,
        267, 276, 284
    ])
    taus = pp[1].get_tau_list()
    self.assertAlmostEqual(taus[0], 0.3597907225123525, places=3)
    self.assertAlmostEqual(taus[1], 0.43239284636769254, places=3)
    self.assertAlmostEqual(taus[2], 0.99820157492712114, places=3)
    thetas = pp[2].get_theta_list()
    self.assertAlmostEqual(thetas[0], 1.6610069445335354, places=3)
    self.assertAlmostEqual(thetas[1], 1.7491703334817772, places=3)
    self.assertAlmostEqual(thetas[2], 2.0702447422720143, places=3)
def test_cappbuilder_tau(self):
    """Check tau and theta angles of peptides built by CaPPBuilder."""
    expected_taus = (
        0.3597907225123525,
        0.43239284636769254,
        0.99820157492712114,
    )
    expected_thetas = (
        1.6610069445335354,
        1.7491703334817772,
        2.0702447422720143,
    )
    peptides = CaPPBuilder().build_peptides(self.structure)

    # Tau angles come from the second peptide.
    taus = peptides[1].get_tau_list()
    for position, expected in enumerate(expected_taus):
        self.assertAlmostEqual(taus[position], expected, places=3)

    # Theta angles come from the third peptide.
    thetas = peptides[2].get_theta_list()
    for position, expected in enumerate(expected_thetas):
        self.assertAlmostEqual(thetas[position], expected, places=3)
def test_cappbuilder_real(self):
    """Check sequences and CA serial numbers built by CaPPBuilder."""
    expected_sequences = (
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    )
    expected_ca_serials = [
        10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93,
        104, 112, 124, 131, 139, 150, 161, 173, 182, 189, 197,
        208, 213, 222, 231, 236, 242, 251, 260, 267, 276, 284,
    ]

    peptides = CaPPBuilder().build_peptides(self.structure)

    # Fetch the first three sequences before asserting on any of them.
    sequences = [peptides[index].get_sequence() for index in (0, 1, 2)]
    for sequence, expected in zip(sequences, expected_sequences):
        self.assertEqual(sequence, expected)

    ca_serials = [ca.serial_number for ca in peptides[0].get_ca_list()]
    self.assertEqual(ca_serials, expected_ca_serials)
def run_test():
    """Print a summary of ``PDB/a_structure.pdb`` and run the neighbor search test.

    For every model, chain and residue the number of children is printed,
    together with disorder information for each residue.  The polypeptides
    of model 1 are then printed using both the C-N and the CA-CA criteria,
    and finally ``quick_neighbor_search_test`` is called.

    Every ``print`` call takes one parenthesised argument, so the output is
    the same under the Python 2 print statement and the Python 3 function.
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    for model in structure.get_list():
        model_id = model.get_id()
        print("Model %i contains %i chains." % (model_id, len(model)))
        for chain in model.get_list():
            chain_id = chain.get_id()
            print("\tChain '%s' contains %i residues." % (chain_id, len(chain)))
            for residue in chain.get_list():
                residue_id = residue.get_id()
                hetfield, resseq, icode = residue_id
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms." % (
                    hetfield, resseq, icode, len(residue)))
                disorder = residue.is_disordered()
                # check if there is disorder due to a point mutation --- this is rare
                if disorder == 2:
                    print("\t\t\tThere is a point mutation present in the crystal at this position.")
                    s = "\t\t\tResidues at this position are "
                    for resname in residue.disordered_get_id_list():
                        s = s + resname + " "
                    # Drop the trailing space before the final period.
                    print(s[:-1] + ".")
                # count the number of disordered atoms
                if disorder == 1:
                    disordered_count = sum(
                        1 for atom in residue.get_list() if atom.is_disordered()
                    )
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered atoms." % disordered_count)

    print("Polypeptides using C-N")
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("Polypeptides using CA-CA")
    ppb = CaPPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
def test_ca_ca(self):
    """Extract polypeptides using CA-CA."""
    ppbuild = CaPPBuilder()
    polypeptides = ppbuild.build_peptides(self.structure[1])
    self.assertEqual(len(polypeptides), 1)
    pp = polypeptides[0]
    # Check the start and end positions
    self.assertEqual(pp[0].get_id()[1], 2)
    self.assertEqual(pp[-1].get_id()[1], 86)
    # Check the sequence
    s = pp.get_sequence()
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(s, Seq)
    self.assertEqual(s.alphabet, generic_protein)
    self.assertEqual("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                     "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC",
                     str(s))
def test_cappbuilder_real_nonstd(self):
    """Check CaPPBuilder output when non-standard amino acids are allowed."""
    peptides = CaPPBuilder().build_peptides(self.structure, False)
    self.assertEqual(len(peptides), 1)

    # Start and end positions of the single peptide
    peptide = peptides[0]
    self.assertEqual(peptide[0].get_id()[1], 151)
    self.assertEqual(peptide[-1].get_id()[1], 220)

    # Sequence type and content; non-standard MSE are shown as M
    sequence = peptide.get_sequence()
    self.assertIsInstance(sequence, Seq)
    expected = "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG"
    self.assertEqual(expected, sequence)
def pdb_sequence(pdb_file, id=None, method="order"):
    """Extract one sequence record per chain from a PDB file.

    Args:
        pdb_file: PDB file handed to ``PDBParser.get_structure``.
        id: structure identifier; when ``None`` it is derived from
            *pdb_file* with ``util.make_id_from_file_name``.
        method: ``"order"`` converts every residue of the chain with
            ``three_to_one`` and records a ``pdb_seq_spec`` per residue;
            ``"distance"`` concatenates the sequences of the polypeptides
            built by ``CaPPBuilder`` and leaves ``seq_spec`` as ``None``.

    Returns:
        A ``pdb_seqs`` object holding the id, the list of per-chain dicts
        (``id_chain``, ``seq_rec``, ``seq_spec``) and a map from chain id
        to those dicts.

    Raises:
        ValueError: for any other *method*, raised when the first chain is
            processed.
    """
    from Bio.PDB import PDBParser, CaPPBuilder
    from Bio.PDB.Polypeptide import three_to_one

    if id is None:
        id = util.make_id_from_file_name(pdb_file)
    structure = PDBParser().get_structure(id, pdb_file)

    seq_chains = []
    for chain in structure.get_chains():
        id_chain = chain.get_id()
        if method == "order":
            letters = []
            seq_spec = []
            for res in chain.get_residues():
                letters.append(three_to_one(res.get_resname()))
                # from Bio docs, res.get_full_id() returns:
                # ("1abc", 0, "A", (" ", 10, "A"))
                full_id = res.get_full_id()
                chain_code = full_id[-2]
                residue_id = full_id[-1]
                spec = pdb_seq_spec(chain=chain_code.strip(),
                                    resn=res.get_resname(),
                                    resi=residue_id[-2],
                                    ins=residue_id[-1].strip())
                seq_spec.append(spec)
            seq = Seq("".join(letters), IUPAC.protein)
        elif method == "distance":
            peptides = CaPPBuilder().build_peptides(chain)
            seq = Seq("", IUPAC.protein)
            for peptide in peptides:
                seq = seq + peptide.get_sequence()
            seq_spec = None  # TODO: implement
        else:
            raise ValueError("Unknown method: {}".format(method))

        record = SeqRecord(seq,
                           id="{}_{}".format(id, id_chain),
                           description="")
        seq_chains.append(
            dict(id_chain=id_chain, seq_rec=record, seq_spec=seq_spec))

    chains_map = {entry["id_chain"]: entry for entry in seq_chains}
    return pdb_seqs(id=id, chains=seq_chains, chains_map=chains_map)
def testModels(self):
    """Test file with multiple models."""
    parser = MMCIFParser()
    structure = parser.get_structure("example", "PDB/1LCD.cif")
    self.assertEqual(len(structure), 3)
    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)

        # First try allowing non-standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], False)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 1)
        self.assertEqual(pp[-1].get_id()[1], 51)
        # Check the sequence
        s = pp.get_sequence()
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        self.assertEqual(
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s))

        # Now try strict version with only standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 1)
        self.assertEqual(pp[-1].get_id()[1], 51)
        # Check the sequence
        s = pp.get_sequence()
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        self.assertEqual(
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s))
def _remove_missing_res(self, record: SeqRecord, pdb: Path):
    """Replace residues of *record* that the structure lacks with '-'.

    The ungapped record sequence is aligned against the sequence built from
    the structure by ``CaPPBuilder`` (non-standard residues allowed).  Each
    step of the first alignment's path where the structure-sequence index
    does not advance gives a range of ungapped record positions; residues in
    those ranges are overwritten with '-' in ``record.seq``.

    Args:
        record: sequence record to edit; its ``seq`` is replaced.
        pdb: structure file parsed with ``PDBParser``.

    Returns:
        The same *record* object with the updated sequence.
    """
    structure = PDBParser().get_structure(record.id, pdb)
    peptides = CaPPBuilder().build_peptides(structure, aa_only=False)
    sequence = ''.join(str(peptide.get_sequence()) for peptide in peptides)

    alignment = PairwiseAligner().align(record.seq.ungap('-'), sequence)[0]
    path = alignment.path

    # Collect (start, end) record positions for path steps where the
    # structure-sequence coordinate stays put.
    gaps = [
        (here[0], there[0])
        for here, there in zip(path, path[1:])
        if here[1] == there[1]
    ]
    # Process the last range first: masked residues are skipped when
    # counting, so earlier ranges keep their original indices.
    gaps.reverse()

    mut = record.seq.tomutable()
    for gap_start, gap_end in gaps:
        ungapped_index = 0
        for position, res in enumerate(mut):
            if res == '-':
                continue
            if gap_start <= ungapped_index < gap_end:
                mut[position] = '-'
            ungapped_index += 1

    record.seq = mut.toseq()
    return record
def test_parsers(self):
    """Extract polypeptides from 1A8O with both mmCIF parsers."""
    # Expected (first residue id, last residue id, sequence) of each fragment
    # in strict mode: MSE 151 at the start is ignored and the chain breaks
    # at MSE 185 and MSE 214,215.
    strict_fragments = (
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    )

    slow_parser = MMCIFParser()
    quick_parser = FastMMCIFParser()
    structure = slow_parser.get_structure("example", "PDB/1A8O.cif")
    f_structure = quick_parser.get_structure("example", "PDB/1A8O.cif")

    self.assertEqual(len(structure), 1)
    self.assertEqual(len(f_structure), 1)

    for builder in [PPBuilder(), CaPPBuilder()]:
        # The model column must be stored as serial_num by both parsers.
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

        # Non-standard amino acids allowed (aa_only=False)
        peptides = builder.build_peptides(structure[0], False)
        f_peptides = builder.build_peptides(f_structure[0], False)
        self.assertEqual(len(peptides), 1)
        self.assertEqual(len(f_peptides), 1)

        peptide = peptides[0]
        f_peptide = f_peptides[0]
        # Start and end positions, for each parser in turn
        for candidate in (peptide, f_peptide):
            self.assertEqual(candidate[0].get_id()[1], 151)
            self.assertEqual(candidate[-1].get_id()[1], 220)

        # Sequences from both parsers must agree, so the remaining checks
        # are made on one of them only.
        sequence = peptide.get_sequence()
        f_sequence = f_peptide.get_sequence()
        self.assertEqual(sequence, f_sequence)
        self.assertIsInstance(sequence, Seq)
        self.assertEqual(sequence.alphabet, generic_protein)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
            "NANPDCKTILKALGPGATLEEMMTACQG",
            str(sequence),
        )

        # Strict version with only standard amino acids (aa_only=True)
        peptides = builder.build_peptides(structure[0], True)
        self.assertEqual(len(peptides), 3)
        for index, (first_id, last_id, expected) in enumerate(strict_fragments):
            fragment = peptides[index]
            self.assertEqual(fragment[0].get_id()[1], first_id)
            self.assertEqual(fragment[-1].get_id()[1], last_id)
            fragment_sequence = fragment.get_sequence()
            self.assertIsInstance(fragment_sequence, Seq)
            self.assertEqual(fragment_sequence.alphabet, generic_protein)
            self.assertEqual(expected, str(fragment_sequence))

    s_atoms = list(structure.get_atoms())
    f_atoms = list(f_structure.get_atoms())

    for atoms in [s_atoms, f_atoms]:
        self.assertEqual(len(atoms), 644)

        # The first five atoms are checked by name, id and full name.
        atom_names = ["N", "CA", "C", "O", "CB"]
        leading = atoms[:5]
        for getter in ("get_name", "get_id", "get_fullname"):
            self.assertSequenceEqual(
                [getattr(a, getter)() for a in leading], atom_names
            )
        self.assertSequenceEqual(
            [a.get_occupancy() for a in leading], [1.0, 1.0, 1.0, 1.0, 1.0]
        )

        first_atom = atoms[0]
        self.assertIsInstance(first_atom.get_coord(), numpy.ndarray)
        coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)
        numpy.testing.assert_array_equal(first_atom.get_coord(), coord)
        self.assertEqual(first_atom.get_bfactor(), 18.03)

        for atom in atoms:
            self.assertIsNone(atom.get_anisou())