def SplitChain(PDB_objects):
    """
    Splits a list of PDB files by chain creating one PDB and one FASTA file per chain.

    Arguments:

    PDB_objects: list of PDB objects (with many chains) generated by the PDB parser.

    Returns:

    List of "<pdb id>_<chain id>" prefixes, one per exported chain. For every
    prefix the file "<prefix>.pdb" is written to the current directory, and
    "<prefix>.fa" is written when at least one peptide can be built from the chain.
    """
    File_prefix = []
    polipeptide = PPBuilder()

    for pdb in PDB_objects:
        chain_names = set()
        io = PDBIO()

        for chain in pdb.get_chains():
            chain_id = chain.get_id()
            # get_chains() may yield the same chain id more than once
            # (presumably once per model); only the first one is exported.
            if chain_id in chain_names:
                continue
            chain_names.add(chain_id)

            prefix = pdb.get_id() + "_" + chain_id

            # Creates a PDB file for each chain of the original file.
            io.set_structure(chain)
            io.save(prefix + ".pdb")
            File_prefix.append(prefix)

            # Creates a FASTA file for each chain of the original file.
            # Peptides are built from this chain only, and the file is opened
            # once so that later fragments do not overwrite earlier ones.
            peptides = polipeptide.build_peptides(chain)
            if peptides:
                with open(prefix + ".fa", "w") as fasta:
                    fasta.write(">" + prefix + "\n")
                    fasta.write("".join(str(pp.get_sequence()) for pp in peptides))

    return File_prefix
def write_backbone_angles(chain, region=None, offset=0, outfile=sys.stdout, header=False):
    """
    Write Psi/Phi angles from a pdb file

    One tab-separated row is printed per residue: chain id, residue number,
    one-letter residue code, residue number plus ``offset``, phi and psi.
    Angles are multiplied by the module-level RAD_FACTOR (presumably the
    radians-to-degrees factor -- confirm); missing angles are printed as 'NA'.

    :param chain: chain to build peptides from
    :param region: inclusive (first, last) residue-number window; None selects
        every residue
    :param offset: value added to the residue number in the fourth column
    :param outfile: file-like object to print to
    :param header: when true, print the module-level HEADER line first
    """
    if region is None:
        # No filter requested: the lower bound must be unbounded too, otherwise
        # residues with negative numbers would be dropped silently.
        region = (float('-inf'), float('inf'))
    first, last = region[0], region[1]

    polypeptides = PPBuilder().build_peptides(chain)

    if header:
        print(HEADER, file=outfile)

    for peptide in polypeptides:
        angles = peptide.get_phi_psi_list()
        for residue, (phi, psi) in zip(peptide, angles):
            position = residue.get_id()[1]
            if not first <= position <= last:
                continue
            print(chain.id,
                  position,
                  seq1(residue.get_resname()),
                  position + offset,
                  'NA' if phi is None else phi * RAD_FACTOR,
                  'NA' if psi is None else psi * RAD_FACTOR,
                  sep='\t', file=outfile)
def get_secondary_structure(structure):
    """
    Assign a Ramachandran-region label to residues from their phi/psi angles.

    :param structure: iterable of chains (each chain is passed to PPBuilder)
    :return: list of length N (module-level name) holding 'E', 'P', 'H', 'L'
        or "" for positions without a label
    """
    import math  # local import so the block does not depend on a file-level one

    # (phi start, psi start, width, height, label, colour) -- all in degrees
    rama_ss_ranges = [(-180, -180, 80, 60, 'E', 'blue'),
                      (-180, 50, 80, 130, 'E', 'blue'),
                      (-100, -180, 100, 60, 'P', 'green'),
                      (-100, 50, 100, 130, 'P', 'green'),
                      (-180, -120, 180, 170, 'H', 'red'),
                      (0, -180, 180, 360, 'L', 'yellow')]

    # Calculate PSI and PHI
    ppb = PPBuilder()  # PolyPeptideBuilder
    ss = ["" for x in range(N)]
    for chain in structure:
        for pp in ppb.build_peptides(chain):
            phi_psi = pp.get_phi_psi_list()  # [(phi_residue_1, psi_residue_1), ...]
            # NOTE(review): i is the position inside the current peptide, so
            # labels of later peptides overwrite earlier ones at the same
            # index -- confirm whether a running residue index is intended.
            for i, (phi, psi) in enumerate(phi_psi):
                # First and last residues have a None angle and are skipped.
                if phi is None or psi is None:
                    continue
                # The ranges above are in degrees; the builder returns radians.
                phi_deg = math.degrees(phi)
                psi_deg = math.degrees(psi)
                for x, y, w, h, ss_c, color in rama_ss_ranges:
                    if x <= phi_deg < x + w and y <= psi_deg < y + h:
                        ss[i] = ss_c
                        break
    return ss
def CreateJoinedFastas(input_PDB_objects):
    """
    Joins many PDB objects and creates a FASTA file with all objects joined.

    Arguments:

    input_PDB_objects: list of PDB objects whose sequence will be added to the FASTA file.

    Returns:

    Name of the FASTA file written: the object ids, each followed by "_",
    concatenated and suffixed with ".fa".
    """
    # Materialise once: the objects are walked twice (file name, then records),
    # which would silently produce an empty file for a one-shot iterable.
    pdb_objects = list(input_PDB_objects)
    polipeptide = PPBuilder()

    # Create FASTA file name.
    filename = "".join(obj.get_id() + "_" for obj in pdb_objects) + ".fa"

    # Build one record per object; records are separated by a newline and the
    # last one is not newline-terminated.
    records = []
    for obj in pdb_objects:
        sequence = "".join(str(polipep.get_sequence())
                           for polipep in polipeptide.build_peptides(obj))
        records.append(">" + obj.get_id() + "\n" + sequence)

    # Write FASTA file; the context manager closes the handle.
    with open(filename, 'w') as joined_fasta:
        joined_fasta.write("\n".join(records))

    return filename
def compute_secondary_structure(self, model):
    """
    Classify every residue of a model into a Ramachandran region.

    :param model: one model (iterable of chains)
    :return: flat list with one entry per collected residue: the label of the
        first matching entry of ``self._ranges``, '-' when the angles are not
        available, or None when no range matches
    """
    builder = PPBuilder()
    # { chain id : [[residues], [phi in degrees], [psi in degrees]] }
    per_chain = {}
    collected = 0

    for chain in model:
        for peptide in builder.build_peptides(chain):
            angle_pairs = peptide.get_phi_psi_list()
            for index, residue in enumerate(peptide):
                phi, psi = angle_pairs[index]
                columns = per_chain.setdefault(chain.id, [[], [], []])
                columns[0].append(residue)
                if phi is None or psi is None:
                    # Terminal residues lack one angle: both are stored as NaN.
                    columns[1].append(math.nan)
                    columns[2].append(math.nan)
                else:
                    columns[1].append(math.degrees(phi))
                    columns[2].append(math.degrees(psi))
                collected += 1

    # Pad with NaN entries under the pseudo chain 'Z' when fewer residues than
    # self._residues were collected.
    shortfall = self._residues - collected
    if shortfall > 0:
        for _ in range(shortfall):
            padding = per_chain.setdefault('Z', [[], [], []])
            padding[0].append(None)
            padding[1].append(math.nan)
            padding[2].append(math.nan)

    # Compare the angles with the Ramachandran regions.
    labels = []
    for residues, phis, psis in per_chain.values():
        for _residue, phi, psi in zip(residues, phis, psis):
            if math.isnan(phi) and math.isnan(psi):
                labels.append('-')
                continue
            labels.append(next(
                (code
                 for x, y, width, height, code, _color in self._ranges
                 if x <= phi < x + width and y <= psi < y + height),
                None))
    return labels
def test_insertions(self):
    """Check polypeptide building on 4ZHL, a file with residue insertion codes."""
    expected_sequence = (
        "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
        "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
        "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
        "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
    )

    cif_parser = MMCIFParser(QUIET=1)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        structure = cif_parser.get_structure("example", "PDB/4ZHL.cif")

    for builder in (PPBuilder(), CaPPBuilder()):
        # Non-standard amino acids are allowed (second argument False).
        peptides = builder.build_peptides(structure[0], False)
        self.assertEqual(len(peptides), 2)

        # Only the first segment is inspected.
        first_segment = peptides[0]
        self.assertEqual(first_segment[0].get_id()[1], 16)
        self.assertEqual(first_segment[-1].get_id()[1], 244)

        sequence = first_segment.get_sequence()
        self.assertIsInstance(sequence, Seq)
        self.assertEqual(sequence.alphabet, generic_protein)
        self.assertEqual(expected_sequence, str(sequence))
def read_pdb_file(file_name, name=None):
    """
    Extract info from a PDB file

    file_name: path of pdb file
    name: name of the structure (default: file_name without its extension)

    return:: (protein_name, structure, residues, sequence, atoms)
        protein_name: base name of file_name with every ".pdb" removed
        structure: structure object
        residues: list of the residues of all polypeptides, in polypeptide order
        sequence: combined sequence (for all polypeptides)
        atoms: list of all atoms of the structure

    Raises ValueError when the parsed structure does not have length 1.
    """
    structure_name = splitext(file_name)[0] if name is None else name

    structure = PDBParser().get_structure(structure_name, file_name)
    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " + structure_name)

    atoms = Selection.unfold_entities(structure, 'A')

    # Fall back to the CA-CA builder when the C-N builder finds nothing.
    polypeptides = PPBuilder().build_peptides(structure)
    if not polypeptides:
        polypeptides = CaPPBuilder().build_peptides(structure)

    fragments = []
    residues = []
    for polypeptide in polypeptides:
        fragments.append(str(polypeptide.get_sequence()))
        residues.extend(polypeptide)
    sequence = ''.join(fragments)

    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residues, sequence, atoms
def test_ppbuilder_torsion(self):
    """Test phi/psi angles calculated with PPBuilder."""
    # (residue index, 0 = phi / 1 = psi) for each expected value below; the
    # phi of residue 0 is checked separately because it must be None.
    slots = ((0, 1), (1, 0), (1, 1), (2, 0), (2, 1))
    expected_by_peptide = (
        (-0.46297171497725553, -1.0873937604007962, 2.1337707832637109,
         -2.4052232743651878, 2.3807316946081554),
        (-0.6810077089092923, -1.2654003477656888, -0.58689987042756309,
         -1.7467679151684763, -1.5655066256698336),
        (-0.73222884210889716, -1.1044740234566259, -0.69681334592782884,
         -1.8497413300164958, 0.34762889834809058),
    )

    peptides = PPBuilder().build_peptides(self.structure)

    for peptide_index, expected_values in enumerate(expected_by_peptide):
        angles = peptides[peptide_index].get_phi_psi_list()
        self.assertIsNone(angles[0][0])
        for (residue_index, column), value in zip(slots, expected_values):
            self.assertAlmostEqual(angles[residue_index][column], value, places=3)
def _pp(self, pdb_path, chain_id):
    """
    Build one polypeptide for a chain of the first model of a PDB file.

    The structure id is the file name without its extension. All fragments
    found by PPBuilder are appended, in order, to the first one, which is
    returned. Raises IndexError when no fragment is found.
    """
    structure = PDBParser().get_structure(Path(pdb_path).stem, pdb_path)
    chain = structure[0][chain_id]
    fragments = PPBuilder().build_peptides(chain)

    merged = fragments[0]
    for fragment in fragments[1:]:
        merged += fragment
    return merged
def get_structure_sequence(struct):
    # type: (Structure) -> str
    """
    Concatenate the sequences of all peptides PPBuilder finds in a structure.

    :param struct: Structure object
    :return: struct sequence
    """
    fragments = PPBuilder().build_peptides(struct)
    return ''.join(str(fragment.get_sequence()) for fragment in fragments)
def test_parser(self):
    """Extract polypeptides from 1A8O."""
    parser = MMCIFParser()
    structure = parser.get_structure("example", "PDB/1A8O.cif")
    self.assertEqual(len(structure), 1)

    # (first residue number, last residue number, sequence) of the fragments
    # expected from the strict build: MSE 151 at the start is ignored and the
    # chain breaks at MSE 185 and MSE 214,215.
    strict_fragments = [
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    ]

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 151)
        self.assertEqual(pp[-1].get_id()[1], 220)
        # Check the sequence
        s = pp.get_sequence()
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
            "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 3)
        for index, (start, end, sequence) in enumerate(strict_fragments):
            pp = polypeptides[index]
            self.assertEqual(pp[0].get_id()[1], start)
            self.assertEqual(pp[-1].get_id()[1], end)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(sequence, str(s))
def test_polypeptide(self):
    """Tests on polypeptide class and methods."""
    expected_sequences = (
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    )
    # (residue index, 0 = phi / 1 = psi) for each expected value below; the
    # phi of residue 0 is checked separately because it must be None.
    slots = ((0, 1), (1, 0), (1, 1), (2, 0), (2, 1))
    expected_phi_psi = (
        (-0.46297171497725553, -1.0873937604007962, 2.1337707832637109,
         -2.4052232743651878, 2.3807316946081554),
        (-0.6810077089092923, -1.2654003477656888, -0.58689987042756309,
         -1.7467679151684763, -1.5655066256698336),
        (-0.73222884210889716, -1.1044740234566259, -0.69681334592782884,
         -1.8497413300164958, 0.34762889834809058),
    )

    p = PDBParser(PERMISSIVE=True)
    pdb1 = "PDB/1A8O.pdb"
    s = p.get_structure("scr", pdb1)

    # Peptides built from C-N distances.
    ppb = PPBuilder()
    pp = ppb.build_peptides(s)
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(str(pp[index].get_sequence()), sequence)
    for index, values in enumerate(expected_phi_psi):
        phi_psi = pp[index].get_phi_psi_list()
        # assertIsNone reports the offending value on failure.
        self.assertIsNone(phi_psi[0][0])
        for (residue, column), value in zip(slots, values):
            self.assertAlmostEqual(phi_psi[residue][column], value, places=3)

    # Peptides built from CA-CA distances.
    ppb = CaPPBuilder()
    pp = ppb.build_peptides(s)
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(str(pp[index].get_sequence()), sequence)
    self.assertEqual(
        [ca.serial_number for ca in pp[0].get_ca_list()],
        [
            10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131,
            139, 150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242,
            251, 260, 267, 276, 284,
        ])

    taus = pp[1].get_tau_list()
    self.assertAlmostEqual(taus[0], 0.3597907225123525, places=3)
    self.assertAlmostEqual(taus[1], 0.43239284636769254, places=3)
    self.assertAlmostEqual(taus[2], 0.99820157492712114, places=3)

    thetas = pp[2].get_theta_list()
    self.assertAlmostEqual(thetas[0], 1.6610069445335354, places=3)
    self.assertAlmostEqual(thetas[1], 1.7491703334817772, places=3)
    self.assertAlmostEqual(thetas[2], 2.0702447422720143, places=3)
def is_protein(chain):
    """
    Tell whether PPBuilder finds a non-empty peptide in the chain.

    :param chain: chain to inspect
    :return: True as soon as one peptide with a non-empty sequence is found,
        False otherwise
    """
    peptides = PPBuilder().build_peptides(chain)
    return any(len(peptide.get_sequence()) > 0 for peptide in peptides)
def testModels(self):
    """Check parsing and polypeptide building on files with several models."""
    expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

    parser = MMCIFParser(QUIET=1)
    f_parser = FastMMCIFParser(QUIET=1)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        structure = parser.get_structure("example", "PDB/1LCD.cif")
        f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

    self.assertEqual(len(structure), 3)
    self.assertEqual(len(f_structure), 3)

    for builder in (PPBuilder(), CaPPBuilder()):
        # serial_num (model column) must be stored properly.
        for model_index, serial in enumerate((1, 2, 3)):
            self.assertEqual(structure[model_index].serial_num, serial)

        # First allow non-standard amino acids (False), then the strict
        # version with only standard amino acids (True); the expectations
        # are the same for both.
        for aa_only in (False, True):
            peptides = builder.build_peptides(structure[0], aa_only)
            self.assertEqual(len(peptides), 1)
            peptide = peptides[0]
            # Start and end positions
            self.assertEqual(peptide[0].get_id()[1], 1)
            self.assertEqual(peptide[-1].get_id()[1], 51)
            # Sequence
            sequence = peptide.get_sequence()
            self.assertIsInstance(sequence, Seq)
            self.assertEqual(sequence.alphabet, generic_protein)
            self.assertEqual(expected_sequence, str(sequence))

    # This structure contains several models with multiple lengths.
    # The tests were failing.
    structure = parser.get_structure("example", "PDB/2OFG.cif")
    self.assertEqual(len(structure), 3)
def chain_to_one_pp(chain):
    """
    Return a single polypeptide for a chain.

    When PPBuilder does not find exactly one fragment, a warning is printed
    and every further fragment is appended to the first one, which is then
    returned. Raises IndexError when no fragment is found.
    """
    fragments = PPBuilder().build_peptides(chain)
    fragment_count = len(fragments)
    if fragment_count == 1:
        return fragments[0]

    print('warning ', fragment_count,
          ' polypeptides from one chain, extending first pp')
    for extra in fragments[1:]:
        fragments[0].extend(extra)
    return fragments[0]
def structure_filtered_dca_get_sequence_from_structure(structure):
    """
    Return the concatenated sequence of all peptides found in a structure.

    Peptides are built with PPBuilder(radius=10.0) and aa_only=False.

    :param structure: entity passed to PPBuilder.build_peptides
    :return: one string holding the sequences of all peptides, in order,
        without separators
    """
    from Bio.PDB import PPBuilder

    ppb = PPBuilder(radius=10.0)
    # Join directly instead of appending a newline per peptide and stripping
    # all newlines again afterwards.
    joined = "".join(str(pp.get_sequence())
                     for pp in ppb.build_peptides(structure, aa_only=False))
    return joined.replace('\n', '')
def run_test():
    """
    Parse "PDB/a_structure.pdb" and print a summary of its content.

    Prints the number of chains per model, residues per chain and atoms per
    residue, reports point mutations and disordered atoms, lists the
    polypeptides of structure[1] found by both peptide builders, and finally
    calls quick_neighbor_search_test().
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    # Every print takes exactly one argument, so the calls below produce the
    # same output whether print is a statement or a function.
    for model in structure.get_list():
        model_id = model.get_id()
        print("Model %i contains %i chains." % (model_id, len(model)))
        for chain in model.get_list():
            chain_id = chain.get_id()
            print("\tChain '%s' contains %i residues." % (chain_id, len(chain)))
            for residue in chain.get_list():
                residue_id = residue.get_id()
                hetfield, resseq, icode = residue_id
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms." % (
                    hetfield, resseq, icode, len(residue)))
                # check if there is disorder due to a point mutation --- this is rare
                if residue.is_disordered() == 2:
                    print("\t\t\tThere is a point mutation present in the crystal at this position.")
                    s = "\t\t\tResidues at this position are "
                    for resname in residue.disordered_get_id_list():
                        s = s + resname + " "
                    # drop the trailing space before the full stop
                    print(s[:-1] + ".")
                # count the number of disordered atoms
                if residue.is_disordered() == 1:
                    disordered_count = 0
                    for atom in residue.get_list():
                        if atom.is_disordered():
                            disordered_count = disordered_count + 1
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered atoms." % disordered_count)

    print("Polypeptides using C-N")
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("Polypeptides using CA-CA")
    ppb = CaPPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
def run(infile, splitpdb):
    """
    Export the sequences (FASTA) and, optionally, the fragments (PDB) of a structure.

    :param infile: path of the PDB file; its base name without extension is
        the prefix of every output file
    :param splitpdb: 0 writes all sequences to "<prefix>.fasta" and saves the
        whole structure as "<prefix>_clean.pdb"; 1 writes one
        "<prefix>_<chain>.<index>.fasta" and one "<prefix>_<chain>.<index>.pdb"
        per peptide fragment
    :return: list of the chain ids visited (all models)
    :raises ValueError: when splitpdb is neither 0 nor 1

    Chains in which no peptide is found are extracted as a whole to
    "<prefix>_<chain>.0.pdb", whatever the value of splitpdb.
    """
    if splitpdb not in (0, 1):
        # Previously failed later with a NameError on the first peptide.
        raise ValueError("splitpdb must be 0 or 1, got %r" % (splitpdb,))

    parser = PDBParser()
    struct = parser.get_structure('mystruct', infile)
    ppb = PPBuilder()
    prefix = os.path.splitext(os.path.basename(infile))[0]

    joined_fasta = None
    if splitpdb == 0:
        # We do NOT split the PDB and fasta files!
        pdbio = PDBIO_RPL.PDBIO()
        pdbio.set_structure(struct)
        pdbio.save(prefix + '_clean.pdb')
        joined_fasta = open(prefix + '.fasta', 'w')

    ListChains = []
    try:
        for model in struct:
            for chain in model:
                ListChains.append(chain.id)
                ListPpdb = ppb.build_peptides(chain)
                if ListPpdb:
                    for index, pp in enumerate(ListPpdb):
                        seq = pp.get_sequence()
                        record = '>%s %s\n%s\n' % (
                            prefix + '_chain_' + chain.id + '_' + str(index),
                            len(seq), seq)
                        if splitpdb == 1:
                            # We split the PDB and fasta files!
                            stem = prefix + '_' + chain.id + '.' + str(index)
                            with open(stem + '.fasta', 'w') as seqfile:
                                seqfile.write(record)
                            startres = pp[0].id[1]
                            endres = pp[-1].id[1]
                            Dice_RPL.extract(struct, chain.id, startres,
                                             endres, stem + '.pdb')
                        else:
                            joined_fasta.write(record)
                else:
                    # Also split chains that do not consist of amino acids!
                    ChainList = chain.get_list()
                    startres = ChainList[0].id[1]
                    # Residue number of the LAST residue; the previous code
                    # read the insertion code of the first residue instead.
                    endres = ChainList[-1].id[1]
                    # There is no fragment index for such a chain (the old
                    # code reused a stale or undefined one), so 0 is used.
                    ofile = prefix + '_' + chain.id + '.0.pdb'
                    Dice_RPL.extract(struct, chain.id, startres, endres, ofile)
    finally:
        if joined_fasta is not None:
            joined_fasta.close()

    return ListChains
def get_sequence(self, chain_id):
    """
    Input:
        self: Use Biopython.PDB structure which has been stored in an object variable
        chain_id : String (usually in ['A','B', 'C' ...]. The number of chains
            depends on the specific protein and the resulting structure)
    Return:
        Return the amino acid sequence (single-letter alphabet!) of a given
        chain (chain_id) in a Biopython.PDB structure as a string. The chain is
        read from the first model; when the chain is split into several
        peptide fragments their sequences are concatenated in order, and an
        empty string is returned when no peptide is found.
    """
    chain = self.structure[0][chain_id]
    # Join every fragment: returning inside the loop yielded only the first
    # fragment (as a Seq object) and None for chains without peptides.
    return ''.join(str(pp.get_sequence())
                   for pp in PPBuilder().build_peptides(chain))
def test_c_n(self):
    """Extract polypeptides using C-N."""
    ppbuild = PPBuilder()
    polypeptides = ppbuild.build_peptides(self.structure[1])
    self.assertEqual(len(polypeptides), 1)
    pp = polypeptides[0]

    # Check the start and end positions
    self.assertEqual(pp[0].get_id()[1], 2)
    self.assertEqual(pp[-1].get_id()[1], 86)

    # Check the sequence
    s = pp.get_sequence()
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(s, Seq)
    self.assertEqual(s.alphabet, generic_protein)
    self.assertEqual("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                     "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC", str(s))
def test_ppbuilder_real_nonstd(self):
    """Check PPBuilder on a real PDB file with non-standard amino acids allowed."""
    peptides = PPBuilder().build_peptides(self.structure, False)
    self.assertEqual(len(peptides), 1)

    only_peptide = peptides[0]
    # Start and end positions
    self.assertEqual(only_peptide[0].get_id()[1], 151)
    self.assertEqual(only_peptide[-1].get_id()[1], 220)

    # Sequence; non-standard MSE are shown as M
    sequence = only_peptide.get_sequence()
    self.assertIsInstance(sequence, Seq)
    self.assertEqual(
        "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
        sequence)
def get_ignored_res(file: str):
    """
    Collect the phi/psi angles of every residue of a PDB file.

    :param file: path of the PDB file
    :return: tuple (output, ignored, x, y) where
        output maps "<chain id>:<residue name><residue number>" to the
            (phi, psi) pair returned by the peptide builder,
        ignored lists the (key, (phi, psi)) entries lacking phi or psi,
        x and y hold phi and psi (value * 180 / pi) of the remaining residues.
    """
    x, y, ignored, output = [], [], [], {}
    # NOTE(review): keys do not include the model, so a residue of a later
    # model replaces the entry of the same residue in an earlier model.
    for model in PDBParser().get_structure(id=None, file=file):
        for chain in model:
            for peptide in PPBuilder().build_peptides(chain):
                for aa, angles in zip(peptide, peptide.get_phi_psi_list()):
                    residue = chain.id + ":" + aa.resname + str(aa.id[1])
                    output[residue] = angles

    for key, value in output.items():
        phi, psi = value[0], value[1]
        # Only get residues with both phi and psi angles. The test is against
        # None on purpose: an angle of exactly 0.0 is a valid value.
        if phi is not None and psi is not None:
            x.append(phi * 180 / pi)
            y.append(psi * 180 / pi)
        else:
            ignored.append((key, value))
    return output, ignored, x, y
def get_sequence(pdb, chain):
    """
    Write the sequence and the coordinates of one chain of a PDB file.

    :param pdb: path of the PDB file; also used as the structure id
    :param chain: id of the chain, looked up in the first model

    Two files are written, both named from ``stem = pdb[-8:-4] + "_" + chain``:
    "<stem><chain>.fasta.txt" holds the header ">" + pdb[:-4] + chain and the
    concatenated sequence of all peptides of the chain, and "<stem>.pdb" holds
    the structure saved through SelectChains(chain).
    """
    # PERMISSIVE=0 is passed explicitly (presumably strict parsing -- confirm).
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    ppb = PPBuilder()
    Sequence = "".join(str(pp.get_sequence())
                       for pp in ppb.build_peptides(pdb_chain))

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = pdb[-8:-4] + "_" + chain + ".pdb"

    # The context manager closes the file even when a write fails.
    with open(output[:-4] + chain + ".fasta.txt", "w") as out:
        out.write(">" + pdb[:-4] + chain + "\n")
        out.write(Sequence + "\n")

    io.save(output, SelectChains(chain))
def get_pp(pdb, chain, start, length, seq):
    """
    Retrieve the residues around a sequence match for a given pdb file and chain.

    :param pdb: identifier passed to make_filename() and used as structure id
    :param chain: id of the chain, looked up in the first model
    :param start: start index used when seq is not found in any peptide (the
        last peptide is then used)
    :param length: length of the requested stretch
    :param seq: sequence searched for in each peptide of the chain; the first
        peptide containing it is used and start becomes the match index
    :return: pep[start - 1:start + length + 2], i.e. the stretch with one extra
        residue before and two after
    :raises RuntimeError: when no peptide can be built or the stretch does not
        fit in the selected peptide
    """
    f = make_filename(pdb)
    p = PDBParser(PERMISSIVE=1)
    pdb_struct = p.get_structure(pdb, f)  # Load the pdb structure contained in file f.
    pdb_chain = pdb_struct[0][chain]  # Select the right chain of the structure.
    ppb = PPBuilder()  # Initialize a peptide builder.
    peptides = ppb.build_peptides(pdb_chain)  # Load the given chain as peptides.

    if not peptides:
        # Previously a NameError on the undefined loop variable.
        raise RuntimeError(
            "no polypeptide could be built for chain %s of %s" % (chain, pdb))

    for pep in peptides:
        found = str(pep.get_sequence()).find(seq)
        if found != -1:
            start = found
            break

    if start > 0 and (start + length + 2) <= len(pep):
        return pep[(start - 1):(start + length + 2)]

    # A bare ``raise`` without an active exception also ends in a
    # RuntimeError, but without any explanation.
    raise RuntimeError(
        "residues %d-%d do not fit in the selected polypeptide (length %d) "
        "of chain %s of %s"
        % (start - 1, start + length + 2, len(pep), chain, pdb))
def split_pdb_by_chain(pdb_id):
    """
    Write one PDB file per chain of an entry and return the chain sequences.

    The entry is read from "ent_files/pdb<lower-case id>.ent". Each chain of
    each model is saved to "pdb_chains/<ID>/<ID>-<model id + 1>_<chain id>.pdb"
    unless that file already exists.

    :param pdb_id: identifier of the entry
    :return: dict mapping each chain file name (without directory) to the
        concatenated sequence of the peptides of that chain
    """
    upper_id = pdb_id.upper()
    chain_dir = "pdb_chains/" + upper_id
    if not os.path.isdir(chain_dir):
        os.mkdir(chain_dir)

    entry = PDBParser().get_structure(
        pdb_id, "ent_files/pdb" + pdb_id.lower() + ".ent")

    sequences = dict()
    for model in entry:
        for chain in model:
            file_name = (upper_id + "-" + str(model.get_id() + 1) + "_"
                         + str(chain.get_id()) + ".pdb")
            target = chain_dir + "/" + file_name
            if not os.path.isfile(target):
                writer = PDBIO()
                writer.set_structure(chain)
                writer.save(target)

            chain_sequence = Seq("", generic_protein)
            for fragment in PPBuilder().build_peptides(chain):
                chain_sequence += fragment.get_sequence()
            sequences[file_name] = chain_sequence
    return sequences
def testModels(self):
    """Test file with multiple models."""
    parser = MMCIFParser()
    structure = parser.get_structure("example", "PDB/1LCD.cif")
    self.assertEqual(len(structure), 3)
    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 1)
        self.assertEqual(pp[-1].get_id()[1], 51)
        # Check the sequence
        s = pp.get_sequence()
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 1)
        self.assertEqual(pp[-1].get_id()[1], 51)
        # Check the sequence
        s = pp.get_sequence()
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        self.assertEqual(
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s))
def get_sequence(pdb, chain):
    """
    Write the sequence of one chain of a PDB file to "<pdb[0:-4]>.fasta.atom".

    :param pdb: path of the PDB file; also used as the structure id
    :param chain: id of the chain, looked up in the first model; "%" stands
        for the blank chain id " "

    The file holds the header ">" + pdb[0:-4] and the concatenated sequence of
    all peptides of the chain, built with aa_only=False.
    """
    # Compare by value: identity of string literals is an implementation
    # detail and ``is "%"`` triggers a SyntaxWarning on recent Pythons.
    if chain == "%":
        chain = " "

    warnings.filterwarnings('always', message='.*discontinuous at.*')

    # PERMISSIVE=0 is passed explicitly (presumably strict parsing -- confirm).
    pdb_parser = PDBParser(PERMISSIVE=0, QUIET=True)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    ppb = PPBuilder()
    Sequence = "".join(str(pp.get_sequence())
                       for pp in ppb.build_peptides(pdb_chain, aa_only=False))

    # The context manager closes the file even when a write fails. The PDBIO
    # object the old code created here was never used to save anything.
    with open(pdb[0:-4] + ".fasta.atom", "w") as out:
        out.write(">" + pdb[0:-4] + "\n")
        out.write(Sequence + "\n")
def get_sequence(pdb, chain, first, last, output):
    """
    Save a domain of one chain of a PDB file to ``output``.

    :param pdb: path of the PDB file; also used as the structure id
    :param chain: id of the chain, looked up in the first model
    :param first: passed to SelectDomain together with chain and last
    :param last: passed to SelectDomain together with chain and first
    :param output: path of the PDB file to write

    The chain sequence is still assembled, but nothing is written from it
    (the FASTA export is disabled).
    """
    # PERMISSIVE=0 is passed explicitly (presumably strict parsing -- confirm).
    structure = PDBParser(PERMISSIVE=0).get_structure(pdb, pdb)
    selected_chain = structure[0][chain]

    chain_sequence = ""
    for fragment in PPBuilder().build_peptides(selected_chain):
        chain_sequence = chain_sequence + fragment.get_sequence()

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output, SelectDomain(chain, first, last))
def test_ppbuilder_real(self):
    """Check the fragments PPBuilder finds in a real PDB file."""
    # (first residue number, last residue number, sequence) per fragment
    expected = (
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    )

    peptides = PPBuilder().build_peptides(self.structure)
    self.assertEqual(len(peptides), 3)

    # Termini
    for peptide, (first, last, _sequence) in zip(peptides, expected):
        self.assertEqual(peptide[0].get_id()[1], first)
        self.assertEqual(peptide[-1].get_id()[1], last)

    # Sequences
    sequences = [peptides[index].get_sequence() for index in range(3)]
    self.assertIsInstance(sequences[0], Seq)
    for sequence, (_first, _last, text) in zip(sequences, expected):
        self.assertEqual(sequence, text)
def get_sequence(pdb, chain):
    """
    Renumber one chain of a PDB file from 1 and save it as "renumbered_<pdb>".

    :param pdb: path of the PDB file; also used as the structure id
    :param chain: id of the chain, looked up in the first model

    When the first residue of the chain is not numbered 1, every residue id of
    the chain is replaced by (' ', number - start + 1, ' '). The structure is
    then saved through SelectChains(chain).
    """
    # PERMISSIVE=0 is passed explicitly (presumably strict parsing -- confirm).
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    ppb = PPBuilder()
    # The sequence is assembled but not written anywhere (the FASTA export is
    # disabled).
    Sequence = ""
    for pp in ppb.build_peptides(pdb_chain):
        Sequence = Sequence + pp.get_sequence()

    start = [residue.id[1] for residue in pdb_chain][0]
    # Compare by value: ``is not 1`` tests object identity, which is an
    # implementation detail for ints and a SyntaxWarning on recent Pythons.
    if start != 1:
        # NOTE(review): the new id drops the hetero flag and the insertion
        # code of the residue -- confirm that this is intended.
        for residue in pdb_chain:
            residue.id = (' ', residue.id[1] - start + 1, ' ')

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = "renumbered_" + pdb
    io.save(output, SelectChains(chain))