def test_polypeptide(self):
    """Tests on polypeptide class and methods.

    Parses ``PDB/1A8O.pdb`` and checks, for both the C-N based
    ``PPBuilder`` and the CA-CA based ``CaPPBuilder``, that three
    polypeptides with the expected sequences are built.  It also checks
    the first phi/psi, tau and theta values against reference numbers.
    """
    p = PDBParser(PERMISSIVE=True)
    pdb1 = "PDB/1A8O.pdb"
    s = p.get_structure("scr", pdb1)

    expected_sequences = (
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    )
    # Reference torsions for the first three residues of each peptide:
    # (psi of residue 0, (phi, psi) of residue 1, (phi, psi) of residue 2).
    # The phi of residue 0 is expected to be None.
    expected_torsions = (
        (-0.46297171497725553,
         (-1.0873937604007962, 2.1337707832637109),
         (-2.4052232743651878, 2.3807316946081554)),
        (-0.6810077089092923,
         (-1.2654003477656888, -0.58689987042756309),
         (-1.7467679151684763, -1.5655066256698336)),
        (-0.73222884210889716,
         (-1.1044740234566259, -0.69681334592782884),
         (-1.8497413300164958, 0.34762889834809058)),
    )

    # Polypeptides built with the C-N criterion.
    ppb = PPBuilder()
    pp = ppb.build_peptides(s)
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(str(pp[index].get_sequence()), sequence)
    for index, (first_psi, second, third) in enumerate(expected_torsions):
        phi_psi = pp[index].get_phi_psi_list()
        # assertIsNone checks identity, matching test_ppbuilder_torsion.
        self.assertIsNone(phi_psi[0][0])
        self.assertAlmostEqual(phi_psi[0][1], first_psi, places=3)
        self.assertAlmostEqual(phi_psi[1][0], second[0], places=3)
        self.assertAlmostEqual(phi_psi[1][1], second[1], places=3)
        self.assertAlmostEqual(phi_psi[2][0], third[0], places=3)
        self.assertAlmostEqual(phi_psi[2][1], third[1], places=3)

    # Polypeptides built with the CA-CA criterion.
    ppb = CaPPBuilder()
    pp = ppb.build_peptides(s)
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(str(pp[index].get_sequence()), sequence)
    self.assertEqual(
        [ca.serial_number for ca in pp[0].get_ca_list()],
        [10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131,
         139, 150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242,
         251, 260, 267, 276, 284])
    taus = pp[1].get_tau_list()
    for index, value in enumerate(
            (0.3597907225123525, 0.43239284636769254, 0.99820157492712114)):
        self.assertAlmostEqual(taus[index], value, places=3)
    thetas = pp[2].get_theta_list()
    for index, value in enumerate(
            (1.6610069445335354, 1.7491703334817772, 2.0702447422720143)):
        self.assertAlmostEqual(thetas[index], value, places=3)
def run_test():
    """Print a summary of ``PDB/a_structure.pdb`` and its polypeptides.

    For every model, chain and residue the element counts are printed,
    together with notes about point mutations (``is_disordered() == 2``)
    and disordered atoms (``is_disordered() == 1``).  Afterwards the
    polypeptides of ``structure[1]`` are printed for both the C-N and the
    CA-CA builders, and ``quick_neighbor_search_test()`` is run.

    Every ``print`` receives a single parenthesised expression, so the
    output is the same under the Python 2 print statement and the
    Python 3 print function.
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    for model in structure.get_list():
        model_id = model.get_id()
        print("Model %i contains %i chains." % (model_id, len(model)))
        for chain in model.get_list():
            chain_id = chain.get_id()
            print("\tChain '%s' contains %i residues." % (chain_id, len(chain)))
            for residue in chain.get_list():
                hetfield, resseq, icode = residue.get_id()
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms."
                      % (hetfield, resseq, icode, len(residue)))
                # check if there is disorder due to a point mutation --- this is rare
                if residue.is_disordered() == 2:
                    print("\t\t\tThere is a point mutation present in the crystal at this position.")
                    s = "\t\t\tResidues at this position are "
                    for resname in residue.disordered_get_id_list():
                        s = s + resname + " "
                    # drop the trailing blank before closing the sentence
                    print(s[:-1] + ".")
                # count the number of disordered atoms
                if residue.is_disordered() == 1:
                    disordered_count = sum(
                        1 for atom in residue.get_list() if atom.is_disordered())
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered atoms."
                              % disordered_count)

    print("Polypeptides using C-N")
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("Polypeptides using CA-CA")
    ppb = CaPPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
def run_test():
    """Walk ``PDB/a_structure.pdb`` and report its contents on stdout.

    Prints the number of chains per model, residues per chain and atoms
    per residue.  A residue with ``is_disordered() == 2`` is reported as a
    point mutation; for ``is_disordered() == 1`` its disordered atoms are
    counted.  Then the polypeptides of ``structure[1]`` are listed, first
    with ``PPBuilder`` (C-N) and then with ``CaPPBuilder`` (CA-CA), and
    ``quick_neighbor_search_test()`` is called.

    Each ``print`` is given one parenthesised expression so that the
    function runs unchanged on Python 2 and Python 3.
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    for model in structure.get_list():
        model_id = model.get_id()
        print("Model %i contains %i chains." % (model_id, len(model)))
        for chain in model.get_list():
            chain_id = chain.get_id()
            print("\tChain '%s' contains %i residues." % (chain_id, len(chain)))
            for residue in chain.get_list():
                residue_id = residue.get_id()
                hetfield, resseq, icode = residue_id
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms." % (
                    hetfield, resseq, icode, len(residue)))
                # check if there is disorder due to a point mutation --- this is rare
                if residue.is_disordered() == 2:
                    print("\t\t\tThere is a point mutation present in the crystal at this position.")
                    s = "\t\t\tResidues at this position are "
                    for resname in residue.disordered_get_id_list():
                        s = s + resname + " "
                    # strip the trailing blank, then terminate the sentence
                    print(s[:-1] + ".")
                # count the number of disordered atoms
                if residue.is_disordered() == 1:
                    disordered_count = 0
                    for atom in residue.get_list():
                        if atom.is_disordered():
                            disordered_count += 1
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered atoms." % disordered_count)

    print("Polypeptides using C-N")
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("Polypeptides using CA-CA")
    ppb = CaPPBuilder()
    for pp in ppb.build_peptides(structure[1]):
        print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
def write_backbone_angles(chain, region=None, offset=0, outfile=sys.stdout, header=False):
    """Write one tab-separated row of phi/psi angles per residue of *chain*.

    :param chain: entity handed to ``PPBuilder.build_peptides``; its ``id``
        is written in the first column.
    :param region: ``(first, last)`` residue numbers (inclusive) to report;
        ``None`` reports every residue numbered 0 or above.
    :param offset: value added to the residue number for the fourth column.
    :param outfile: file-like object that receives the rows.
    :param header: when true, ``HEADER`` is printed before the rows.

    Columns: chain id, residue number, one-letter residue code, residue
    number plus *offset*, phi and psi.  Each angle is multiplied by
    ``RAD_FACTOR``; an angle that is ``None`` is written as ``NA``.
    """
    if region is None:
        region = (0, float('inf'))

    builder = PPBuilder()
    peptides = builder.build_peptides(chain)

    if header:
        print(HEADER, file=outfile)

    for peptide in peptides:
        torsions = peptide.get_phi_psi_list()
        for residue, (phi, psi) in zip(peptide, torsions):
            position = residue.get_id()[1]
            # Skip residues outside the requested window.
            if not (region[0] <= position <= region[1]):
                continue
            row = (
                chain.id,
                position,
                seq1(residue.get_resname()),
                position + offset,
                'NA' if phi is None else phi * RAD_FACTOR,
                'NA' if psi is None else psi * RAD_FACTOR,
            )
            print(*row, sep='\t', file=outfile)
def CreateJoinedFastas(input_PDB_objects):
    """
    Joins many PDB objects and creates a FASTA file with all objects joined.

    The file name is every object's id followed by "_", concatenated, with
    ".fa" appended.  Each object contributes one record: a ">id" header
    line followed by the sequences of all its polypeptides written back to
    back on a single line.

    Arguments:

    input_PDB_objects: list of PDB objects whose sequence will be added to the FASTA file.

    Returns the name of the file that was written.
    """
    polipeptide = PPBuilder()

    # Build the output file name from all ids.
    filename = "".join(obj.get_id() + "_" for obj in input_PDB_objects) + ".fa"

    # The context manager guarantees the handle is flushed and closed,
    # even if building a peptide raises part-way through.
    with open(filename, 'w') as joined_fasta:
        for position, obj in enumerate(input_PDB_objects):
            # Records after the first start on a fresh line.
            if position:
                joined_fasta.write("\n")
            joined_fasta.write(">" + obj.get_id() + "\n")
            for polipep in polipeptide.build_peptides(obj):
                joined_fasta.write(str(polipep.get_sequence()))

    return filename
def get_secondary_structure(structure):
    """Assign a Ramachandran-region label to residues of *structure*.

    Every child of *structure* is passed to ``PPBuilder.build_peptides``.
    For each residue with both phi and psi defined, the angles are
    converted from radians to degrees and matched against
    ``rama_ss_ranges``; the label of the first matching rectangle is
    stored.

    :param structure: iterable of entities accepted by
        ``PPBuilder.build_peptides``.
    :return: list of ``N`` strings (``N`` is a module-level name); an entry
        stays ``""`` when no label was assigned.

    NOTE(review): labels are stored at the residue's index *within its
    polypeptide*, so later polypeptides overwrite earlier ones at the same
    index.  This is kept as in the original; confirm whether a running
    index across peptides was intended.
    """
    import math

    # (phi_min, psi_min, width, height, label, colour), all in degrees.
    rama_ss_ranges = [(-180, -180, 80, 60, 'E', 'blue'),
                      (-180, 50, 80, 130, 'E', 'blue'),
                      (-100, -180, 100, 60, 'P', 'green'),
                      (-100, 50, 100, 130, 'P', 'green'),
                      (-180, -120, 180, 170, 'H', 'red'),
                      (0, -180, 180, 360, 'L', 'yellow')]

    # Calculate PSI and PHI
    ppb = PPBuilder()  # PolyPeptideBuilder
    ss = ["" for x in range(N)]
    for chain in structure:
        for pp in ppb.build_peptides(chain):
            # [(phi_residue_1, psi_residue_1), ...] in radians
            phi_psi = pp.get_phi_psi_list()
            for i, residue in enumerate(pp):
                phi, psi = phi_psi[i]
                # The terminal residues have a None angle: skip them.
                if phi is None or psi is None:
                    continue
                # The region table is in degrees, the torsions are in
                # radians; without this conversion almost every residue
                # lands in the wrong rectangle.
                phi_deg = math.degrees(phi)
                psi_deg = math.degrees(psi)
                for x, y, w, h, ss_c, color in rama_ss_ranges:
                    if x <= phi_deg < x + w and y <= psi_deg < y + h:
                        ss[i] = ss_c
                        break
    return ss
def SplitChain(PDB_objects):
    """
    Splits a list of PDB files by chain creating one PDB and one FASTA file per chain.

    For every chain id not seen before in its structure, writes
    "<pdb id>_<chain id>.pdb" with the chain's coordinates and
    "<pdb id>_<chain id>.fa" with a ">pdb_chain" header followed by the
    sequences of all polypeptides of that chain, concatenated.

    Arguments:

    PDB_objects: list of PDB objects (with many chains) generated by the PDB parser.

    Returns the list of "<pdb id>_<chain id>" prefixes that were written.
    """
    File_prefix = []

    for pdb in PDB_objects:
        chain_names = set()
        io = PDBIO()
        polipeptide = PPBuilder()

        for chain in pdb.get_chains():
            chain_id = chain.get_id()
            # Only the first chain with a given id is exported.
            if chain_id in chain_names:
                continue
            prefix = pdb.get_id() + "_" + chain_id

            # Creates a PDB file for each chain of the original file.
            io.set_structure(chain)
            io.save(prefix + ".pdb")
            File_prefix.append(prefix)

            # Creates a FASTA file for each chain of the original file.
            # Peptides are built from this chain only.  Building them from
            # the whole structure, and reopening the file in "w" mode for
            # each peptide, left every FASTA holding just the last peptide
            # of the entire structure.
            sequence = "".join(
                str(pp.get_sequence())
                for pp in polipeptide.build_peptides(chain))
            with open(prefix + ".fa", "w") as fasta:
                fasta.write(">" + prefix + "\n")
                fasta.write(sequence)

            chain_names.add(chain_id)

    return File_prefix
def test_ppbuilder_torsion(self):
    """Test phi/psi angles calculated with PPBuilder."""
    # Per peptide: psi of residue 0, then (phi, psi) of residues 1 and 2.
    reference = (
        (-0.46297171497725553,
         (-1.0873937604007962, 2.1337707832637109),
         (-2.4052232743651878, 2.3807316946081554)),
        (-0.6810077089092923,
         (-1.2654003477656888, -0.58689987042756309),
         (-1.7467679151684763, -1.5655066256698336)),
        (-0.73222884210889716,
         (-1.1044740234566259, -0.69681334592782884),
         (-1.8497413300164958, 0.34762889834809058)),
    )
    peptides = PPBuilder().build_peptides(self.structure)
    for index, (first_psi, second, third) in enumerate(reference):
        torsions = peptides[index].get_phi_psi_list()
        # The first residue of a peptide has no phi angle.
        self.assertIsNone(torsions[0][0])
        self.assertAlmostEqual(torsions[0][1], first_psi, places=3)
        for row, (phi, psi) in ((1, second), (2, third)):
            self.assertAlmostEqual(torsions[row][0], phi, places=3)
            self.assertAlmostEqual(torsions[row][1], psi, places=3)
def compute_secondary_structure(self, model):
    """
    Classify every residue of the model by its Ramachandran region.

    :param model: one model (iterable of chains)
    :return: list with one entry per collected residue: the label of the
        first region in ``self._ranges`` containing (phi, psi) in degrees,
        '-' when the angles are not available, or None when no region
        matches
    """
    builder = PPBuilder()
    # { chain : [[residue_1, ...], [phi_residue_1, ...], [psi_residue_1, ...]] }
    rama = {}
    residue_found = 0

    # Collect PHI and PSI (in degrees) per chain
    for chain in model:
        for peptide in builder.build_peptides(chain):
            torsions = peptide.get_phi_psi_list()
            for i, residue in enumerate(peptide):
                phi, psi = torsions[i]
                if phi is None or psi is None:
                    # First and last residue of a peptide lack one angle
                    phi_deg = psi_deg = math.nan
                else:
                    phi_deg = math.degrees(phi)
                    psi_deg = math.degrees(psi)
                columns = rama.setdefault(chain.id, [[], [], []])
                columns[0].append(residue)
                columns[1].append(phi_deg)
                columns[2].append(psi_deg)
                residue_found += 1

    # Eventual nan-padding (under pseudo-chain 'Z') so the result covers
    # self._residues entries even if some residues were not collected
    missing = self._residues - residue_found
    if missing > 0:
        padding = rama.setdefault('Z', [[], [], []])
        for _ in range(missing):
            padding[0].append(None)
            padding[1].append(math.nan)
            padding[2].append(math.nan)

    # Comparison of the angles with the Ramachandran regions
    ss = []
    for columns in rama.values():
        for residue, phi, psi in zip(*columns):
            if math.isnan(phi) and math.isnan(psi):
                # Angles not available
                ss.append('-')
                continue
            label = None
            for x, y, width, height, ss_c, color in self._ranges:
                if x <= phi < x + width and y <= psi < y + height:
                    label = ss_c
                    break
            ss.append(label)
    return ss
def test_polypeptide(self):
    """Tests on polypeptide class and methods.

    Builds polypeptides from ``PDB/1A8O.pdb`` with ``PPBuilder`` and with
    ``CaPPBuilder``.  Verifies the three expected sequences, the leading
    phi/psi angles, the CA serial numbers of the first peptide, and the
    leading tau and theta values.
    """
    p = PDBParser(PERMISSIVE=True)
    pdb1 = "PDB/1A8O.pdb"
    s = p.get_structure("scr", pdb1)

    sequences = (
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    )
    # One row per peptide: rows[k] is the expected (phi, psi) of residue k.
    # The phi of residue 0 is expected to be None.
    torsions = (
        ((None, -0.46297171497725553),
         (-1.0873937604007962, 2.1337707832637109),
         (-2.4052232743651878, 2.3807316946081554)),
        ((None, -0.6810077089092923),
         (-1.2654003477656888, -0.58689987042756309),
         (-1.7467679151684763, -1.5655066256698336)),
        ((None, -0.73222884210889716),
         (-1.1044740234566259, -0.69681334592782884),
         (-1.8497413300164958, 0.34762889834809058)),
    )

    # C-N based builder.
    ppb = PPBuilder()
    pp = ppb.build_peptides(s)
    for peptide_index, expected in enumerate(sequences):
        self.assertEqual(str(pp[peptide_index].get_sequence()), expected)
    for peptide_index, rows in enumerate(torsions):
        phi_psi = pp[peptide_index].get_phi_psi_list()
        for residue_index, (phi, psi) in enumerate(rows):
            if phi is None:
                # Identity check, as in test_ppbuilder_torsion.
                self.assertIsNone(phi_psi[residue_index][0])
            else:
                self.assertAlmostEqual(
                    phi_psi[residue_index][0], phi, places=3)
            self.assertAlmostEqual(phi_psi[residue_index][1], psi, places=3)

    # CA-CA based builder.
    ppb = CaPPBuilder()
    pp = ppb.build_peptides(s)
    for peptide_index, expected in enumerate(sequences):
        self.assertEqual(str(pp[peptide_index].get_sequence()), expected)
    self.assertEqual(
        [ca.serial_number for ca in pp[0].get_ca_list()],
        [10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131,
         139, 150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242,
         251, 260, 267, 276, 284])
    taus = pp[1].get_tau_list()
    self.assertAlmostEqual(taus[0], 0.3597907225123525, places=3)
    self.assertAlmostEqual(taus[1], 0.43239284636769254, places=3)
    self.assertAlmostEqual(taus[2], 0.99820157492712114, places=3)
    thetas = pp[2].get_theta_list()
    self.assertAlmostEqual(thetas[0], 1.6610069445335354, places=3)
    self.assertAlmostEqual(thetas[1], 1.7491703334817772, places=3)
    self.assertAlmostEqual(thetas[2], 2.0702447422720143, places=3)
def test_c_n(self):
    """Extract polypeptides using C-N."""
    built = PPBuilder().build_peptides(self.structure[1])
    self.assertEqual(len(built), 1)
    peptide = built[0]
    # The single peptide must span residues 2 to 86.
    for position, expected_number in ((0, 2), (-1, 86)):
        self.assertEqual(peptide[position].get_id()[1], expected_number)
def get_structure_sequence(struct):
    # type: (Structure) -> str
    """
    Concatenate the sequences of all polypeptides PPBuilder finds.

    :param struct: Structure object
    :return: struct sequence, the peptide sequences joined without separator
    """
    builder = PPBuilder()
    fragments = []
    for peptide in builder.build_peptides(struct):
        fragments.append(str(peptide.get_sequence()))
    return ''.join(fragments)
def is_protein(chain):
    """
    Tell whether PPBuilder finds a non-empty polypeptide in the chain.

    :param chain: entity passed to ``PPBuilder.build_peptides``
    :return: True if any built polypeptide has a non-empty sequence,
        False otherwise
    """
    builder = PPBuilder()
    return any(
        len(peptide.get_sequence()) > 0
        for peptide in builder.build_peptides(chain))
def chain_to_one_pp(chain):
    """Return the polypeptides of *chain* merged into its first polypeptide.

    When PPBuilder does not yield exactly one polypeptide, a warning is
    printed and every further polypeptide is appended (in order) to the
    first one, which is then returned.  An ``IndexError`` results when no
    polypeptide was built at all.
    """
    peptides = PPBuilder().build_peptides(chain)
    count = len(peptides)
    if count != 1:
        print('warning ', count,
              ' polypeptides from one chain, extending first pp')
        for extra in peptides[1:]:
            peptides[0].extend(extra)
    return peptides[0]
def structure_filtered_dca_get_sequence_from_structure(structure):
    """Return the concatenated polypeptide sequences of *structure*.

    Peptides are built with ``PPBuilder(radius=10.0)`` and
    ``aa_only=False``; their sequences are joined in order and any
    newline character is removed from the result.
    """
    from Bio.PDB import PPBuilder

    builder = PPBuilder(radius=10.0)
    pieces = [
        '%s' % peptide.get_sequence()
        for peptide in builder.build_peptides(structure, aa_only=False)
    ]
    return ''.join(pieces).replace('\n', '')
def run(infile, splitpdb):
    """Write FASTA (and optionally per-fragment PDB) files for a PDB file.

    :param infile: path of the PDB file to parse; its base name without
        extension is the prefix of every output file.
    :param splitpdb: ``0`` writes a single "<prefix>.fasta" holding every
        polypeptide plus a "<prefix>_clean.pdb" copy of the structure;
        ``1`` writes one "<prefix>_<chain>.<index>.fasta" and one
        "<prefix>_<chain>.<index>.pdb" per polypeptide.  Other values are
        not supported.
    :return: list of the chain ids encountered (over all models).

    Chains for which PPBuilder finds no polypeptide are extracted as a
    whole to "<prefix>_<chain>.0.pdb".
    NOTE(review): as in the original, that extraction is not conditioned
    on ``splitpdb``; confirm it is wanted when ``splitpdb == 0``.
    """
    parser = PDBParser()
    struct = parser.get_structure('mystruct', infile)
    ppb = PPBuilder()
    prefix = os.path.splitext(os.path.basename(infile))[0]

    # We do NOT split the PDB and fasta files: a single FASTA collects
    # every record.  It is closed in the ``finally`` below, even on error.
    seqfile = open(prefix + '.fasta', 'w') if splitpdb == 0 else None
    try:
        if splitpdb == 0:
            pdbio = PDBIO_RPL.PDBIO()
            pdbio.set_structure(struct)
            pdbio.save(prefix + '_clean.pdb')

        ListChains = []
        for model in struct:
            for chain in model:
                ListChains.append(chain.id)
                ListPpdb = ppb.build_peptides(chain)
                if ListPpdb:
                    for index, pp in enumerate(ListPpdb):
                        seq = pp.get_sequence()
                        record = '>%s %s\n%s\n' % (
                            prefix + '_chain_' + chain.id + '_' + str(index),
                            len(seq), seq)
                        if splitpdb == 1:
                            # We split the PDB and fasta files!
                            stem = prefix + '_' + chain.id + '.' + str(index)
                            with open(stem + '.fasta', 'w') as fragment:
                                fragment.write(record)
                            startres = pp[0].id[1]
                            endres = pp[-1].id[1]
                            Dice_RPL.extract(struct, chain.id, startres,
                                             endres, stem + '.pdb')
                        else:
                            seqfile.write(record)
                else:
                    # Also split chains that do not consist of amino acids!
                    ChainList = chain.get_list()
                    startres = ChainList[0].id[1]
                    # Residue number of the LAST residue.  The original
                    # read ChainList[0].id[-1], which is the first
                    # residue's insertion code.
                    endres = ChainList[-1].id[1]
                    # A chain without peptides has one fragment, numbered
                    # 0.  The original reused a loop index that is unset
                    # or stale at this point.
                    ofile = prefix + '_' + chain.id + '.0.pdb'
                    Dice_RPL.extract(struct, chain.id, startres, endres,
                                     ofile)
    finally:
        if seqfile is not None:
            seqfile.close()
    return ListChains
def test_c_n(self):
    """Extract polypeptides using C-N."""
    built = PPBuilder().build_peptides(self.structure[1])
    self.assertEqual(len(built), 1)
    peptide = built[0]

    # Check the start and end positions
    start_number = peptide[0].get_id()[1]
    self.assertEqual(start_number, 2)
    end_number = peptide[-1].get_id()[1]
    self.assertEqual(end_number, 86)

    # Check the sequence
    expected = ("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC")
    sequence = peptide.get_sequence()
    self.assertTrue(isinstance(sequence, Seq))
    self.assertEqual(sequence.alphabet, generic_protein)
    self.assertEqual(expected, str(sequence))
def get_sequence(self, chain_id):
    """
    Input:
        self: Use Biopython.PDB structure which has been stored in an object variable
        chain_id : String (usually in ['A','B', 'C' ...]. The number of chains
                   depends on the specific protein and the resulting structure)
    Return:
        Return the amino acid sequence (single-letter alphabet!) of a given chain (chain_id)
        in a Biopython.PDB structure as a string.

        The sequences of all polypeptides that PPBuilder finds in the chain
        are concatenated in order; a chain without any polypeptide gives ''.
        (Previously only the first polypeptide was returned, as a Seq
        object, and None when there was none.)
    """
    ppb = PPBuilder()
    chain = self.structure[0][chain_id]
    return ''.join(
        str(pp.get_sequence()) for pp in ppb.build_peptides(chain))
def test_ppbuilder_real_nonstd(self):
    """Test PPBuilder on real PDB file allowing non-standard amino acids."""
    peptides = PPBuilder().build_peptides(self.structure, False)
    self.assertEqual(len(peptides), 1)
    only = peptides[0]

    # Check the start and end positions
    self.assertEqual(only[0].get_id()[1], 151)
    self.assertEqual(only[-1].get_id()[1], 220)

    # Check the sequence
    sequence = only.get_sequence()
    self.assertIsInstance(sequence, Seq)
    # Here non-standard MSE are shown as M
    expected = ("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG")
    self.assertEqual(expected, sequence)
def getSeqLocation(self, seq):
    """Locate *seq* in the polypeptides of the first model.

    The polypeptides are searched in the order PPBuilder returns them and
    the first one containing *seq* wins.

    :param seq: amino-acid string to look for.
    :return: ``(beg, end, chain)`` where ``beg`` and ``end`` are the
        0-based, inclusive positions of the match inside that polypeptide's
        sequence and ``chain`` is the id of the chain the polypeptide
        belongs to.  If *seq* is not found, an error is reported through
        ``self.printerr`` and ``(None, None, None)`` is returned.
    """
    ppb = PPBuilder()
    for pp in ppb.build_peptides(self.__struct[0]):
        ind = str(pp.get_sequence()).find(seq)
        if ind != -1:
            # Return straight away instead of testing ``beg == end == 0``
            # afterwards.  That sentinel misreported a one-residue match
            # at position 0 as "not found", and failed with a NameError
            # when there were no polypeptides at all.
            return ind, ind + len(seq) - 1, pp[0].get_parent().get_id()

    line = '\n' + seq + ' not found in ' + str(self.__struct.get_id()) + '!\n'
    self.printerr(line)
    return None, None, None
def get_sequence(pdb, chain):
    """Write the sequence and the coordinates of one chain of a PDB file.

    :param pdb: path of the PDB file; it is also used as the structure id.
    :param chain: id of the chain to export (looked up in the first model).

    Two files are written:
    * ``pdb[-8:-4] + "_" + chain + chain + ".fasta.txt"`` with a
      ``>`` + ``pdb[:-4]`` + chain header and the concatenated sequences of
      all polypeptides of the chain;
    * ``pdb[-8:-4] + "_" + chain + ".pdb"`` with the atoms selected by
      ``SelectChains(chain)``.
    """
    # PERMISSIVE=0 selects Biopython's strict parsing mode (structure
    # construction problems raise instead of being tolerated).
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    ppb = PPBuilder()
    Sequence = "".join(
        str(pp.get_sequence()) for pp in ppb.build_peptides(pdb_chain))

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = pdb[-8:-4] + "_" + chain + ".pdb"

    # The context manager closes the FASTA file even if a write fails.
    with open(output[:-4] + chain + ".fasta.txt", "w") as out:
        out.write(">" + pdb[:-4] + chain + "\n")
        out.write(Sequence + "\n")

    io.save(output, SelectChains(chain))
def getRegionsResidues(self):
    """Fill ``self.__regions_res`` with the residues of every region.

    For each key of ``self.__regions_res`` the region sequence is taken
    from the first ``tcr_region_seq`` value of that key's group in
    ``self.__regions``.  It is searched in the polypeptides of the first
    model, and the residues of the first match replace the value stored
    under the key.

    :return: 1 when every region was found; 0 as soon as one region has no
        residues (an error is reported through ``self.printerr``).
    """
    ppb = PPBuilder()
    bltpep = ppb.build_peptides(self.__struct[0])

    # for every region contained in self.__regions_res
    for key in self.__regions_res:
        # Looked up once per region.  Doing it inside the peptide loop
        # repeated the work, and left the name unbound for the error
        # message below when there were no peptides.
        reg_seq = list(self.__regions.get_group(key)['tcr_region_seq'])[0]
        res = []
        for pp in bltpep:
            ind = str(pp.get_sequence()).find(reg_seq)
            if ind != -1:
                res = [pp[i] for i in range(ind, ind + len(reg_seq))]
                self.__regions_res[key] = res
                break
        if not res:
            line = '\n' + reg_seq + ' not found in ' + self.__name + '!\n'
            self.printerr('getRegionResidues(): ' + line)
            return 0
    return 1
def get_pp(pdb, chain, start, length, seq):
    """Retrieve the residues for a given pdb file and chain as a polypeptide.

    The chain's polypeptides are searched for *seq*.  On the first hit,
    *start* is replaced by the 0-based position of the match and that
    polypeptide is used.  Without a hit, the caller's *start* is applied
    to the last polypeptide, as before.

    :param pdb: identifier passed to ``make_filename`` and used as
        structure id.
    :param chain: chain id, looked up in the first model.
    :param start: fallback start index when *seq* is not found.
    :param length: number of residues of the fragment of interest.
    :param seq: sequence to locate.
    :return: the slice ``[start - 1 : start + length + 2]`` of the
        polypeptide, i.e. the fragment plus flanking residues.
    :raises RuntimeError: when the chain has no polypeptide, or when the
        slice would not fit (``start`` is 0 or the slice runs past the
        end).  The original signalled the second case with a bare
        ``raise``, which Python 3 also reports as ``RuntimeError``.
    """
    f = make_filename(pdb)
    p = PDBParser(PERMISSIVE=1)
    pdb_struct = p.get_structure(pdb, f)  # Load the pdb structure contained in the file f.
    pdb_chain = pdb_struct[0][chain]  # Select the right chain of the structure.
    ppb = PPBuilder()  # Initialize a peptide builder.
    peptides = ppb.build_peptides(pdb_chain)  # Load the given chain as peptides.

    if not peptides:
        raise RuntimeError(
            "no polypeptide found in chain %r of %r" % (chain, pdb))

    pep = peptides[-1]  # fallback when seq is not found in any peptide
    for candidate in peptides:
        index = str(candidate.get_sequence()).find(seq)
        if index != -1:
            start = index
            pep = candidate
            break

    if start > 0 and (start + length + 2) <= len(pep):
        return pep[(start - 1):(start + length + 2)]
    raise RuntimeError(
        "cannot extract residues %d..%d from a polypeptide of length %d"
        % (start - 1, start + length + 2, len(pep)))
def get_sequence(pdb, chain):
    """Write the ATOM-record sequence of one chain to a FASTA-like file.

    :param pdb: path of the PDB file; it is also used as the structure id.
    :param chain: chain id; ``"%"`` stands for the blank chain id ``" "``.

    The concatenated sequences of all polypeptides of the chain (built
    with ``aa_only=False``) are written to ``pdb[0:-4] + ".fasta.atom"``
    under the header ``>`` + ``pdb[0:-4]``.
    """
    # Compare by value.  ``is`` tests object identity, which is not
    # guaranteed for equal strings.
    if chain == "%":
        chain = " "
    warnings.filterwarnings('always', message='.*discontinuous at.*')

    # PERMISSIVE=0 selects Biopython's strict parsing mode.
    pdb_parser = PDBParser(PERMISSIVE=0, QUIET=True)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    ppb = PPBuilder()
    Sequence = "".join(
        str(pp.get_sequence())
        for pp in ppb.build_peptides(pdb_chain, aa_only=False))

    # The PDBIO object the original created was never used to save
    # anything, so it is gone.
    stem = pdb[0:-4]
    with open(stem + ".fasta.atom", "w") as out:
        out.write(">" + stem + "\n")
        out.write(Sequence + "\n")
def get_sequence(pdb, chain, first, last, output):
    """Save a domain of one chain of a PDB file to *output*.

    :param pdb: path of the PDB file; it is also used as the structure id.
    :param chain: id of the chain holding the domain (must exist in the
        first model, otherwise ``KeyError`` is raised).
    :param first: passed to ``SelectDomain`` as the start of the domain.
    :param last: passed to ``SelectDomain`` as the end of the domain.
    :param output: path of the PDB file to write.

    Despite its name the function no longer computes a sequence.  The
    original built one but only used it in commented-out code.
    """
    # PERMISSIVE=0 selects Biopython's strict parsing mode.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    # Fail early, as the original chain lookup did, when the chain is
    # missing from the first model.
    if chain not in pdb_structure[0]:
        raise KeyError(chain)

    io = PDBIO()
    io.set_structure(pdb_structure)
    io.save(output, SelectDomain(chain, first, last))
def test_ppbuilder_real(self):
    """Test PPBuilder on real PDB file."""
    peptides = PPBuilder().build_peptides(self.structure)
    self.assertEqual(len(peptides), 3)

    # Check termini
    expected_termini = ((152, 184), (186, 213), (216, 220))
    for peptide, (first, last) in zip(peptides, expected_termini):
        self.assertEqual(peptide[0].get_id()[1], first)
        self.assertEqual(peptide[-1].get_id()[1], last)

    # Now check sequences
    sequences = [peptide.get_sequence() for peptide in peptides]
    self.assertIsInstance(sequences[0], Seq)
    expected_sequences = (
        "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
        "TETLLVQNANPDCKTILKALGPGATLEE",
        "TACQG",
    )
    for actual, expected in zip(sequences, expected_sequences):
        self.assertEqual(actual, expected)
def get_sequence(pdb, chain):
    """Renumber one chain so it starts at residue 1 and save it.

    :param pdb: path of the PDB file; it is also used as the structure id.
    :param chain: id of the chain to renumber (looked up in the first model).

    When the first residue of the chain is not numbered 1, every residue
    id becomes ``(' ', old_number - start + 1, ' ')``, so hetero flags and
    insertion codes are blanked.  The atoms selected by
    ``SelectChains(chain)`` are then written to ``"renumbered_" + pdb``.

    Despite its name the function does not return or write a sequence.
    The original computed one that was only used in commented-out code.
    """
    # PERMISSIVE=0 selects Biopython's strict parsing mode.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)
    pdb_chain = pdb_structure[0][chain]

    start = [residue.id[1] for residue in pdb_chain][0]
    # Compare by value.  ``is not`` on an int tests object identity and
    # only appears to work for small cached integers.
    if start != 1:
        for residue in pdb_chain:
            residue.id = (' ', residue.id[1] - start + 1, ' ')

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = "renumbered_" + pdb
    io.save(output, SelectChains(chain))
def handle(self, *args, **options):
    """Build Protein, ProteinConformation and Residue rows for alpha subunits.

    For every ``SignprotComplex`` this command:

    1. fetches or creates a ``Protein`` whose entry name is the lower-cased
       PDB code plus ``'_a'``, and the matching ``ProteinConformation``;
    2. parses the complex' PDB text and collects the residue numbers of
       chain ``sc.alpha`` that have a ``CA`` atom;
    3. pairs each of those structure residues with the ``Residue`` of
       ``sc.protein`` carrying the same sequence number;
    4. clears mismatching pairs that the PDB ``SEQADV`` records explain,
       and re-pairs the remaining ones through a pairwise alignment;
    5. bulk-creates one ``Residue`` per structure residue for the new
       conformation.

    With ``options['purge']`` set, the existing ``'_a'`` rows of the
    ``'Alpha'`` family are deleted first.  Any exception raised while
    processing one complex is printed and logged, and the loop moves on to
    the next complex.

    NOTE(review): this block was documented from a view of the file in
    which line structure was collapsed; nesting was inferred from syntax.
    """
    self.options = options
    if self.options['purge']:
        # Delete in dependency order: residues, conformations, proteins.
        Residue.objects.filter(
            protein_conformation__protein__entry_name__endswith='_a',
            protein_conformation__protein__family__parent__parent__name='Alpha').delete()
        ProteinConformation.objects.filter(
            protein__entry_name__endswith='_a',
            protein__family__parent__parent__name='Alpha').delete()
        Protein.objects.filter(
            entry_name__endswith='_a',
            family__parent__parent__name='Alpha').delete()

    # Building protein and protconf objects for g protein structure in complex
    scs = SignprotComplex.objects.all()
    for sc in scs:
        self.logger.info(
            'Protein, ProteinConformation and Residue build for alpha subunit of {} is building'
            .format(sc))
        try:
            # Alpha subunit
            try:
                alpha_protein = Protein.objects.get(
                    entry_name=sc.structure.pdb_code.index.lower() + '_a')
            # NOTE(review): bare except -- any failure of the lookup, not
            # only "does not exist", leads to creating a new Protein.
            except:
                alpha_protein = Protein()
                alpha_protein.entry_name = sc.structure.pdb_code.index.lower() + '_a'
                alpha_protein.accession = None
                alpha_protein.name = sc.structure.pdb_code.index.lower() + '_a'
                alpha_protein.sequence = sc.protein.sequence
                alpha_protein.family = sc.protein.family
                alpha_protein.parent = sc.protein
                alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                    slug='mod')
                alpha_protein.source = ProteinSource.objects.get(
                    name='OTHER')
                alpha_protein.species = sc.protein.species
                alpha_protein.save()
            try:
                alpha_protconf = ProteinConformation.objects.get(
                    protein__entry_name=sc.structure.pdb_code.index.lower() + '_a')
            # NOTE(review): bare except, same remark as above.
            except:
                alpha_protconf = ProteinConformation()
                alpha_protconf.protein = alpha_protein
                alpha_protconf.state = ProteinState.objects.get(
                    slug='active')
                alpha_protconf.save()

            # Parse the stored PDB text and pick the alpha-subunit chain.
            pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
            s = pdbp.get_structure('struct',
                                   StringIO(sc.structure.pdb_data.pdb))
            chain = s[0][sc.alpha]

            # Sequence numbers of the chain residues that have a CA atom;
            # residues for which the lookup fails are silently skipped.
            nums = []
            for res in chain:
                try:
                    res['CA']
                    nums.append(res.get_id()[1])
                except:
                    pass

            # Reference residues of the complex' signalling protein.
            resis = Residue.objects.filter(
                protein_conformation__protein=sc.protein)
            num_i = 0  # not used below
            temp_seq2 = ''  # not used below
            pdb_num_dict = OrderedDict()
            # Create first alignment based on sequence numbers
            # pdb_num_dict: structure number -> [structure residue, reference Residue]
            for n in nums:
                # 6OIJ: numbers below 30 are paired with number + 6.
                if sc.structure.pdb_code.index == '6OIJ' and n < 30:
                    nr = n + 6
                else:
                    nr = n
                pdb_num_dict[n] = [chain[n], resis.get(sequence_number=nr)]

            # Find mismatches
            # (pairs whose structure residue name, mapped through AA,
            # differs from the reference amino acid)
            mismatches = []
            for n, res in pdb_num_dict.items():
                if AA[res[0].get_resname()] != res[1].amino_acid:
                    mismatches.append(res)

            # Collect the SEQADV lines of the PDB text.
            pdb_lines = sc.structure.pdb_data.pdb.split('\n')
            seqadv = []
            for l in pdb_lines:
                if l.startswith('SEQADV'):
                    seqadv.append(l)
            mutations, shifted_mutations = OrderedDict(), OrderedDict()
            # Search for annotated engineered mutations in pdb SEQADV
            # Regex groups as used below: 1 = residue name, 2 = chain id,
            # 3 = residue number, 4 = accession field, 5 = second residue
            # name, 6 = second number, 7 = trailing annotation.
            # NOTE: the loop variable reuses the name ``s`` of the parsed
            # structure; ``chain`` was taken from it before this point.
            for s in seqadv:
                line_search = re.search(
                    'SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)',
                    s)
                if line_search != None:
                    if line_search.group(2) == sc.alpha:
                        if line_search.group(4).strip() == sc.protein.accession:
                            # Same number on both sides: plain mutation;
                            # otherwise remember the second number too.
                            if line_search.group(3) == line_search.group(6):
                                mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5)
                                ]
                            else:
                                shifted_mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5),
                                    int(line_search.group(6))
                                ]
                        else:
                            # Exception for 6G79
                            if line_search.group(3) != line_search.group(6) and 'CONFLICT' in line_search.group(7):
                                mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5)
                                ]
                            # Exception for 5G53
                            if line_search.group(4).strip() != sc.protein.accession:
                                mutations[int(line_search.group(3))] = [
                                    line_search.group(1),
                                    line_search.group(5)
                                ]

            remaining_mismatches = []
            # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
            for m in mismatches:
                num = m[0].get_id()[1]
                if num in mutations:
                    # Kept only when neither side agrees with the SEQADV record.
                    if m[0].get_resname() != mutations[num][0] and m[1].amino_acid != AA[mutations[num][1]]:
                        remaining_mismatches.append(m)
                elif num in shifted_mutations:
                    remaining_mismatches.append(m)
                else:
                    remaining_mismatches.append(m)

            ### sanity check
            # print(mutations)
            # print(shifted_mutations)
            # print(mismatches)
            # print(remaining_mismatches)
            # pprint.pprint(pdb_num_dict)

            # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
            if len(remaining_mismatches) > 0 and sc.structure.pdb_code.index not in ['6OIJ', '6OY9', '6OYA']:
                # Sequence of the structure chain, all peptides concatenated.
                ppb = PPBuilder()
                seq = ''
                for pp in ppb.build_peptides(chain, aa_only=False):
                    seq += str(pp.get_sequence())
                pw2 = pairwise2.align.localms(sc.protein.sequence, seq, 2,
                                              -1, -.5, -.1)
                # Only the first alignment returned is used.
                ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                wt_pdb_dict = OrderedDict()
                pdb_wt_dict = OrderedDict()
                # j walks the reference residues, k the structure numbers;
                # a gap on either side is recorded under the alignment
                # column index i instead of a residue.
                j, k = 0, 0
                for i, ref, temp in zip(range(0, len(ref_seq)), ref_seq,
                                        temp_seq):
                    if ref != '-' and temp != '-':
                        wt_pdb_dict[resis[j]] = pdb_num_dict[nums[k]]
                        pdb_wt_dict[pdb_num_dict[nums[k]][0]] = resis[j]
                        j += 1
                        k += 1
                    elif ref == '-':
                        wt_pdb_dict[i] = pdb_num_dict[nums[k]]
                        pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                        k += 1
                    elif temp == '-':
                        wt_pdb_dict[resis[j]] = i
                        pdb_wt_dict[i] = resis[j]
                        j += 1
                for i, r in enumerate(remaining_mismatches):
                    # Adjust for shifted residue when residue is a match
                    # NOTE(review): for i == 0 the index i - 1 is -1, i.e.
                    # the LAST remaining mismatch -- confirm this is intended.
                    if r[0].get_id()[1] - remaining_mismatches[i - 1][0].get_id()[1] > 1:
                        pdb_num_dict[r[0].get_id()[1] - 1][1] = pdb_wt_dict[chain[r[0].get_id()[1] - 1]]
                    # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                    if r[0].get_id()[1] in shifted_mutations:
                        pdb_num_dict[r[0].get_id()[1]][1] = resis.get(
                            sequence_number=shifted_mutations[r[0].get_id()[1]][2])
                    # Adjust for shift
                    else:
                        pdb_num_dict[r[0].get_id()[1]][1] = pdb_wt_dict[r[0]]

            # One new Residue per structure residue: number and amino acid
            # come from the structure, generic numbers and segment from
            # the paired reference residue.
            bulked_residues = []
            for key, val in pdb_num_dict.items():
                # print(key, val) # sanity check
                res_obj = Residue()
                res_obj.sequence_number = val[0].get_id()[1]
                res_obj.amino_acid = AA[val[0].get_resname()]
                res_obj.display_generic_number = val[1].display_generic_number
                res_obj.generic_number = val[1].generic_number
                res_obj.protein_conformation = alpha_protconf
                res_obj.protein_segment = val[1].protein_segment
                bulked_residues.append(res_obj)
            Residue.objects.bulk_create(bulked_residues)
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} is finished'
                .format(sc))
        except Exception as msg:
            # Report the failure and continue with the next complex.
            print(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                .format(sc))
            print(msg)
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                .format(sc))
from Bio.SeqRecord import SeqRecord

structures = []
pdb_ids = []
structures_dir = "pdb_structures"
parser = MMCIFParser()

# Read structures from IO: every entry of structures_dir whose name has
# no "." is treated as a sub-directory of structure files; the first four
# characters of each file name serve as the PDB id.
for item in listdir(structures_dir):
    if '.' in item:
        continue
    subdir = structures_dir + "/" + item
    for subitem in listdir(subdir):
        print("Parsing " + subitem)
        structures.append(
            parser.get_structure(subitem[:4], subdir + "/" + subitem))
        pdb_ids.append(subitem[:4])

# Extract peptide sequences and write the longest one of each structure
# to sequence_from_structure/<pdb id>.fasta
ppb = PPBuilder()
for pdb_id, structure in zip(pdb_ids, structures):
    print(pdb_id)
    seqs = [peptide.get_sequence()
            for peptide in ppb.build_peptides(structure)]
    # Stable sort by length; the last element is the longest sequence.
    longest = sorted(seqs, key=len)[-1]
    record = SeqRecord(longest, id=pdb_id)
    AlignIO.write(
        MultipleSeqAlignment([record]),
        "sequence_from_structure/" + pdb_id + ".fasta", "fasta")
from Bio.PDB import PDBParser
from Bio.PDB import PPBuilder
from Bio.PDB import Polypeptide

# Script: load one PDB file, build its polypeptides and print a few views of
# the first one.  ``print`` is called with a single parenthesised argument so
# the script produces the same output on Python 2 and Python 3.
item = '2bnr'
structure = PDBParser().get_structure(item, '../pdbs/'+item+'.pdb')
ppb=PPBuilder()
peps = ppb.build_peptides(structure)
print(structure.get_id())
print(peps[0])
#print peps[0][1:-3]
print(peps[0][3:9])
p = peps[0][3:9]
# Residue name of the second residue of the first polypeptide
print(peps[0][1].get_resname())
if filename.endswith(".pdb"):
    # Collect the names of .pdb files and count them in ``idx``.
    # dataset_dict[filename] = idx
    dataset_filenames.append(filename)
    idx += 1

# Map each PDB file name to the sequence of its first polypeptide.
pdb_to_seq = {}
parser = PDBParser()
ppb = PPBuilder()
i = 0
for filename in dataset_filenames:
    # record=True captures warnings raised inside the block instead of
    # emitting them.
    with warnings.catch_warnings(record=True):
        with open(os.path.join(Constants.PDB_PATH, filename)) as f:
            # The structure id is the file name without its extension.
            structure = parser.get_structure(os.path.splitext(filename)[0], f)
            model = structure[0]
            for pp in ppb.build_peptides(model):
                #print(pp.get_sequence())
                # Only the first polypeptide of the first model is kept.
                pdb_to_seq[filename] = str(pp.get_sequence())
                break

# Map each file name to the key of the split it is listed under in the JSON
# split file.
file_to_ds = {}
with open(Constants.TRAIN_VAL_TEST_SPLIT_FILE_PATH) as file:
    split_d = json.load(file)
    for tr_val_or_test, filenames in split_d.items():
        for fn in filenames:
            file_to_ds[fn] = tr_val_or_test

seq_to_pdbs = {}
# NOTE(review): the body of this loop is not visible in this excerpt.
for pdb, seq in pdb_to_seq.items():
# Classify one chain as "good" (a single continuous polypeptide) or "bad"
# (PPBuilder splits it into several peptides) and append its code to the
# matching list file.
chain = struc[0][chainid]
resnums = [resi.id[1] for resi in chain]
# calphas = [resi['CA'] for resi in chain]
#print code + chainid

# find gaps in numbering
breaks = [
    j for i, j in enumerate(resnums)
    if i != 0 and j != resnums[i - 1] + 1
]
# dists = [j - calphas[i-1] for i,j in enumerate(calphas) if i != 0 ]
# measure c-alpha distances
# breakdists = [j for i,j in enumerate(calphas) if i != 0 and (j - calphas[i-1]) > 4]
#print breakdists
#print breaks

# use in built polypeptide builder
ppb = PPBuilder()
if len(ppb.build_peptides(chain)) > 1:
    with open("bad.5codes", 'a') as fout:
        fout.write(code + '\n')
    # Disabled debugging aid, kept for reference (never executed).
    if False:
        #for pp in ppb.build_peptides(struc):
        print(pp.get_sequence())
        io.set_structure(pp)
        io.save("/tmp/test.pdb")
else:
    with open("good.5codes", 'a') as fout:
        fout.write(code + '\n')
#print '\n'.join(map(str,dists))
#if len(breaks) > 0:
# Disabled debugging aid, kept for reference (never executed).  Single-argument
# print calls behave identically on Python 2 and Python 3.
if False:
    print("breaks %s" % (breaks,))
    print(resnums)
# list[n].append(atom)
# previous = atom
# return list

if __name__ == "__main__":
    # Locate the pdb directory relative to the script location.  The path is
    # joined component-wise: plain concatenation dropped the separator after
    # a non-empty directory name (e.g. "scripts../pdb/").  The trailing ''
    # keeps the trailing separator the original string carried.
    current_path = os.path.dirname(sys.argv[0])
    pdb_path = os.path.join(current_path, '..', 'pdb', '')
    pdb_id = '2vb1'
    structure = get_structure(pdb_id, pdb_path)
    model = structure[0]
    ppb = PPBuilder()
    pp_list = ppb.build_peptides(model)
    # orient
    orient(pp_list)
    # first split stage
    fs = first_split(pp_list)
    for seg in fs:
        pp = Polypeptide.Polypeptide(seg)
        # Single-argument print call: same output on Python 2 and Python 3.
        print(pp.get_sequence())
def handle(self, *args, **options):
    """Build alpha-subunit proteins, residues and rotamers for G protein complexes.

    Steps, driven by ``options``:

    * ``purge``: delete existing ``*_a`` alpha-subunit Residue,
      ProteinConformation and Protein records and all SignprotStructure
      records.
    * unless ``only_signprot_structures``: for each SignprotComplex
      (restricted to the PDB codes in ``options["s"]`` when given), create
      the ``<pdb>_a`` Protein/ProteinConformation if missing, map every
      structure residue of the alpha chain onto a wild-type Residue, then
      save a Residue and build a Rotamer for each mapped position.
    * unless ``s`` is given: build SignprotStructure objects for PDB entries
      of G alpha proteins that are not part of a complex.
    * ``debug``: print intermediate data and the total run time.

    Any exception raised while processing one complex is caught and logged,
    so one failing structure does not abort the run.
    """
    startTime = datetime.datetime.now()
    self.options = options
    if self.options["purge"]:
        Residue.objects.filter(
            protein_conformation__protein__entry_name__endswith="_a",
            protein_conformation__protein__family__parent__parent__name="Alpha"
        ).delete()
        ProteinConformation.objects.filter(
            protein__entry_name__endswith="_a",
            protein__family__parent__parent__name="Alpha").delete()
        Protein.objects.filter(
            entry_name__endswith="_a",
            family__parent__parent__name="Alpha").delete()
        SignprotStructureExtraProteins.objects.all().delete()
        SignprotStructure.objects.all().delete()
    if not options["only_signprot_structures"]:
        # Building protein and protconf objects for g protein structure in complex
        if options["s"]:
            scs = SignprotComplex.objects.filter(
                structure__pdb_code__index__in=[
                    i.upper() for i in options["s"]
                ])
        else:
            scs = SignprotComplex.objects.all()
        for sc in scs:
            self.logger.info(
                "Protein, ProteinConformation and Residue build for alpha subunit of {} is building"
                .format(sc))
            try:
                pdb_code = sc.structure.pdb_code.index
                alpha_entry_name = pdb_code.lower() + "_a"
                # Alpha subunit
                try:
                    alpha_protein = Protein.objects.get(
                        entry_name=alpha_entry_name)
                except Protein.DoesNotExist:
                    alpha_protein = Protein()
                    alpha_protein.entry_name = alpha_entry_name
                    alpha_protein.accession = None
                    alpha_protein.name = alpha_entry_name
                    alpha_protein.sequence = sc.protein.sequence
                    alpha_protein.family = sc.protein.family
                    alpha_protein.parent = sc.protein
                    alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                    alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                        slug="mod")
                    alpha_protein.source = ProteinSource.objects.get(
                        name="OTHER")
                    alpha_protein.species = sc.protein.species
                    alpha_protein.save()
                try:
                    alpha_protconf = ProteinConformation.objects.get(
                        protein__entry_name=alpha_entry_name)
                except ProteinConformation.DoesNotExist:
                    alpha_protconf = ProteinConformation()
                    alpha_protconf.protein = alpha_protein
                    alpha_protconf.state = ProteinState.objects.get(
                        slug="active")
                    alpha_protconf.save()
                pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
                s = pdbp.get_structure("struct",
                                       StringIO(sc.structure.pdb_data.pdb))
                chain = s[0][sc.alpha]
                # Sequence numbers of regular residues (blank hetero flag)
                # that have a C-alpha atom
                nums = [
                    res.get_id()[1] for res in chain
                    if "CA" in res and res.id[0] == " "
                ]
                resis = Residue.objects.filter(
                    protein_conformation__protein=sc.protein)
                pdb_num_dict = OrderedDict()
                # Create first alignment based on sequence numbers
                for n in nums:
                    if pdb_code == "6OIJ" and n < 30:
                        nr = n + 6
                    else:
                        nr = n
                    pdb_num_dict[n] = [
                        chain[n], resis.get(sequence_number=nr)
                    ]
                # Find mismatches
                mismatches = [
                    res for res in pdb_num_dict.values()
                    if AA[res[0].get_resname()] != res[1].amino_acid
                ]
                pdb_lines = sc.structure.pdb_data.pdb.split("\n")
                seqadv = [l for l in pdb_lines if l.startswith("SEQADV")]
                mutations, shifted_mutations = OrderedDict(), OrderedDict()
                # Search for annotated engineered mutations in pdb SEQADV
                for seqadv_line in seqadv:
                    # Raw string: the pattern is full of regex escapes
                    # (\s, \d, \S) that are not valid Python string escapes.
                    line_search = re.search(
                        r"SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)",
                        seqadv_line)
                    if line_search is not None:
                        if line_search.group(2) == sc.alpha:
                            if line_search.group(
                                    4).strip() == sc.protein.accession:
                                if line_search.group(
                                        3) == line_search.group(6):
                                    mutations[int(
                                        line_search.group(3))] = [
                                            line_search.group(1),
                                            line_search.group(5)
                                        ]
                                else:
                                    shifted_mutations[int(
                                        line_search.group(3))] = [
                                            line_search.group(1),
                                            line_search.group(5),
                                            int(line_search.group(6))
                                        ]
                            else:
                                # Exception for 6G79
                                if line_search.group(
                                        3) != line_search.group(
                                            6
                                        ) and "CONFLICT" in line_search.group(7):
                                    mutations[int(
                                        line_search.group(3))] = [
                                            line_search.group(1),
                                            line_search.group(5)
                                        ]
                                # Exception for 5G53
                                if line_search.group(
                                        4).strip() != sc.protein.accession:
                                    mutations[int(
                                        line_search.group(3))] = [
                                            line_search.group(1),
                                            line_search.group(5)
                                        ]
                remaining_mismatches = []
                # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
                for m in mismatches:
                    num = m[0].get_id()[1]
                    if num in mutations:
                        if m[0].get_resname() != mutations[num][0] and m[
                                1].amino_acid != AA[mutations[num][1]]:
                            remaining_mismatches.append(m)
                    elif num in shifted_mutations:
                        remaining_mismatches.append(m)
                    else:
                        remaining_mismatches.append(m)
                if options["debug"]:
                    print(sc)
                    print(mutations)
                    print(shifted_mutations)
                    print(mismatches)
                    print("======")
                    print(remaining_mismatches)
                    pprint.pprint(pdb_num_dict)
                # Structures for which no sequence-number shift correction
                # is attempted
                no_seqnum_shift = [
                    '6OY9', '6OYA', '6LPB', '6WHA', '7D77', '6XOX', '7L1U',
                    '7L1V'
                ]
                # Check if HN is mutated to GNAI1 for the scFv16 stabilizer
                if sc.protein.entry_name != 'gnai1_human' and len(
                        remaining_mismatches) > 0:
                    target_HN = resis.filter(protein_segment__slug='HN')
                    gnai1_HN = Residue.objects.filter(
                        protein_conformation__protein__entry_name=
                        'gnai1_human',
                        protein_segment__slug='HN')
                    # Loop-invariant bound, looked up once rather than per
                    # residue
                    hn_bound = target_HN.reverse()[0].sequence_number
                    pdb_HN_seq = ''
                    for num, val in pdb_num_dict.items():
                        if num <= hn_bound:
                            pdb_HN_seq += Polypeptide.three_to_one(
                                val[0].get_resname())
                    if options['debug']:
                        print('Checking if HN is gnai1_human')
                        print(pdb_HN_seq)
                        print(''.join(
                            gnai1_HN.values_list('amino_acid', flat=True)))
                    gnai1_HN_seq = ''.join(
                        gnai1_HN.values_list('amino_acid', flat=True))
                    pw2 = pairwise2.align.localms(gnai1_HN_seq, pdb_HN_seq,
                                                  3, -4, -3, -1)
                    ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                    # Percent identity over the non-gap positions of the
                    # structure sequence
                    length, match = 0, 0
                    for r, t in zip(ref_seq, temp_seq):
                        if options['debug']:
                            print(r, t)
                        if t != '-':
                            if r == t:
                                match += 1
                            length += 1
                    identity = match / length * 100
                    if options['debug']:
                        print(identity)
                    if identity > 85:
                        if pdb_code not in ['7DFL']:
                            no_seqnum_shift.append(pdb_code)
                            if options['debug']:
                                print(
                                    'INFO: HN has {}% with gnai1_human HN, skipping seqnum shift correction'
                                    .format(round(identity)))
                # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
                if len(remaining_mismatches
                       ) > 0 and pdb_code not in no_seqnum_shift:
                    ppb = PPBuilder()
                    seq = "".join(
                        str(pp.get_sequence())
                        for pp in ppb.build_peptides(chain, aa_only=False))
                    if pdb_code in ['7JVQ', '7L1U', '7L1V']:
                        pw2 = pairwise2.align.localms(
                            sc.protein.sequence, seq, 3, -4, -3, -1)
                    else:
                        pw2 = pairwise2.align.localms(
                            sc.protein.sequence, seq, 2, -1, -.5, -.1)
                    ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                    # Custom fix for A->G mutation at pos 18
                    if pdb_code == '7JJO':
                        ref_seq = ref_seq[:18] + ref_seq[19:]
                        temp_seq = temp_seq[:17] + temp_seq[18:]
                    # Custom alignment fixes
                    elif pdb_code == '7DFL':
                        ref_seq = 'MTLESIMACCLSEEAKEARRINDEIERQLRRDKRDARRELKLLLLGTGESGKSTFIKQMRIIHGSGYSDEDKRGFTKLVYQNIFTAMQAMIRAMDTLKIPYKYEHNKAHAQLVREVDVEKVSAFENPYVDAIKSLWNDPGIQECYDRRREYQLSDSTKYYLNDLDRVADPAYLPTQQDVLRVRVPTTGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQVLVESDNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIMYSHLVDYFPEYDGPQRDAQAAREFILKMFVDLNPDSDKIIYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV'
                        temp_seq = '--------CTLSAEDKAAVERSKMIDRNLREDGEKARRELKLLLLGTGESGKSTFIKQMRIIHG--------------------------------------------------------------------------------------------------------------------------TGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQV----DNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIMYSHLVDYFPEYDGPQRDAQAAREFILKMFVDLNPDSDKILYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV'
                    elif pdb_code == '7JOZ':
                        temp_seq = temp_seq[:67] + (
                            '-' * 14) + 'FNGDS' + temp_seq[86:]
                    elif pdb_code == '7AUE':
                        ref_seq = ref_seq[:31].replace('-',
                                                       '') + ref_seq[31:]
                        temp_seq = (
                            9 * '-') + temp_seq[2:5] + temp_seq[5:54].replace(
                                '-', '') + temp_seq[54:]
                    # Two-way lookups between wild-type residues and
                    # structure entries; an alignment gap on either side is
                    # represented by the integer alignment index.
                    wt_pdb_dict = OrderedDict()
                    pdb_wt_dict = OrderedDict()
                    j, k = 0, 0
                    for i, ref, temp in zip(range(0, len(ref_seq)), ref_seq,
                                            temp_seq):
                        if options["debug"]:
                            print(i, ref, temp)  # alignment check
                        if ref != "-" and temp != "-":
                            wt_pdb_dict[resis[j]] = pdb_num_dict[nums[k]]
                            pdb_wt_dict[pdb_num_dict[nums[k]]
                                        [0]] = resis[j]
                            j += 1
                            k += 1
                        elif ref == "-":
                            wt_pdb_dict[i] = pdb_num_dict[nums[k]]
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                            k += 1
                        elif temp == "-":
                            wt_pdb_dict[resis[j]] = i
                            pdb_wt_dict[i] = resis[j]
                            j += 1
                    # Custom fix for 7JJO isoform difference
                    if pdb_code in ['7JJO', '7JOZ', '7AUE']:
                        # Rebuild the mapping from the alignment alone,
                        # keeping only aligned pairs (list values)
                        pdb_num_dict = OrderedDict()
                        for wt_res, st_res in wt_pdb_dict.items():
                            if isinstance(st_res, list):
                                pdb_num_dict[wt_res.sequence_number] = [
                                    st_res[0], wt_res
                                ]
                    else:
                        for i, r in enumerate(remaining_mismatches):
                            # Adjust for shifted residue when residue is a match
                            # NOTE(review): for i == 0 the index i - 1 wraps
                            # to the last mismatch - confirm this is intended.
                            if r[0].get_id()[1] - remaining_mismatches[
                                    i - 1][0].get_id()[1] > 1:
                                pdb_num_dict[r[0].get_id()[1] -
                                             1][1] = pdb_wt_dict[chain[
                                                 r[0].get_id()[1] - 1]]
                            # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                            if r[0].get_id()[1] in shifted_mutations:
                                pdb_num_dict[
                                    r[0].get_id()[1]][1] = resis.get(
                                        sequence_number=shifted_mutations[
                                            r[0].get_id()[1]][2])
                            # Adjust for shift
                            else:
                                pdb_num_dict[r[0].get_id()
                                             [1]][1] = pdb_wt_dict[r[0]]
                    if pdb_code == '7JVQ':
                        pdb_num_dict[198][1] = Residue.objects.get(
                            protein_conformation__protein=sc.protein,
                            sequence_number=346)
                        pdb_num_dict[235][1] = Residue.objects.get(
                            protein_conformation__protein=sc.protein,
                            sequence_number=383)
                    elif pdb_code == '6PB0':
                        pdb_num_dict[205][1] = Residue.objects.get(
                            protein_conformation__protein=sc.protein,
                            sequence_number=205)
                ### Custom alignment fix for 6WHA mini-Gq/Gi2/Gs chimera
                elif pdb_code == "6WHA":
                    ref_seq = "MTLESIMACCLSEEAKEARRINDEIERQLRRDKRDARRELKLLLLGTGESGKSTFIKQMRIIHGSGYSDEDKRGFTKLVYQNIFTAMQAMIRAMDTLKIPYKYEHNKAHAQLVREVDVEKVSAFENPYVDAIKSLWNDPGIQECYDRRREYQLSDSTKYYLNDLDRVADPAYLPTQQDVLRVRVPTTGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQVLVESDNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIM--YSHLVDYFPEYDGP----QRDAQAAREFILKMFVDL---NPDSDKIIYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV"
                    temp_seq = "----------VSAEDKAAAERSKMIDKNLREDGEKARRTLRLLLLGADNSGKSTIVK----------------------------------------------------------------------------------------------------------------------------------GIFETKFQVDKVNFHMFDVG-----RRKWIQCFNDVTAIIFVVDSSDYNR----------LQEALNDFKSIWNNRWLRTISVILFLNKQDLLAEKVLAGKSKIEDYFPEFARYTTPDPRVTRAKY-FIRKEFVDISTASGDGRHICYPHFTC-VDTENARRIFNDCKDIILQMNLREYNLV"
                    pdb_num_dict = OrderedDict()
                    temp_i = 0
                    mapped_cgns = []
                    for i, aa in enumerate(temp_seq):
                        if aa != "-":
                            # Reference sequence number = alignment index
                            # minus the gaps in the reference so far, 1-based
                            ref_split_on_gaps = ref_seq[:i + 1].split("-")
                            ref_seqnum = i - (len(ref_split_on_gaps) - 1) + 1
                            res = resis.get(sequence_number=ref_seqnum)
                            # Each generic number may be used only once: move
                            # on to the next presumed one if already mapped
                            if res.display_generic_number.label in mapped_cgns:
                                next_presumed_cgn = self.get_next_presumed_cgn(
                                    res)
                                if next_presumed_cgn:
                                    res = next_presumed_cgn
                                    while res and res.display_generic_number.label in mapped_cgns:
                                        res = self.get_next_presumed_cgn(
                                            res)
                                else:
                                    print(
                                        "Error: {} CGN does not exist. Incorrect mapping of {} in {}"
                                        .format(next_presumed_cgn,
                                                chain[nums[temp_i]],
                                                sc.structure))
                            mapped_cgns.append(
                                res.display_generic_number.label)
                            pdb_num_dict[nums[temp_i]] = [
                                chain[nums[temp_i]], res
                            ]
                            temp_i += 1
                bulked_rotamers = []
                for key, val in pdb_num_dict.items():
                    # print(key, val) # sanity check
                    # An int in place of a wild-type residue marks an
                    # alignment gap: nothing to annotate
                    if not isinstance(val[1], int):
                        res_obj = Residue()
                        res_obj.sequence_number = val[0].get_id()[1]
                        res_obj.amino_acid = AA[val[0].get_resname()]
                        res_obj.display_generic_number = val[
                            1].display_generic_number
                        res_obj.generic_number = val[1].generic_number
                        res_obj.protein_conformation = alpha_protconf
                        res_obj.protein_segment = val[1].protein_segment
                        res_obj.save()
                        rot = self.create_structure_rotamer(
                            val[0], res_obj, sc.structure)
                        bulked_rotamers.append(rot)
                    else:
                        self.logger.info(
                            "Skipped {} as no annotation was present, while building for alpha subunit of {}"
                            .format(val[1], sc))
                if options["debug"]:
                    pprint.pprint(pdb_num_dict)
                Rotamer.objects.bulk_create(bulked_rotamers)
                self.logger.info(
                    "Protein, ProteinConformation and Residue build for alpha subunit of {} is finished"
                    .format(sc))
            except Exception as msg:
                # Best effort: a failing complex is logged and skipped
                if options["debug"]:
                    print("Error: ", sc, msg)
                self.logger.info(
                    "Protein, ProteinConformation and Residue build for alpha subunit of {} has failed"
                    .format(sc))
    if not options["s"]:
        ### Build SignprotStructure objects from non-complex signprots
        g_prot_alphas = Protein.objects.filter(
            family__slug__startswith="100_001",
            accession__isnull=False)  #.filter(entry_name="gnai1_human")
        complex_structures = SignprotComplex.objects.all().values_list(
            "structure__pdb_code__index", flat=True)
        for a in g_prot_alphas:
            pdb_list = get_pdb_ids(a.accession)
            for pdb in pdb_list:
                if pdb not in complex_structures:
                    try:
                        data = self.fetch_gprot_data(pdb, a)
                        if data:
                            self.build_g_prot_struct(a, pdb, data)
                    except Exception as msg:
                        self.logger.error(
                            "SignprotStructure of {} {} failed\n{}: {}".
                            format(a.entry_name, pdb, type(msg), msg))
    if options["debug"]:
        print(datetime.datetime.now() - startTime)
def create_rotamers(self, structure, pdb_path):
    """Create Residue and Rotamer records for the preferred chain of a structure.

    The sequence of the preferred chain (read from the file at ``pdb_path``)
    is aligned against the parent (wild-type) protein sequence.  The ATOM
    records of ``structure.pdb_data.pdb`` are then walked residue by residue;
    each residue numbered below 2000 is saved as a ``Residue`` carrying the
    generic numbers and segment of the wild-type residue it maps to, and its
    ATOM lines are stored as a ``Rotamer``.

    Args:
        structure: structure record providing ``preferred_chain``,
            ``protein_conformation`` and ``pdb_data``.
        pdb_path: path of the PDB file parsed to obtain the chain sequence.

    Returns:
        None.
    """
    wt_lookup = {} #used to match WT seq_number to WT residue record
    pdbseq = {} #used to keep track of pdbseq residue positions vs index in seq
    ref_positions = {} #WT postions in alignment
    mapped_seq = {} # index in contruct, tuple of AA and WT [position,AA]
    preferred_chain = structure.preferred_chain

    if len(preferred_chain.split(','))>1: #if A,B
        preferred_chain = preferred_chain.split(',')[0]

    AA = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D',
          'CYS':'C', 'GLN':'Q', 'GLU':'E', 'GLY':'G',
          'HIS':'H', 'ILE':'I', 'LEU':'L', 'LYS':'K',
          'MET':'M', 'PHE':'F', 'PRO':'P', 'SER':'S',
          'THR':'T', 'TRP':'W', 'TYR':'Y', 'VAL':'V'}

    s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_path)[0]
    chain = s[preferred_chain] #select only one chain (avoid n-mer receptors)
    ppb=PPBuilder()
    seq = ''
    i = 1
    check_1000 = 0
    for pp in ppb.build_peptides(chain): #remove >1000 pos (fusion protein / gprotein)
        for res in pp:
            res_id = res.id
            if res_id[1]<600:
                check_1000 += 1
            #need check_1000 to catch structures where they lie in 1000s (4LDE, 4LDL, 4LDO, 4N4W, 4QKX)
            if res_id[1]>1000 and check_1000>200:
                chain.detach_child(res_id)

    for pp in ppb.build_peptides(chain):
        seq += str(pp.get_sequence()) #get seq from fasta (only chain A)
        for residue in pp:
            residue_id = residue.get_full_id()
            chain_id = residue_id[2]
            if chain_id not in pdbseq:
                pdbseq[chain_id] = {}
            pos = residue_id[3][1]
            # 1-based index in ``seq`` and one-letter code of this residue
            pdbseq[chain_id][pos] = [i,AA[residue.resname]]
            i += 1

    parent_seq = str(structure.protein_conformation.protein.parent.sequence)
    rs = Residue.objects.filter(protein_conformation__protein=structure.protein_conformation.protein.parent).prefetch_related('display_generic_number','generic_number','protein_segment')

    for r in rs: #required to match WT position to a record (for duplication of GN values)
        wt_lookup[r.sequence_number] = r

    #align WT with structure seq -- make gaps penalties big, so to avoid too much overfitting
    pw2 = pairwise2.align.localms(parent_seq, seq, 2, -4, -4, -.1)

    gaps = 0
    unmapped_ref = {}
    for i, r in enumerate(pw2[0][0], 1): #loop over alignment to create lookups (track pos)
        #print(i,r,pw2[0][1][i-1]) #print alignment for sanity check
        if r == "-":
            gaps += 1
        if r != "-":
            ref_positions[i] = [i-gaps,r]
        elif r == "-":
            ref_positions[i] = [None,'-']

        if pw2[0][1][i-1]=='-':
            unmapped_ref[i-gaps] = '-'

    gaps = 0
    for i, r in enumerate(pw2[0][1], 1): #make second lookup
        if r == "-":
            gaps += 1
        if r != "-":
            mapped_seq[i-gaps] = [r,ref_positions[i]]

    pdb = structure.pdb_data.pdb
    protein_conformation=structure.protein_conformation
    temp = ''
    check = 0
    # Mapping statistics; only read by the commented-out summary print at
    # the end of the method
    mismatch_seq = 0
    match_seq = 0
    not_matched = 0
    matched_by_pos = 0
    aa_mismatch = 0

    pdblines_temp = pdb.splitlines()
    pdblines = []
    for line in pdblines_temp: #Get rid of all odd records
        if line.startswith('ATOM'):
            pdblines.append(line)
    pdblines.append('') #add a line to not "run out"

    # The loop carries state between iterations: ``temp`` accumulates the
    # ATOM lines of the residue being read, ``check`` holds the residue
    # number of the following line and ``residue_name`` the residue name of
    # the previously processed line.  Do not reorder the statements below.
    for i,line in enumerate(pdblines):
        if line.startswith('ATOM'):
            chain = line[21]
            if preferred_chain and chain!=preferred_chain: #If perferred is defined and is not the same as the current line, then skip
                pass
            else:
                nextline = pdblines[i+1]
                if (check==0 or nextline[22:26].strip()==check) and not nextline.startswith('TER') and nextline.startswith('ATOM'): #If this is either the begining or the same as previous line add to current rotamer
                    temp += line + "\n"
                    #print('same res',pdb.splitlines()[i+1])
                else: #if this is a new residue
                    #print(pdb.splitlines()[i+1][22:26].strip(),check)
                    temp += line + "\n"
                    if int(check.strip())<2000:
                        residue = Residue()
                        residue.sequence_number = int(check.strip())
                        residue.amino_acid = AA[residue_name.upper()]
                        residue.protein_conformation = protein_conformation
                        #print(residue.sequence_number,residue.amino_acid) #sanity check
                        try:
                            seq_num_pos = pdbseq[chain][residue.sequence_number][0]
                        except KeyError:
                            # Residue is not part of any built polypeptide
                            #print('failed residue',pdb_path,residue.sequence_number)
                            temp = "" #start new line for rotamer
                            check = pdblines[i+1][22:26].strip()
                            continue
                        if seq_num_pos in mapped_seq:
                            if mapped_seq[seq_num_pos][1][0] is None:
                                #print('no match found') #sanity check
                                #print(residue.sequence_number,residue.amino_acid) #sanity check
                                residue.display_generic_number = None
                                residue.generic_number = None
                                residue.protein_segment = None
                                not_matched +=1
                            else:
                                wt_r = wt_lookup[mapped_seq[seq_num_pos][1][0]]
                                if residue.sequence_number!=wt_r.sequence_number and residue.amino_acid!=wt_r.amino_acid and residue.sequence_number in wt_lookup: #if pos numbers not work -- see if the pos number might be in WT and unmapped
                                    if wt_lookup[residue.sequence_number].amino_acid==residue.amino_acid:
                                        if residue.sequence_number in unmapped_ref: #WT was not mapped, so could be it
                                            # print(residue.sequence_number,residue.amino_acid) #sanity check
                                            #print('wrongly matched, better match on pos+aa',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                            wt_r = wt_lookup[residue.sequence_number]
                                            matched_by_pos +=1
                                            match_seq += 1
                                        else:
                                            mismatch_seq += 1
                                            #print('could have been matched, but already aligned to another position',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                    else:
                                        #print('WT pos not same AA, mismatch',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                        mismatch_seq += 1
                                elif residue.sequence_number!=wt_r.sequence_number:
                                    #print('WT pos not same pos, mismatch',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                    mismatch_seq += 1
                                elif residue.amino_acid!=wt_r.amino_acid:
                                    #print('aa mismatch',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                    aa_mismatch += 1
                                else:
                                    match_seq += 1
                                if wt_r.generic_number is not None:
                                    residue.display_generic_number = wt_r.display_generic_number
                                    residue.generic_number = wt_r.generic_number
                                else:
                                    residue.display_generic_number = None
                                    residue.generic_number = None
                                    #print('no GN')
                                residue.protein_segment = wt_r.protein_segment
                        else:
                            #print('wierd error') #sanity check
                            residue.display_generic_number = None
                            residue.generic_number = None
                            residue.protein_segment = None
                        #print('inserted',residue.sequence_number) #sanity check
                        residue.save()
                        rotamer_data, _ = PdbData.objects.get_or_create(pdb=temp)
                        rotamer, _ = Rotamer.objects.get_or_create(residue=residue, structure=structure, pdbdata=rotamer_data)
                    temp = "" #start new line for rotamer
                    check = pdblines[i+1][22:26].strip()
                check = pdblines[i+1][22:26].strip()
            chain = line[21]
            residue_name = line[17:20].title() #use title to get GLY to Gly so it matches
    #print(structure.pdb_code.index,'length',len(seq),len(mapped_seq),'mapped res',str(mismatch_seq+match_seq+aa_mismatch),'pos mismatch',mismatch_seq,'aa mismatch',aa_mismatch,'not mapped',not_matched,' mapping off, matched on pos,aa',matched_by_pos)
    return None
def SuperimposeChains(final_files, temp_obj, PDB_bychain_objects, temp_chains):
    """
    Superimposes each target chain atoms to the corresponding template chain atoms.

    Every target structure is fitted on the C-alpha atoms of its template
    chain, transformed in place and saved as
    "<template id>_<index>_aligned.pdb". The files with index 1 and above
    are then appended to the index-0 file, whose name is appended to
    final_files.

    Arguments:

    final_files: list that receives the name of the merged output file.
    temp_obj: object of the current template.
    PDB_bychain_objects: list of PDB objects corresponding to each target chain.
    temp_chains: dictionary with the correspondencies of template-target chains.
    """
    i = 0
    ref_model = temp_obj[0]
    ppbuild = PPBuilder()
    template_chains = Selection.unfold_entities(temp_obj, 'C')

    # Length of the shortest first polypeptide among templates and targets.
    min_len1 = min(
        len(ppbuild.build_peptides(x)[0].get_sequence())
        for x in template_chains)
    min_len2 = min(
        len(ppbuild.build_peptides(x)[0].get_sequence())
        for x in PDB_bychain_objects)
    min_len = min(min_len1, min_len2)
    atoms_to_be_aligned = range(2, min_len)

    # Perform the superimposition for each target chain.
    for sample_structure in PDB_bychain_objects:
        sample_model = sample_structure[0]
        ref_atoms = []
        sample_atoms = []

        # Find the template chain key mapped to this target (the last
        # matching key wins). Starting from None per target prevents a match
        # from a previous target, or an unbound name, from being used.
        temp_ch = None
        for key, val in temp_chains.items():
            if val == sample_structure.get_id():
                if GeneralFunctions.GetNameWOChain(
                        key) == temp_obj.get_id():
                    temp_ch = key

        # Superimpose the target chain with it's corresponding template chain.
        for ref_chain in ref_model:
            if temp_obj.get_id() + "_" + ref_chain.get_id() == temp_ch:
                for ref_res in ref_chain:
                    # Ensure to superimpose the same number of atoms.
                    if ref_res.get_id()[1] in atoms_to_be_aligned:
                        ref_atoms.append(
                            ref_res['CA'])  # Take only C-alfa atoms.

        for sample_chain in sample_model:
            for sample_res in sample_chain:
                # Ensure to superimpose the same number of atoms.
                if sample_res.get_id()[1] in atoms_to_be_aligned:
                    sample_atoms.append(
                        sample_res['CA'])  # Take only C-alfa atoms.

        # Superimpose.
        super_imposer = Superimposer()
        super_imposer.set_atoms(ref_atoms, sample_atoms)
        matrix = super_imposer.rotran

        # Apply rotation and translation.
        for atom in sample_structure.get_atoms():
            atom.transform(matrix[0], matrix[1])

        # Create a PDB file to save the new coordinates.
        io = PDBIO()
        io.set_structure(sample_structure)
        io.save(temp_obj.get_id() + "_" + str(i) + "_aligned.pdb",
                write_end=False)
        i += 1

    # Append each chain to a unique file. Context managers guarantee that
    # the merged file is flushed and every handle is closed.
    written = i
    merged_name = temp_obj.get_id() + "_0_aligned.pdb"
    with open(merged_name, 'a') as merged:
        final_files.append(merged_name)
        for index in range(1, written):
            part_name = temp_obj.get_id() + "_" + str(index) + "_aligned.pdb"
            with open(part_name) as part:
                for line in part:
                    merged.write(line)
def handle(self, *args, **options):
    """Build alpha-subunit proteins/residues and SignprotStructure entries.

    Steps, in order:

    1. If the ``purge`` option is set, delete the Residue,
       ProteinConformation and Protein rows whose protein entry name ends
       with ``_a`` in the 'Alpha' family, plus every
       SignprotStructureExtraProteins and SignprotStructure row.
    2. Unless ``only_signprot_structures`` is set, for every SignprotComplex:
       get or create the ``<pdb code>_a`` Protein and its
       ProteinConformation, parse the alpha chain from the stored PDB text,
       pair each residue that has a CA atom with the parent protein's
       residue by sequence number, try to resolve mismatches through the
       PDB SEQADV records and a pairwise alignment, and bulk-create Residue
       rows carrying the annotation of the paired parent residue. A failure
       is logged and the loop moves on to the next complex.
    3. For every Protein whose family slug starts with ``100_001`` and that
       has an accession, look up its PDB ids and build a SignprotStructure
       for each id that is not already the structure of a SignprotComplex.

    Args:
        *args: unused.
        **options: command options; ``purge`` and
            ``only_signprot_structures`` are read here.
    """
    self.options = options
    if self.options['purge']:
        Residue.objects.filter(
            protein_conformation__protein__entry_name__endswith='_a',
            protein_conformation__protein__family__parent__parent__name='Alpha'
        ).delete()
        ProteinConformation.objects.filter(
            protein__entry_name__endswith='_a',
            protein__family__parent__parent__name='Alpha').delete()
        Protein.objects.filter(
            entry_name__endswith='_a',
            family__parent__parent__name='Alpha').delete()
        SignprotStructureExtraProteins.objects.all().delete()
        SignprotStructure.objects.all().delete()

    if not options['only_signprot_structures']:
        # Compiled once; a raw string so that \s, \d and \S reach the regex
        # engine without relying on Python's invalid-escape fallback.
        # Groups: 1 residue name in the PDB, 2 chain, 3 PDB residue number,
        # 4 database accession, 5 database residue name, 6 database residue
        # number, 7 start of the annotation text.
        seqadv_pattern = re.compile(
            r'SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)'
        )

        # Building protein and protconf objects for g protein structure in complex
        scs = SignprotComplex.objects.all()
        for sc in scs:
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} is building'
                .format(sc))
            try:
                pdb_code = sc.structure.pdb_code.index
                alpha_entry_name = pdb_code.lower() + '_a'

                # Alpha subunit: reuse the existing protein or create it.
                # Only a missing row triggers creation; any other error
                # reaches the per-complex handler below.
                try:
                    alpha_protein = Protein.objects.get(
                        entry_name=alpha_entry_name)
                except Protein.DoesNotExist:
                    alpha_protein = Protein()
                    alpha_protein.entry_name = alpha_entry_name
                    alpha_protein.accession = None
                    alpha_protein.name = alpha_entry_name
                    alpha_protein.sequence = sc.protein.sequence
                    alpha_protein.family = sc.protein.family
                    alpha_protein.parent = sc.protein
                    alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                    alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                        slug='mod')
                    alpha_protein.source = ProteinSource.objects.get(
                        name='OTHER')
                    alpha_protein.species = sc.protein.species
                    alpha_protein.save()

                try:
                    alpha_protconf = ProteinConformation.objects.get(
                        protein__entry_name=alpha_entry_name)
                except ProteinConformation.DoesNotExist:
                    alpha_protconf = ProteinConformation()
                    alpha_protconf.protein = alpha_protein
                    alpha_protconf.state = ProteinState.objects.get(
                        slug='active')
                    alpha_protconf.save()

                pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
                structure = pdbp.get_structure(
                    'struct', StringIO(sc.structure.pdb_data.pdb))
                chain = structure[0][sc.alpha]

                # Keep only residue numbers whose residue has a CA atom.
                nums = []
                for res in chain:
                    try:
                        res['CA']
                    except KeyError:
                        continue
                    nums.append(res.get_id()[1])

                resis = Residue.objects.filter(
                    protein_conformation__protein=sc.protein)

                # Create first alignment based on sequence numbers.
                # Each value is [PDB residue, parent-protein Residue].
                pdb_num_dict = OrderedDict()
                for n in nums:
                    # 6OIJ: residues numbered below 30 are offset by 6.
                    if pdb_code == '6OIJ' and n < 30:
                        nr = n + 6
                    else:
                        nr = n
                    pdb_num_dict[n] = [
                        chain[n], resis.get(sequence_number=nr)
                    ]

                # Find mismatches
                mismatches = [
                    pair for pair in pdb_num_dict.values()
                    if AA[pair[0].get_resname()] != pair[1].amino_acid
                ]

                seqadv = [
                    line
                    for line in sc.structure.pdb_data.pdb.split('\n')
                    if line.startswith('SEQADV')
                ]
                mutations, shifted_mutations = OrderedDict(), OrderedDict()

                # Search for annotated engineered mutations in pdb SEQADV
                # NOTE(review): the nesting of the two "Exception" branches
                # was reconstructed from a flattened source - confirm that
                # they belong to the accession-mismatch case.
                for line in seqadv:
                    line_search = seqadv_pattern.search(line)
                    if line_search is None:
                        continue
                    if line_search.group(2) != sc.alpha:
                        continue
                    pdb_resnum = int(line_search.group(3))
                    if line_search.group(4).strip() == sc.protein.accession:
                        if line_search.group(3) == line_search.group(6):
                            mutations[pdb_resnum] = [
                                line_search.group(1),
                                line_search.group(5)
                            ]
                        else:
                            # Same mutation, but PDB and database numbering differ.
                            shifted_mutations[pdb_resnum] = [
                                line_search.group(1),
                                line_search.group(5),
                                int(line_search.group(6))
                            ]
                    else:
                        # Exception for 6G79
                        if (line_search.group(3) != line_search.group(6)
                                and 'CONFLICT' in line_search.group(7)):
                            mutations[pdb_resnum] = [
                                line_search.group(1),
                                line_search.group(5)
                            ]
                        # Exception for 5G53
                        if line_search.group(4).strip() != sc.protein.accession:
                            mutations[pdb_resnum] = [
                                line_search.group(1),
                                line_search.group(5)
                            ]

                # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
                remaining_mismatches = []
                for m in mismatches:
                    num = m[0].get_id()[1]
                    if num in mutations:
                        if (m[0].get_resname() != mutations[num][0]
                                and m[1].amino_acid != AA[mutations[num][1]]):
                            remaining_mismatches.append(m)
                    else:
                        # Shifted or unannotated mismatches stay unresolved.
                        remaining_mismatches.append(m)

                # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
                if remaining_mismatches and pdb_code not in [
                        '6OIJ', '6OY9', '6OYA', '6LPB', '6WHA'
                ]:
                    ppb = PPBuilder()
                    seq = ''.join(
                        str(pp.get_sequence())
                        for pp in ppb.build_peptides(chain, aa_only=False))
                    pw2 = pairwise2.align.localms(sc.protein.sequence, seq,
                                                  2, -1, -.5, -.1)
                    ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])

                    # Maps a PDB residue to its aligned parent Residue, or to
                    # the (int) alignment column when it faces a gap.
                    pdb_wt_dict = OrderedDict()
                    j, k = 0, 0  # cursors into resis and nums
                    for i, (ref, temp) in enumerate(zip(ref_seq, temp_seq)):
                        if ref != '-' and temp != '-':
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = resis[j]
                            j += 1
                            k += 1
                        elif ref == '-':
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                            k += 1
                        elif temp == '-':
                            pdb_wt_dict[i] = resis[j]
                            j += 1

                    for i, r in enumerate(remaining_mismatches):
                        resnum = r[0].get_id()[1]
                        # Adjust for shifted residue when residue is a match
                        # (for i == 0 this compares against the last mismatch,
                        # because index -1 wraps around).
                        if resnum - remaining_mismatches[i - 1][0].get_id()[1] > 1:
                            pdb_num_dict[resnum - 1][1] = pdb_wt_dict[chain[resnum - 1]]
                        # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                        if resnum in shifted_mutations:
                            pdb_num_dict[resnum][1] = resis.get(
                                sequence_number=shifted_mutations[resnum][2])
                        # Adjust for shift
                        else:
                            pdb_num_dict[resnum][1] = pdb_wt_dict[r[0]]
                # Custom alignment fix for 6WHA mini-Gq/Gi2/Gs chimera
                # elif sc.structure.pdb_code.index=='6WHA':
                #     ref_seq = 'MTLESIMACCLSEEAKEARRINDEIERQLRRDKRDARRELKLLLLGTGESGKSTFIKQMRIIHGSGYSDEDKRGFTKLVYQNIFTAMQAMIRAMDTLKIPYKYEHNKAHAQLVREVDVEKVSAFENPYVDAIKSLWNDPGIQECYDRRREYQLSDSTKYYLNDLDRVADPAYLPTQQDVLRVRVPTTGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQVLVESDNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIMY--SHLVDYFPEYDGP----QRDAQAAREFILKMFVDL---NPDSDKIIYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV'
                #     temp_seq = '----------VSAEDKAAAERSKMIDKNLREDGEKARRTLRLLLLGADNSGKSTIVK----------------------------------------------------------------------------------------------------------------------------------GIFETKFQVDKVNFHMFDVG-----RRKWIQCFNDVTAIIFVVDSSDYNR----------LQEALNDFKSIWNNRWLRTISVILFLNKQDLLAEKVLAGKSKIEDYFPEFARYTTPDPRVTRAKY-FIRKEFVDISTASGDGRHICYPHFTC-VDTENARRIFNDCKDIILQMNLREYNLV'
                #     for i, ref, temp in zip(range(0,len(ref_seq)), ref_seq, temp_seq):
                #         print(i, ref, temp)
                #     pprint.pprint(pdb_num_dict)

                bulked_residues = []
                for val in pdb_num_dict.values():
                    # An int here is an alignment column, i.e. the PDB
                    # residue has no parent residue to copy annotation from.
                    if not isinstance(val[1], int):
                        res_obj = Residue()
                        res_obj.sequence_number = val[0].get_id()[1]
                        res_obj.amino_acid = AA[val[0].get_resname()]
                        res_obj.display_generic_number = val[1].display_generic_number
                        res_obj.generic_number = val[1].generic_number
                        res_obj.protein_conformation = alpha_protconf
                        res_obj.protein_segment = val[1].protein_segment
                        bulked_residues.append(res_obj)
                    else:
                        self.logger.info(
                            'Skipped {} as no annotation was present, while building for alpha subunit of {}'
                            .format(val[1], sc))
                Residue.objects.bulk_create(bulked_residues)
                self.logger.info(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} is finished'
                    .format(sc))
            except Exception as msg:
                # Best effort per complex: record why it failed (same format
                # as the SignprotStructure handler below) and carry on.
                self.logger.error(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed\n{}: {}'
                    .format(sc, type(msg), msg))

    ### Build SignprotStructure objects from non-complex signprots
    g_prot_alphas = Protein.objects.filter(
        family__slug__startswith='100_001',
        accession__isnull=False)  #.filter(entry_name='gnai1_human')
    complex_structures = SignprotComplex.objects.all().values_list(
        'structure__pdb_code__index', flat=True)
    for a in g_prot_alphas:
        pdb_list = get_pdb_ids(a.accession)
        for pdb in pdb_list:
            if pdb not in complex_structures:
                try:
                    data = self.fetch_gprot_data(pdb, a)
                    if data:
                        self.build_g_prot_struct(a, pdb, data)
                except Exception as msg:
                    self.logger.error(
                        'SignprotStructure of {} {} failed\n{}: {}'.format(
                            a.entry_name, pdb, type(msg), msg))