def find_disulfides(self):
    ''' Finds disulfide bridges listed in the SSBOND records of the model pdb file.

        Only the first 10 lines of the file at self.path_to_pdb are inspected. Every SSBOND record
        between two CYS residues of chain R is resolved to the matching Residue objects of
        self.receptor and reported as a pair of generic numbers (or sequence numbers where a residue
        has no generic number). SSBOND records that do not have this form are skipped.

        @return: list, list of [gn1, gn2] pairs, one per recognised SSBOND record \n
        @raise AssertionError: if self.path_to_pdb is an empty string
    '''
    if self.path_to_pdb == '':
        raise AssertionError('Run find_files function first to locate the model source file.')

    # Raw string: \s and \d are regex classes, not Python string escapes.
    ssbond_pattern = re.compile(r'SSBOND\s+\d+\s+CYS\sR\s+(\d+)\s+CYS\sR\s+(\d+)')
    header_line_limit = 10

    disulfide_pairs = []
    with open(self.path_to_pdb, 'r') as f:
        # Iterate lazily; there is no need to load the whole file for the first few lines.
        for line_number, line in enumerate(f, start=1):
            if line_number > header_line_limit:
                break
            if not line.startswith('SSBOND'):
                continue
            match = ssbond_pattern.search(line)
            if match is None:
                # SSBOND record in another format (e.g. a different chain id); nothing to extract.
                continue
            num1, num2 = match.groups()
            # NOTE(review): res1/res2 follow the queryset order, which is not necessarily the
            # order of num1/num2 in the record - confirm the pair order does not matter to callers.
            # Unpacking still raises ValueError unless exactly two residues are found.
            res1, res2 = list(Residue.objects.filter(protein_conformation__protein__entry_name=self.receptor,
                                                     sequence_number__in=[num1, num2]))
            pair = []
            for res in (res1, res2):
                if res.display_generic_number is not None:
                    pair.append(ggn(res.display_generic_number.label))
                else:
                    pair.append(str(res.sequence_number))
            disulfide_pairs.append(pair)
    print('FIND disulfides: {}'.format(disulfide_pairs))
    return disulfide_pairs
def fetch_residues_from_pdb(self, structure, generic_numbers, modify_bulges=False, just_nums=False):
    ''' Fetches specific residues from the stored rotamer pdb data by generic number (if generic number
        is not available then by residue number). Returns an OrderedDict() with generic numbers
        ('x' replaced by '.') or residue numbers (as strings) as keys, and the list of atoms of the
        residue as values.

        @param structure: Structure, Structure object where residues should be fetched from \n
        @param generic_numbers: list, list of generic numbers (or sequence numbers) to be fetched \n
        @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
        residue to be considered a 5x46 residue. \n
        @param just_nums: boolean, when true, entries given as sequence numbers are not translated to
        generic numbers and stay keyed by their number. \n
        @raise IndexError: if no rotamer is found for one of the requested numbers
    '''
    output = OrderedDict()
    for gn in generic_numbers:
        if 'x' in str(gn):
            rotamers = list(Rotamer.objects.filter(
                structure__protein_conformation=structure.protein_conformation,
                residue__display_generic_number__label=dgn(gn, structure.protein_conformation),
                structure__preferred_chain=structure.preferred_chain))
        else:
            rotamers = list(Rotamer.objects.filter(
                structure__protein_conformation=structure.protein_conformation,
                residue__sequence_number=gn,
                structure__preferred_chain=structure.preferred_chain))
            if not just_nums:
                # Best effort: key the residue by its generic number when it has one, otherwise
                # keep the sequence number that was passed in.
                try:
                    gn = ggn(Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                 sequence_number=gn).display_generic_number.label)
                except Exception:
                    pass

        # Default to the first rotamer; with several candidates prefer the first one whose pdb
        # record does not start with a COMPND header and whose chain id (column 22 of the first
        # line) belongs to the preferred chain. Falling back to the first rotamer avoids calling
        # .pdbdata on the list itself when no candidate matches.
        rotamer = rotamers[0]
        if len(rotamers) > 1:
            for candidate in rotamers:
                pdb_text = candidate.pdbdata.pdb
                if not pdb_text.startswith('COMPND') and pdb_text[21] in structure.preferred_chain:
                    rotamer = candidate
                    break

        # Build the output key once. Non-string numbers are simply stringified; the bulge
        # truncation only ever applies to string labels of length 5 (e.g. '5x461' -> '5.46').
        if isinstance(gn, str):
            key = gn.replace('x', '.')
            if modify_bulges and len(gn) == 5:
                key = key[:-1]
        else:
            key = str(gn)

        io = StringIO(rotamer.pdbdata.pdb)
        rota_struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
        for chain in rota_struct:
            for residue in chain:
                # A fresh list per residue, so stored values never share state.
                output[key] = [atom for atom in residue]
    return output
def pdb_array_creator(self, structure=None, filename=None):
    ''' Creates a nested OrderedDict() from the pdb of a Structure object. The outer keys are segment
        labels, the inner keys are generic numbers (written with '.' instead of 'x') or residue numbers
        as strings, and the values are the atom lists of the residues.

        Two paths are used:
        - If StructureSeqNumOverwrite reports a non-empty pdb_wt_table, the output is built from the
          stored Rotamer records of the TM1-TM7 and H8 residues; the result is also pretty-printed.
        - Otherwise the pdb is run through the sequence-parser generic numbering and the residues of
          the preferred chain are sorted into segments via self.segment_coding.

        @param structure: Structure, Structure object of protein. When using structure, leave filename=None. \n
        @param filename: str, filename of pdb to be parsed. \n
        NOTE(review): structure is dereferenced unconditionally below, so structure=None cannot
        actually be used even when a filename is given - confirm against callers.
    '''
    # seq_nums_overwrite_cutoff_dict = {'4PHU':2000, '4LDL':1000, '4LDO':1000, '4QKX':1000, '5JQH':1000, '5TZY':2000, '5KW2':2000}
    # Parse from the pdb text stored on the Structure unless a filename was supplied.
    if structure != None and filename == None:
        io = StringIO(structure.pdb_data.pdb)
    else:
        io = filename
    gn_array = []
    residue_array = []
    # pdb_struct = PDB.PDBParser(QUIET=True).get_structure(structure.pdb_code.index, io)[0]
    residues = Residue.objects.filter(
        protein_conformation=structure.protein_conformation)
    # Generic numbers (dot notation) known for this conformation; residues for which the
    # label lookup fails are silently left out.
    gn_list = []
    for i in residues:
        try:
            gn_list.append(
                ggn(i.display_generic_number.label).replace('x', '.'))
        except:
            pass
    ssno = StructureSeqNumOverwrite(structure)
    ssno.seq_num_overwrite('pdb')
    if len(ssno.pdb_wt_table) > 0:
        # Path 1: build the array from the stored rotamers (io, gn_array, residue_array and
        # gn_list are not used on this path).
        residues = residues.filter(protein_segment__slug__in=[
            'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7', 'H8'
        ]).order_by('sequence_number')
        output = OrderedDict()
        for r in residues:
            if r.protein_segment.slug == None:
                continue
            if r.protein_segment.slug not in output:
                output[r.protein_segment.slug] = OrderedDict()
            rotamer = Rotamer.objects.filter(residue=r)
            rotamer = self.right_rotamer_select(rotamer)
            rota_io = StringIO(rotamer.pdbdata.pdb)
            p = PDB.PDBParser()
            parsed_rota = p.get_structure('rota', rota_io)
            for chain in parsed_rota[0]:
                for res in chain:
                    atom_list = []
                    for atom in res:
                        # Skip hydrogens
                        if atom.get_id().startswith('H'):
                            continue
                        if atom.get_id() == 'N':
                            # N atom: B-factor is set to the part of the label before 'x'.
                            bw, gn = r.display_generic_number.label.split(
                                'x')
                            atom.set_bfactor(bw)
                        elif atom.get_id() == 'CA':
                            # CA atom: B-factor is set to '<segment>.<part after x>'. A
                            # three-character suffix is shortened by one character and the
                            # value is prefixed with '-' (presumably marking a bulge - TODO confirm).
                            bw, gn = r.display_generic_number.label.split(
                                'x')
                            gn = "{}.{}".format(bw.split('.')[0], gn)
                            if len(gn.split('.')[1]) == 3:
                                gn = '-' + gn[:-1]
                            atom.set_bfactor(gn)
                        atom_list.append(atom)
                    output[r.protein_segment.slug][ggn(
                        r.display_generic_number.label).replace(
                            'x', '.')] = atom_list
        # NOTE(review): prints the whole array on every call - looks like leftover debug output.
        pprint.pprint(output)
        return output
    else:
        # Path 2: assign generic numbers with the sequence parser and read them back from the
        # CA B-factors of the preferred chain.
        assign_gn = as_gn.GenericNumbering(
            pdb_file=io,
            pdb_code=structure.pdb_code.index,
            sequence_parser=True)
        pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser(
        )
        pref_chain = structure.preferred_chain
        parent_prot_conf = ProteinConformation.objects.get(
            protein=structure.protein_conformation.protein.parent)
        parent_residues = Residue.objects.filter(
            protein_conformation=parent_prot_conf)
        # Sequence number of the last residue in the parent residue list.
        last_res = list(parent_residues)[-1].sequence_number
        # Only the first character is used when preferred_chain holds more than one.
        if len(pref_chain) > 1:
            pref_chain = pref_chain[0]
        for residue in pdb_struct[pref_chain]:
            if 'CA' in residue and -9.1 < residue['CA'].get_bfactor(
            ) < 9.1:
                use_resid = False
                gn = str(residue['CA'].get_bfactor())
                # Pad a single decimal to two digits, e.g. '1.5' -> '1.50'.
                if len(gn.split('.')[1]) == 1:
                    gn = gn + '0'
                # A negative value is turned into the positive number with a trailing '1'.
                if gn[0] == '-':
                    gn = gn[1:] + '1'
                # Exceptions
                if structure.pdb_code.index == '3PBL' and residue.get_id(
                )[1] == 331:
                    use_resid = True
                elif structure.pdb_code.index == '6QZH' and residue.get_id(
                )[1] == 1434:
                    use_resid = True
                elif structure.pdb_code.index == '7M3E':
                    use_resid = True
                #################################################
                elif gn in gn_list:
                    gn_array.append(gn)
                    residue_array.append(residue.get_list())
                else:
                    use_resid = True
                # Fall back to the pdb residue number as key.
                # NOTE(review): residues failing the outer CA/B-factor check are not added at
                # all with this nesting - confirm that is intended.
                if use_resid:
                    gn_array.append(str(residue.get_id()[1]))
                    residue_array.append(residue.get_list())
        output = OrderedDict()
        for num, label in self.segment_coding.items():
            output[label] = OrderedDict()
        if len(gn_array) != len(residue_array):
            raise AssertionError()
        for gn, res in zip(gn_array, residue_array):
            if '.' in gn:
                # Key is a generic number: the integer part selects the segment.
                seg_num = int(gn.split('.')[0])
                seg_label = self.segment_coding[seg_num]
                # Segment 8 residues are dropped while nothing has been stored under 'TM7'.
                if seg_num == 8 and len(output['TM7']) == 0:
                    continue
                else:
                    output[seg_label][gn] = res
            else:
                # Key is a residue number: try to resolve it to a generic number via the database.
                # Any error in this block (including found_res staying None) is handled by the
                # outer except below.
                try:
                    found_res, found_gn = None, None
                    try:
                        found_res = Residue.objects.get(
                            protein_conformation=structure.
                            protein_conformation,
                            sequence_number=gn)
                    except:
                        # Exception for res 317 in 5VEX, 5VEW
                        if structure.pdb_code.index in [
                                '5VEX', '5VEW'
                        ] and gn == '317' and res[0].get_parent(
                        ).get_resname() == 'CYS':
                            found_res = Residue.objects.get(
                                protein_conformation=parent_prot_conf,
                                sequence_number=gn)
                        #####################################
                    found_gn = str(
                        ggn(found_res.display_generic_number.label)
                    ).replace('x', '.')
                    # Exception for res 318 in 5VEX, 5VEW
                    if structure.pdb_code.index in [
                            '5VEX', '5VEW'
                    ] and gn == '318' and res[0].get_parent().get_resname(
                    ) == 'ILE' and found_gn == '5.47':
                        found_gn = '5.48'
                    #####################################
                    if -9.1 < float(found_gn) < 9.1:
                        # Skip residues with a single atom and residue numbers beyond last_res.
                        if len(res) == 1:
                            continue
                        if int(gn) > last_res:
                            continue
                        seg_label = self.segment_coding[int(
                            found_gn.split('.')[0])]
                        output[seg_label][found_gn] = res
                except:
                    # Only residues named YCM or CSD (presumably modified cysteines - TODO confirm)
                    # get a second chance via the parent conformation; everything else is dropped.
                    if res[0].get_parent().get_resname() == 'YCM' or res[
                            0].get_parent().get_resname() == 'CSD':
                        try:
                            found_res = Residue.objects.get(
                                protein_conformation=parent_prot_conf,
                                sequence_number=gn)
                        except:
                            continue
                        # Keep only segments whose slug starts with 'T' or 'H'.
                        if found_res.protein_segment.slug[0] not in [
                                'T', 'H'
                        ]:
                            continue
                        try:
                            found_gn = str(
                                ggn(found_res.display_generic_number.label)
                            ).replace('x', '.')
                        except:
                            found_gn = str(gn)
                        output[
                            found_res.protein_segment.slug][found_gn] = res
        return output
def pdb_array_creator(self, structure=None, filename=None):
    ''' Creates a nested OrderedDict() from the pdb of a Structure object. The outer keys are segment
        labels taken from self.segment_coding, the inner keys are generic numbers (written with '.'
        instead of 'x') or residue numbers as strings, and the values are the atom lists of the
        residues (residue.get_list()).

        @param structure: Structure, Structure object of protein. When using structure, leave filename=None. \n
        @param filename: str, filename of pdb to be parsed. \n
        NOTE(review): structure is dereferenced unconditionally below, so structure=None cannot
        actually be used even when a filename is given - confirm against callers.
    '''
    # Per pdb code: residue-number cutoff used below when deciding how to key high-numbered residues.
    seq_nums_overwrite_cutoff_dict = {'4PHU':2000, '4LDL':1000, '4LDO':1000, '4QKX':1000, '5JQH':1000, '5TZY':2000, '5KW2':2000}
    # Parse from the pdb text stored on the Structure unless a filename was supplied.
    if structure!=None and filename==None:
        io = StringIO(structure.pdb_data.pdb)
    else:
        io = filename
    gn_array = []
    residue_array = []
    # pdb_struct = PDB.PDBParser(QUIET=True).get_structure(structure.pdb_code.index, io)[0]
    residues = Residue.objects.filter(protein_conformation=structure.protein_conformation)
    # Generic numbers (dot notation) known for this conformation; residues for which the
    # label lookup fails are silently left out.
    gn_list = []
    for i in residues:
        try:
            gn_list.append(ggn(i.display_generic_number.label).replace('x','.'))
        except:
            pass
    assign_gn = as_gn.GenericNumbering(pdb_file=io, pdb_code=structure.pdb_code.index, sequence_parser=True)
    pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser()
    pref_chain = structure.preferred_chain
    parent_prot_conf = ProteinConformation.objects.get(protein=structure.protein_conformation.protein.parent)
    parent_residues = Residue.objects.filter(protein_conformation=parent_prot_conf)
    # Sequence number of the last residue in the parent residue list.
    last_res = list(parent_residues)[-1].sequence_number
    # Only the first character is used when preferred_chain holds more than one.
    if len(pref_chain)>1:
        pref_chain = pref_chain[0]
    for residue in pdb_struct[pref_chain]:
        # The try body either stores the residue under its generic number or raises to reach
        # the except branch, which stores it under a residue number instead. The bare except
        # also catches incidental errors such as a missing 'CA' atom.
        try:
            if -9.1 < residue['CA'].get_bfactor() < 9.1:
                gn = str(residue['CA'].get_bfactor())
                # Pad a single decimal to two digits, e.g. '1.5' -> '1.50'.
                if len(gn.split('.')[1])==1:
                    gn = gn+'0'
                # A negative value is turned into the positive number with a trailing '1'.
                if gn[0]=='-':
                    gn = gn[1:]+'1'
                # Exception for 3PBL 331, gn gets assigned wrong
                if structure.pdb_code.index=='3PBL' and residue.get_id()[1]==331:
                    raise Exception()
                #################################################
                if gn in gn_list:
                    # Residue numbers above 1000 keep their generic number only for the listed
                    # pdb codes and only at or above that code's cutoff.
                    if int(residue.get_id()[1])>1000:
                        if structure.pdb_code.index in seq_nums_overwrite_cutoff_dict and int(residue.get_id()[1])>=seq_nums_overwrite_cutoff_dict[structure.pdb_code.index]:
                            gn_array.append(gn)
                            residue_array.append(residue.get_list())
                        else:
                            raise Exception()
                    else:
                        gn_array.append(gn)
                        residue_array.append(residue.get_list())
                else:
                    raise Exception()
            else:
                raise Exception()
        except:
            # Fallback: key by residue number. For the listed pdb codes, numbers above the cutoff
            # lose their first digit (e.g. 1234 -> '234').
            if structure!=None and structure.pdb_code.index in seq_nums_overwrite_cutoff_dict:
                if int(residue.get_id()[1])>seq_nums_overwrite_cutoff_dict[structure.pdb_code.index]:
                    gn_array.append(str(int(str(residue.get_id()[1])[1:])))
                else:
                    gn_array.append(str(residue.get_id()[1]))
            else:
                gn_array.append(str(residue.get_id()[1]))
            residue_array.append(residue.get_list())
    output = OrderedDict()
    for num, label in self.segment_coding.items():
        output[label] = OrderedDict()
    if len(gn_array)!=len(residue_array):
        raise AssertionError()
    for gn, res in zip(gn_array,residue_array):
        if '.' in gn:
            # Key is a generic number: the integer part selects the segment.
            seg_num = int(gn.split('.')[0])
            seg_label = self.segment_coding[seg_num]
            # Segment 8 residues are dropped while nothing has been stored under 'TM7'.
            if seg_num==8 and len(output['TM7'])==0:
                continue
            else:
                output[seg_label][gn] = res
        else:
            # Key is a residue number: try to resolve it to a generic number via the database.
            # Any error in this block (including found_res staying None) is handled by the
            # outer except below.
            try:
                found_res, found_gn = None, None
                try:
                    found_res = Residue.objects.get(protein_conformation=structure.protein_conformation, sequence_number=gn)
                except:
                    # Exception for res 317 in 5VEX, 5VEW
                    if structure.pdb_code.index in ['5VEX','5VEW'] and gn=='317' and res[0].get_parent().get_resname()=='CYS':
                        found_res = Residue.objects.get(protein_conformation=parent_prot_conf, sequence_number=gn)
                    #####################################
                found_gn = str(ggn(found_res.display_generic_number.label)).replace('x','.')
                # Exception for res 318 in 5VEX, 5VEW
                if structure.pdb_code.index in ['5VEX','5VEW'] and gn=='318' and res[0].get_parent().get_resname()=='ILE' and found_gn=='5.47':
                    found_gn = '5.48'
                #####################################
                if -9.1 < float(found_gn) < 9.1:
                    # Skip residues with a single atom and residue numbers beyond last_res.
                    if len(res)==1:
                        continue
                    if int(gn)>last_res:
                        continue
                    seg_label = self.segment_coding[int(found_gn.split('.')[0])]
                    output[seg_label][found_gn] = res
            except:
                # Only residues named YCM or CSD (presumably modified cysteines - TODO confirm)
                # get a second chance via the parent conformation; everything else is dropped.
                # NOTE(review): the lookup below is not guarded, so a missing parent residue
                # propagates out of this method.
                if res[0].get_parent().get_resname()=='YCM' or res[0].get_parent().get_resname()=='CSD':
                    found_res = Residue.objects.get(protein_conformation=parent_prot_conf, sequence_number=gn)
                    # Keep only segments whose slug starts with 'T' or 'H'.
                    if found_res.protein_segment.slug[0] not in ['T','H']:
                        continue
                    try:
                        found_gn = str(ggn(found_res.display_generic_number.label)).replace('x','.')
                    except:
                        found_gn = str(gn)
                    output[found_res.protein_segment.slug][found_gn] = res
    return output