def pdb_array_creator(self, structure=None, filename=None):
    '''Build a nested OrderedDict of the residues of a Structure object.

    The outer keys are protein segment labels; the inner keys are generic
    numbers (with '.' instead of 'x') or, on some fallback paths, plain
    sequence numbers as strings. The values are lists of Bio.PDB atoms.

    @param structure: Structure, Structure object of protein. When using
        structure, leave filename=None.
    @param filename: str, filename of pdb to be parsed. When using filename,
        leave structure=None.
    @return: OrderedDict mapping segment label -> OrderedDict of
        number -> list of atoms.

    NOTE(review): ``structure`` is dereferenced unconditionally below
    (``structure.protein_conformation``, ``structure.pdb_code``, ...), so
    calling this with structure=None fails regardless of ``filename``.
    '''
    # Source of the PDB text: the stored pdb data of the structure, or the
    # caller-supplied filename.
    if structure != None and filename == None:
        io = StringIO(structure.pdb_data.pdb)
    else:
        io = filename
    # Parallel lists filled in the generic-numbering branch below:
    # gn_array[k] is the key under which residue_array[k] will be stored.
    gn_array = []
    residue_array = []

    residues = Residue.objects.filter(
        protein_conformation=structure.protein_conformation)
    # Generic numbers (dot notation) known for this protein conformation.
    # Residues for which the label cannot be produced are silently skipped.
    gn_list = []
    for i in residues:
        try:
            gn_list.append(
                ggn(i.display_generic_number.label).replace('x', '.'))
        except:
            pass

    ssno = StructureSeqNumOverwrite(structure)
    ssno.seq_num_overwrite('pdb')
    if len(ssno.pdb_wt_table) > 0:
        # Branch 1: a sequence-number overwrite table exists. The output is
        # built from the stored rotamers of the TM1-TM7/H8 residues instead
        # of from the parsed PDB file.
        residues = residues.filter(protein_segment__slug__in=[
            'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7', 'H8'
        ]).order_by('sequence_number')
        output = OrderedDict()
        for r in residues:
            # NOTE(review): looks like leftover debug output - confirm
            # before relying on stdout being clean.
            print(r, r.display_generic_number.label, r.protein_segment.slug)
            if r.protein_segment.slug == None:
                continue
            if r.protein_segment.slug not in output:
                output[r.protein_segment.slug] = OrderedDict()
            rotamer = Rotamer.objects.filter(residue=r)
            rotamer = self.right_rotamer_select(rotamer)
            rota_io = StringIO(rotamer.pdbdata.pdb)
            p = PDB.PDBParser()
            parsed_rota = p.get_structure('rota', rota_io)
            for chain in parsed_rota[0]:
                for res in chain:
                    atom_list = []
                    for atom in res:
                        # Skip hydrogens
                        if atom.get_id().startswith('H'):
                            continue
                        # The generic number label is split on 'x'; the
                        # first part is written to the B-factor of N, a
                        # "<segment>.<second part>" value to that of CA.
                        if atom.get_id() == 'N':
                            bw, gn = r.display_generic_number.label.split(
                                'x')
                            atom.set_bfactor(bw)
                        elif atom.get_id() == 'CA':
                            bw, gn = r.display_generic_number.label.split(
                                'x')
                            gn = "{}.{}".format(bw.split('.')[0], gn)
                            # Three-digit positions are stored negated with
                            # the last digit dropped (the inverse of the
                            # decoding done in the other branch).
                            if len(gn.split('.')[1]) == 3:
                                gn = '-' + gn[:-1]
                            atom.set_bfactor(gn)
                        atom_list.append(atom)
                    output[r.protein_segment.slug][ggn(
                        r.display_generic_number.label).replace(
                            'x', '.')] = atom_list
        # NOTE(review): looks like leftover debug output - confirm.
        pprint.pprint(output)
        return output
    else:
        # Branch 2: annotate the PDB with generic numbers via the sequence
        # parser and read them back from the CA B-factors.
        assign_gn = as_gn.GenericNumbering(
            pdb_file=io,
            pdb_code=structure.pdb_code.index,
            sequence_parser=True)
        pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser(
        )
        pref_chain = structure.preferred_chain
        parent_prot_conf = ProteinConformation.objects.get(
            protein=structure.protein_conformation.protein.parent)
        parent_residues = Residue.objects.filter(
            protein_conformation=parent_prot_conf)
        # Sequence number of the last residue of the parent protein; used
        # below to discard residues numbered beyond it.
        last_res = list(parent_residues)[-1].sequence_number
        # Only the first character of a multi-character chain spec is used.
        if len(pref_chain) > 1:
            pref_chain = pref_chain[0]
        for residue in pdb_struct[pref_chain]:
            if 'CA' in residue and -9.1 < residue['CA'].get_bfactor(
            ) < 9.1:
                use_resid = False
                # Decode the CA B-factor into a generic number string:
                # pad to two decimals, and turn a negative value into the
                # positive number with a trailing '1'.
                gn = str(residue['CA'].get_bfactor())
                if len(gn.split('.')[1]) == 1:
                    gn = gn + '0'
                if gn[0] == '-':
                    gn = gn[1:] + '1'
                # Exceptions: for these structure/residue pairs the residue
                # number is used as key instead of the decoded number.
                if structure.pdb_code.index == '3PBL' and residue.get_id(
                )[1] == 331:
                    use_resid = True
                elif structure.pdb_code.index == '6QZH' and residue.get_id(
                )[1] == 1434:
                    use_resid = True
                #################################################
                elif gn in gn_list:
                    gn_array.append(gn)
                    residue_array.append(residue.get_list())
                else:
                    # Decoded number is not known for this conformation.
                    use_resid = True
                if use_resid:
                    gn_array.append(str(residue.get_id()[1]))
                    residue_array.append(residue.get_list())
        # One (initially empty) entry per segment label, in the order of
        # self.segment_coding.
        output = OrderedDict()
        for num, label in self.segment_coding.items():
            output[label] = OrderedDict()
        if len(gn_array) != len(residue_array):
            raise AssertionError()
        for gn, res in zip(gn_array, residue_array):
            if '.' in gn:
                # Key is a generic number: file it under its segment.
                seg_num = int(gn.split('.')[0])
                seg_label = self.segment_coding[seg_num]
                # Segment-8 residues are dropped while TM7 is still empty.
                if seg_num == 8 and len(output['TM7']) == 0:
                    continue
                else:
                    output[seg_label][gn] = res
            else:
                # Key is a plain residue number: try to look up its generic
                # number in the database. Any failure in this block falls
                # through to the modified-residue handling in the except.
                try:
                    found_res, found_gn = None, None
                    try:
                        found_res = Residue.objects.get(
                            protein_conformation=structure.
                            protein_conformation,
                            sequence_number=gn)
                    except:
                        # Exception for res 317 in 5VEX, 5VEW
                        if structure.pdb_code.index in [
                                '5VEX', '5VEW'
                        ] and gn == '317' and res[0].get_parent(
                        ).get_resname() == 'CYS':
                            found_res = Residue.objects.get(
                                protein_conformation=parent_prot_conf,
                                sequence_number=gn)
                        #####################################
                    # If found_res is still None this raises and control
                    # moves to the outer except.
                    found_gn = str(
                        ggn(found_res.display_generic_number.label)
                    ).replace('x', '.')

                    # Exception for res 318 in 5VEX, 5VEW
                    if structure.pdb_code.index in [
                            '5VEX', '5VEW'
                    ] and gn == '318' and res[0].get_parent().get_resname(
                    ) == 'ILE' and found_gn == '5.47':
                        found_gn = '5.48'
                    #####################################
                    if -9.1 < float(found_gn) < 9.1:
                        # Skip single-atom residues and residues numbered
                        # past the end of the parent protein.
                        if len(res) == 1:
                            continue
                        if int(gn) > last_res:
                            continue
                        seg_label = self.segment_coding[int(
                            found_gn.split('.')[0])]
                        output[seg_label][found_gn] = res
                except:
                    # Fallback applies only to residues named YCM or CSD:
                    # look them up in the parent protein conformation.
                    if res[0].get_parent().get_resname() == 'YCM' or res[
                            0].get_parent().get_resname() == 'CSD':
                        try:
                            found_res = Residue.objects.get(
                                protein_conformation=parent_prot_conf,
                                sequence_number=gn)
                        except:
                            continue
                        # Keep only segments whose slug starts with T or H.
                        if found_res.protein_segment.slug[0] not in [
                                'T', 'H'
                        ]:
                            continue
                        try:
                            found_gn = str(
                                ggn(found_res.display_generic_number.label)
                            ).replace('x', '.')
                        except:
                            # No generic number: key by sequence number.
                            found_gn = str(gn)
                        output[
                            found_res.protein_segment.slug][found_gn] = res
        return output
def get_simrna_ready(self, renumber_residues=True):
    """Rewrite ``self.lines`` as a cleaned-up, single-model RNA structure.

    The structure is written to a temporary file, re-read with Biopython,
    and rebuilt chain by chain:

    - only the first model is used,
    - residue names are normalised to ``A``/``C``/``G``/``U``; residues with
      any other name are not copied,
    - for each copied residue only the atoms named in the per-base lists
      below are copied, and absent ones are reported on stdout,
    - if the first residue of a chain has no ``P`` atom, one is grafted on
      from the ``data/PO3_inner.pdb`` template,
    - residues are renumbered from 1 within each chain if
      ``renumber_residues`` is true.

    The result is saved to a second temporary file, read back through
    ``StrucFile`` and its lines replace ``self.lines``. Nothing is returned.

    NOTE(review): both temporary files use fixed names under ``/tmp``, so
    concurrent runs would overwrite each other - confirm this is acceptable.

    .. warning:: requires: Biopython
    """
    try:
        from Bio import PDB
        from Bio.PDB import PDBIO
    except:
        sys.exit('Error: Install biopython to use this function (pip biopython)')

    import warnings
    # Silence the Biopython parser warnings matching these patterns.
    warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
    warnings.filterwarnings('ignore', '.*with given element *',)

    import copy

    # Atom names copied for each base type, in output order.
    G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split()
    A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split()
    U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split()
    C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split()

    # Round-trip through a file so Biopython can parse the current state.
    ftmp = '/tmp/out.pdb'
    self.write(ftmp,v=False)

    parser = PDB.PDBParser()
    struct = parser.get_structure('', ftmp)
    model = struct[0]

    # Containers for the rebuilt structure.
    s2 = PDB.Structure.Structure(struct.id)
    m2 = PDB.Model.Model(model.id)

    chains2 = []

    # Entries are [atom name, chain id, residue, new residue index].
    missing = []
    for chain in model.get_list():
        res = []
        for r in chain:
            res.append(r)

        res = copy.copy(res)

        c2 = PDB.Chain.Chain(chain.id)

        c = 1  # new chain, goes from 1 if renumber True
        for r in res:
            # hack for amber/qrna: map terminal/prefixed residue names
            # onto the plain one-letter names
            r.resname = r.resname.strip()
            if r.resname == 'RC3': r.resname = 'C'
            if r.resname == 'RU3': r.resname = 'U'
            if r.resname == 'RG3': r.resname = 'G'
            if r.resname == 'RA3': r.resname = 'A'

            if r.resname == 'C3': r.resname = 'C'
            if r.resname == 'U3': r.resname = 'U'
            if r.resname == 'G3': r.resname = 'G'
            if r.resname == 'A3': r.resname = 'A'

            if r.resname == 'RC5': r.resname = 'C'
            if r.resname == 'RU5': r.resname = 'U'
            if r.resname == 'RG5': r.resname = 'G'
            if r.resname == 'RA5': r.resname = 'A'

            if r.resname == 'C5': r.resname = 'C'
            if r.resname == 'U5': r.resname = 'U'
            if r.resname == 'G5': r.resname = 'G'
            if r.resname == 'A5': r.resname = 'A'

            if r.resname.strip() == 'RC': r.resname = 'C'
            if r.resname.strip() == 'RU': r.resname = 'U'
            if r.resname.strip() == 'RG': r.resname = 'G'
            if r.resname.strip() == 'RA': r.resname = 'A'

            # Empty copy of the residue; atoms are added selectively below.
            r2 = PDB.Residue.Residue(r.id, r.resname.strip(), r.segid)
            if renumber_residues:
                r2.id = (r2.id[0], c, r2.id[2])  ## renumber residues
            if c == 1:
                # First residue of the chain: check whether it has a P atom.
                p_missing = True
                for a in r:
                    if a.id == 'P':
                        p_missing = False

                if p_missing:
                    # Locate the data directory next to this module,
                    # resolving the module path if it is a symlink.
                    currfn = __file__
                    if currfn == '':
                        path = '.'
                    else:
                        path = os.path.dirname(currfn)
                    if os.path.islink(currfn):#path + os.sep + os.path.basename(__file__)):
                        path = os.path.dirname(os.readlink(path + os.sep + os.path.basename(currfn)))

                    po3_struc = PDB.PDBParser().get_structure('', path + '/data/PO3_inner.pdb')
                    po3 = [po3_atom for po3_atom in po3_struc[0].get_residues()][0]

                    # Superimpose the template onto this residue using the
                    # three sugar atoms both have in common.
                    r_atoms = [r["O4'"], r["C4'"], r["C3'"]]
                    po3_atoms = [po3["O4'"], po3["C4'"], po3["C3'"]]

                    sup = PDB.Superimposer()
                    sup.set_atoms(r_atoms, po3_atoms)
                    rms = round(sup.rms, 3)

                    sup.apply( po3_struc.get_atoms() ) # to all atoms of po3

                    # Graft the phosphate atoms of the fitted template.
                    r.add( po3['P'])
                    r.add( po3['OP1'])
                    r.add( po3['OP2'])
                    try:
                        r.add( po3["O5'"])
                    except:
                        # The residue already had an O5': replace it.
                        del r["O5'"]
                        r.add( po3["O5'"])

                    p_missing = False # off this function

            # NOTE(review): `ignore_op3` is not defined in this function;
            # presumably a module-level flag - verify. `x` is never
            # assigned here either, but `r2.add(x)` is only reached while
            # p_missing is still true, and p_missing is always false by
            # this point.
            if str(r.get_resname()).strip() == "G":
                for an in G_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except KeyError:
                        # Atom absent from the input residue.
                        missing.append([an, chain.id, r, c])
                c2.add(r2)

            elif str(r.get_resname()).strip() == "A":
                for an in A_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except KeyError:
                        # Atom absent from the input residue.
                        missing.append([an, chain.id, r, c])
                c2.add(r2)

            elif str(r.get_resname()).strip() == "C":
                for an in C_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    # NOTE(review): unlike the other three branches this
                    # one catches every exception, not just KeyError.
                    except:
                        missing.append([an, chain.id, r, c])
                c2.add(r2)

            elif str(r.get_resname()).strip() == "U":
                for an in U_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except KeyError:
                        # Atom absent from the input residue.
                        missing.append([an, chain.id, r, c])
                c2.add(r2)

            # The counter advances even for residues that were not copied.
            c += 1
        chains2.append(c2)

    # Assemble the rebuilt chains into the new model and save them.
    io = PDBIO()
    s2.add(m2)
    for chain2 in chains2:
        m2.add(chain2)
    io.set_structure(s2)
    fout = '/tmp/outout.pdb' # hack
    io.save(fout)

    if missing:
        print('REMARK 000 Missing atoms:')
        for i in missing:
            print('REMARK 000 +', i[0], i[1], i[2], 'residue #', i[3])

    # Re-read the saved file and adopt its lines as the new content.
    s = StrucFile(fout)
    self.lines = s.lines
def calculate_adjecency(fname, pdb_chain, pdb_list, seq, get_angle=True, include_nones=True, **kw_args):
    '''
    Compute the C-alpha distance matrix of a structure and, optionally,
    the side-chain angle matrix.

    params:
        fname (str) path to .pdb structure file
        pdb_chain (str) chain name; also used as the structure id
        pdb_list (list) residue identifiers (residue number followed by the
            insertion code, as a string); ``None`` marks a residue that is
            present in ``seq`` but missing from the structure
        seq (str) chain residue sequence - used to match sizes
        get_angle (bool) also return the result of ``side_chains_angles``
        include_nones (bool) keep missing residues as NaN rows instead of
            dropping them
        **kw_args: accepted for call compatibility and ignored
    returns:
        distance (np.ndarray) if ``get_angle`` is false, otherwise the
        tuple ``(distance, angle_dist)``
    raises:
        AssertionError if ``pdb_chain`` is None, the file does not exist,
            the structure has more than one model, or (with
            ``include_nones``) the coordinate count differs from ``len(seq)``
        KeyError if an identifier in ``pdb_list`` is not in the structure
    '''
    # check inputs
    assert pdb_chain is not None
    assert os.path.isfile(fname), f'no such file {fname}'
    with open(fname, 'rt') as f:
        structure = bp.PDBParser().get_structure(pdb_chain, f)
    assert len(structure) == 1
    model = structure[0]

    # Map "<residue number><insertion code>" -> residue object.
    resid2res = {
        ''.join(str(part) for part in res.full_id[-1][1:]).strip(): res
        for res in model.get_residues()
    }

    coords_a = []
    coords_b = []
    for pdb_idx in pdb_list:
        if pdb_idx is None:
            # Residue present in sequence but *not* in structure
            if include_nones == False:
                continue
            # np.nan rather than np.NaN: the latter alias was removed in
            # NumPy 2.0.
            coords_a.append(np.full(3, np.nan))
            coords_b.append(np.full(3, np.nan))
            continue

        res = resid2res[pdb_idx]
        coords_a.append(res.child_dict['CA'].coord)
        if res.resname == 'GLY':
            # Glycine has no C-beta: use a generated one.
            cb = generate_Cb(res)
        else:
            try:
                cb = res.child_dict['CB'].coord
            except KeyError:
                print(
                    f'CB atom missing for res {res.resname} in {pdb_chain}; dispatching monkeys to address this fatal issue'
                )
                cb = generate_Cb(res)
        coords_b.append(cb)

    if include_nones == True:
        assert len(coords_a) == len(coords_b) == len(seq)
    xyz_alpha = np.array(coords_a, dtype=np.float32)
    xyz_beta = np.array(coords_b, dtype=np.float32)

    # A pairwise alpha-vs-beta distance comparison was tried here and
    # abandoned because the NaN rows made it raise errors.
    distance = distance_matrix(xyz_alpha, xyz_alpha)
    if None in pdb_list and include_nones == True:
        # Overwrite both first off-diagonals with a fixed value of 5.
        # NOTE(review): presumably a stand-in for the distance between
        # sequence neighbours where NaN rows are present - confirm.
        shape = distance.shape[0]
        off_diag_left = np.arange(1, shape, 1, dtype=int)
        off_diag_right = np.arange(0, shape - 1, 1, dtype=int)
        distance[off_diag_left, off_diag_right] = 5
        distance[off_diag_right, off_diag_left] = 5

    if get_angle:
        angle_dist = side_chains_angles(xyz_alpha, xyz_beta)
        return distance, angle_dist
    return distance
def get_seq(path):
    """Return per-residue information for the standard residues of a PDB file.

    The result is a dict keyed by ``get_num(residue)``. Each value is a
    dict with the keys:

    - ``"name"``: ``convert_name_AA`` of the residue name,
    - ``"enfouissement"``: ``1 - d / d_max`` where ``d`` is the distance
      from the residue's atom barycentre to ``get_info(path)["baryres"]``
      and ``d_max`` is the largest such distance in the file,
    - ``"struct"``: ``"H"`` if a HELIX record covers the residue number,
      ``"F"`` if a SHEET record does, otherwise ``"V"``,
    - ``"id"``: position of the entry in insertion order.

    When several models or chains share a residue key, the last one wins.
    """
    dico = get_info(path)
    parser = pdb.PDBParser()
    structure = parser.get_structure(path, path)
    seq = dict()
    maxenf = 1e-10  # small positive start value avoids division by zero
    AA = [
        'CYS', 'ASP', 'SER', 'GLN', 'LYS', 'ILE', 'PRO', 'THR', 'PHE', 'ASN',
        'GLY', 'HIS', 'LEU', 'ARG', 'TRP', 'ALA', 'VAL', 'GLU', 'TYR', 'MET'
    ]
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.get_resname() not in AA:
                    continue
                aminoacid = dict()
                aminoacid["name"] = convert_name_AA(residue.get_resname())
                # Mean of the atom coordinates. The previous version
                # accumulated into a plain list, where `+=` extends the
                # list instead of adding element-wise, so the following
                # division raised TypeError.
                coords = [atom.get_coord() for atom in residue]
                bary_res = sum(coords) / len(coords)
                aminoacid["enfouissement"] = sum(
                    (bary_res - dico["baryres"])**2)**0.5
                if aminoacid["enfouissement"] > maxenf:
                    maxenf = aminoacid["enfouissement"]
                aminoacid["struct"] = "V"
                seq[get_num(residue)] = aminoacid

    # Secondary structure from the HELIX / SHEET records of the same file.
    with open(path, "r") as handle:
        for line in handle:
            if line[:6] == "HELIX ":
                # initSeqNum: columns 22-25, endSeqNum: columns 34-37
                start = int(line[21:25])
                end = int(line[33:37])
                code = "H"
            elif line[:6] == "SHEET ":
                # initSeqNum: columns 23-26, endSeqNum: columns 34-37.
                # Both fields are four characters wide; reading only three
                # dropped the thousands digit.
                start = int(line[22:26])
                end = int(line[33:37])
                code = "F"
            else:
                continue
            for i in range(start, end + 1):
                if i in seq:
                    seq[i]["struct"] = code

    # Turn distances into a burial score and number the entries.
    for idref, key in enumerate(seq):
        seq[key]["enfouissement"] = 1 - seq[key]["enfouissement"] / maxenf
        seq[key]["id"] = idref

    ID = [seq[k]["id"] for k in seq]
    # Warn only if no id at all matches its position (this includes the
    # case of an empty result).
    if not any(ident == pos for pos, ident in enumerate(ID)):
        print("WARNING - problème id des séquences", path[4:-4], ID)
    return seq
def draw_bounding_boxes(self, bg, s):
    '''
    Register the bounding boxes of every residue pair of a stem.

    For each position along the stem and for each of its two strands, the
    box returned by ftug.bounding_boxes is expanded into 24 edge end
    points, transformed into the strand's basis, stored in self.boxes and
    marked with two spheres (at the first and the eighth point). The first
    strand is drawn in yellow, the second in purple.

    Nothing happens when no pdb file is available.

    @param bg: The BulgeGraph
    @param s: The name of the stem
    '''
    if self.pdb_file is None:
        return

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        struct = bp.PDBParser().get_structure('temp', self.pdb_file)
    first_chain = list(struct.get_chains())[0]

    for pos in range(bg.stem_length(s)):
        (origin, bases, bb) = ftug.bounding_boxes(bg, first_chain, s, pos)
        for strand in range(2):
            (lo, hi) = bb[strand]
            lx, ly, lz = lo[0], lo[1], lo[2]
            hx, hy, hz = hi[0], hi[1], hi[2]

            # Three groups of eight points; consecutive pairs are the
            # box edges running along z, x and y respectively.
            raw_points = [
                [lx, ly, lz], [lx, ly, hz],
                [lx, hy, lz], [lx, hy, hz],
                [hx, ly, lz], [hx, ly, hz],
                [hx, hy, lz], [hx, hy, hz],

                [lx, ly, lz], [hx, ly, lz],
                [lx, hy, lz], [hx, hy, lz],
                [lx, hy, hz], [hx, hy, hz],
                [lx, ly, hz], [hx, ly, hz],

                [lx, ly, lz], [lx, hy, lz],
                [hx, ly, lz], [hx, hy, lz],
                [lx, ly, hz], [lx, hy, hz],
                [hx, ly, hz], [hx, hy, hz],
            ]

            # Express every point in the strand's basis, relative to origin.
            corners = np.array([
                origin + cuv.change_basis(np.array(point),
                                          cuv.standard_basis, bases[strand])
                for point in raw_points
            ])

            if strand == 0:
                self.boxes += [(corners, 'yellow')]
                self.add_sphere(corners[0], 'yellow', 0.4, '',
                                [238 / 255., 221 / 255., 130 / 255.])
                self.add_sphere(corners[7], 'yellow', 0.4, '',
                                [184 / 255., 134 / 255., 11 / 255.])
            else:
                self.add_sphere(corners[0], 'purple', 0.4, '',
                                [238 / 255., 130 / 255., 238 / 255.])
                self.add_sphere(corners[7], 'purple', 0.4, '',
                                [208 / 255., 32 / 255., 144 / 255.])
                self.boxes += [(corners, 'purple')]
def compute_dihedrals(pdbfilename):
    """Print backbone and side-chain dihedrals for every residue of a file.

    One line is printed per residue, made of twelve right-justified
    8-character fields: file label, model id, chain id, residue number,
    residue name, phi, psi, omega, chi1, chi2, chi3, chi4. Angles that do
    not exist (phi/omega of the first residue of a chain, psi of the last)
    are printed as ``999.0``. Chains with a single usable residue produce
    no output.

    Args:
        pdbfilename: path of a PDB (``.pdb``) or mmCIF (``.cif``) file,
            optionally gzip-compressed (``.gz``).

    Raises:
        ValueError: if the name contains neither ``.pdb`` nor ``.cif``.
    """
    ignoremodified = ('PTR', 'TPO', 'SEP', 'MSE', 'BWB', 'CAS', 'CME', 'CSO', 'CSS', 'CSX', 'MK8', 'MLY', 'NEP', 'NMM', 'PHD', 'CAF', 'CSD', 'CYO', 'OCS', 'OCY', 'SCS',
                      'ALY', 'KCX', 'LGY', 'CXM', 'MHO', 'T8L', 'ACE', 'AME', 'CY0', 'UNK', 'T8L', 'MHO', 'COM')

    compressed = '.gz' in pdbfilename.lower()
    source_path = pdbfilename
    if compressed:
        # Drop the ".gz" suffix from the name used for format detection
        # and for the label printed on each line.
        pdbfilename = pdbfilename[0:-3]

    # Choose the parser before touching the file so that an unsupported
    # name fails with a clear error instead of an unbound variable.
    lowered = pdbfilename.lower()
    if '.cif' in lowered:
        parser = PDB.MMCIFParser(QUIET=True)
    elif '.pdb' in lowered:
        parser = PDB.PDBParser(QUIET=True)
    else:
        raise ValueError(
            'unsupported structure file (expected .pdb or .cif): %s' % source_path)

    handle = gzip.open(source_path, 'rt') if compressed else open(source_path, 'r')
    # The parser reads the whole file; close the handle right afterwards.
    with handle:
        structure = parser.get_structure("PDB", handle)

    label = pdbfilename[0:-4]
    no_angle = 999.00  # placeholder printed for undefined angles

    def report(model, chain, residue, phi, psi, omega):
        # Print one output line for `residue`, computing its chi angles.
        chis = [
            compute_chi1(structure, model, chain, residue),
            compute_chi2(structure, model, chain, residue),
            compute_chi3(structure, model, chain, residue),
            compute_chi4(structure, model, chain, residue),
        ]
        fields = [label, str(model.id), chain.id, str(residue.id[1]),
                  residue.resname, str(phi), str(psi), str(omega)]
        fields.extend(str(chi) for chi in chis)
        print(''.join(field.rjust(8) for field in fields))

    for model in structure:
        for chain in model:
            prev_residue = None
            curr_residue = None
            for residue in chain:
                # Skip hetero residues and waters.
                # NOTE(review): the first test already excludes every
                # residue with a hetero flag, so the `ignoremodified`
                # lookup cannot change the outcome - confirm the intent.
                if residue.id[0] != ' ' or residue.id[0][2:] in ignoremodified:
                    continue

                if prev_residue is None:
                    # First usable residue: just remember it.
                    prev_residue = residue
                    continue

                if curr_residue is None:
                    # Second usable residue: the first one now has a
                    # successor, so its psi can be computed.
                    curr_residue = residue
                    psi = compute_psi(structure, model, chain, prev_residue, curr_residue)
                    report(model, chain, prev_residue, no_angle, psi, no_angle)
                    continue

                # Third residue onward: curr_residue has both neighbours.
                next_residue = residue
                phi = compute_phi(structure, model, chain, prev_residue, curr_residue)
                psi = compute_psi(structure, model, chain, curr_residue, next_residue)
                omega = compute_omega(structure, model, chain, prev_residue, curr_residue)
                report(model, chain, curr_residue, phi, psi, omega)
                prev_residue = curr_residue
                curr_residue = next_residue

            if curr_residue is not None:
                # Last residue of the chain: it has no successor, so no psi.
                phi = compute_phi(structure, model, chain, prev_residue, curr_residue)
                omega = compute_omega(structure, model, chain, prev_residue, curr_residue)
                report(model, chain, curr_residue, phi, no_angle, omega)
    return
def structure_from_pdb_string(pdb_string, name=''):
    '''Parse PDB-formatted text held in a string.

    The text is wrapped in an in-memory file object and handed to a fresh
    PDBParser; the resulting structure, with id `name`, is returned.
    '''
    return PDB.PDBParser().get_structure(name, io.StringIO(pdb_string))
'''
Extract the protein sequence from a PDB chain.

-----------------------------------------------------------
(c) 2013 Allegra Via and Kristian Rother
    Licensed under the conditions of the Python License

    This code appears in section 21.4.2 of the book
    "Managing Biological Data with Python".
-----------------------------------------------------------
'''

from Bio import PDB
from Bio.PDB.Polypeptide import PPBuilder

# Parse the structure and split it into polypeptides.
parser = PDB.PDBParser()
structure = parser.get_structure("2DN1", "dn/pdb2dn1.ent")
ppb = PPBuilder()
peptides = ppb.build_peptides(structure)

# Print the sequence of each polypeptide. The call form of print is
# required on Python 3 and behaves the same on Python 2 for one argument.
for pep in peptides:
    print(pep.get_sequence())
def run_RMSD(self, file1, file2):
    ''' Calculates 4 RMSD values between two GPCR pdb files. It compares
        the two files using sequence numbers.

        1. overall all atoms RMSD
        2. overall backbone atoms RMSD
        3. 7TM all atoms RMSD
        4. 7TM backbone atoms RMSD

        @param file1: first pdb file (path or handle accepted by PDBParser)
        @param file2: second pdb file
        @return: list [rmsd1, rmsd2, rmsd3, rmsd4] in the order above
    '''
    parser = PDB.PDBParser(QUIET=True)
    pdb1 = parser.get_structure('struct1', file1)[0]
    pdb2 = parser.get_structure('struct2', file2)[0]

    # Annotate both models with generic numbers (read back below from
    # the CA B-factors).
    assign_gn1 = as_gn.GenericNumbering(structure=pdb1)
    pdb1 = assign_gn1.assign_generic_numbers()
    assign_gn2 = as_gn.GenericNumbering(structure=pdb2)
    pdb2 = assign_gn2.assign_generic_numbers()

    # Pick a chain id present in both models. The outer loop is not
    # interrupted on a match, so the last shared chain of pdb1 is used.
    # NOTE(review): the first shared chain may have been intended - confirm.
    chain1 = None
    chain2 = None
    for i in pdb1:
        for j in pdb2:
            if i.get_id() == j.get_id():
                chain1 = i.get_id()
                chain2 = j.get_id()
                break
    # No shared chain id: fall back to the first chain of each model.
    if chain1 is None:
        for i in pdb1.get_chains():
            chain1 = i.get_id()
            break
    if chain2 is None:
        for i in pdb2.get_chains():
            chain2 = i.get_id()
            break

    def collect(chain):
        # Return (all residues, 7TM residues) of `chain`, keyed by
        # sequence number; hetero residues and waters are skipped.
        everything, helices = OrderedDict(), OrderedDict()
        for residue in chain:
            if residue.get_full_id()[3][0] != ' ':
                continue
            number = int(residue.get_id()[1])
            everything[number] = residue
            try:
                if -8.1 < residue['CA'].get_bfactor() < 8.1:
                    helices[number] = residue
            except (KeyError, TypeError):
                # No CA atom, or no usable B-factor: not a 7TM residue.
                pass
        return everything, helices

    pdb_array1, pdb_array3 = collect(pdb1[chain1])
    pdb_array2, pdb_array4 = collect(pdb2[chain2])

    overall_all1, overall_all2, overall_backbone1, overall_backbone2, o_a, o_b = self.create_lists(
        pdb_array1, pdb_array2)
    TM_all1, TM_all2, TM_backbone1, TM_backbone2, t_a, t_b = self.create_lists(
        pdb_array3, pdb_array4)

    rmsd1 = self.calc_RMSD(overall_all1, overall_all2, o_a)
    rmsd2 = self.calc_RMSD(overall_backbone1, overall_backbone2, o_b)
    rmsd3 = self.calc_RMSD(TM_all1, TM_all2, t_a)
    rmsd4 = self.calc_RMSD(TM_backbone1, TM_backbone2, t_b)
    return [rmsd1, rmsd2, rmsd3, rmsd4]
def run_RMSD_list(self, files, seq_nums=None):
    ''' Calculates 4 RMSD values between a list of GPCR pdb files. It
        compares the files using sequence and generic numbers. First file
        in the list has to be the reference file.

        1. overall all atoms RMSD
        2. overall backbone atoms RMSD
        3. 7TM all atoms RMSD
        4. 7TM backbone atoms RMSD

        Results are stored, per file, in self.rmsds,
        self.number_of_residues_superposed and
        self.number_of_atoms_superposed under the keys 'reference' (first
        file) and 'file2', 'file3', ... (the others); each entry maps the
        names in self.four_scores to a value. Nothing is returned.

        @param files: list of pdb files, reference first
        @param seq_nums: optional collection of sequence numbers; when
            given, the "overall" scores only use these residues
    '''
    # Create the result containers for every file.
    for position, _ in enumerate(files, start=1):
        key = 'reference' if position == 1 else 'file{}'.format(str(position))
        self.number_of_residues_superposed[key] = OrderedDict()
        self.number_of_atoms_superposed[key] = OrderedDict()
        self.rmsds[key] = OrderedDict()

    # Parse every file and annotate it with generic numbers.
    parser = PDB.PDBParser(QUIET=True)
    pdbs = []
    for count, f in enumerate(files, start=1):
        pdb = parser.get_structure('struct{}'.format(count), f)[0]
        assign_gn = as_gn.GenericNumbering(structure=pdb)
        pdb = assign_gn.assign_generic_numbers()
        pdbs.append(pdb)

    # Chain ids of the non-reference models that the reference also has.
    chains = [[ch.get_id() for ch in p.get_chains()] for p in pdbs]
    usable_chains = []
    for model_chains in chains[1:]:
        for chain_id in model_chains:
            if chain_id in chains[0]:
                usable_chains.append(chain_id)

    # Per model: [all residues, 7TM residues], keyed by sequence number.
    arrays = []
    for p in pdbs:
        try:
            if pdbs.index(p) == 0 and len(usable_chains) == 0:
                chain = [ch.get_id() for ch in pdbs[0].get_chains()][0]
            else:
                chain = p[usable_chains[0]].get_id()
        except Exception:
            # Preferred chain unavailable: try a blank id, then 'A'.
            try:
                chain = p[' '].get_id()
            except Exception:
                chain = p['A'].get_id()
        pdb_array1, pdb_array2 = OrderedDict(), OrderedDict()
        for residue in p[chain]:
            # Skip hetero residues and waters.
            if residue.get_full_id()[3][0] != ' ':
                continue
            number = int(residue.get_id()[1])
            if seq_nums is None or number in seq_nums:
                pdb_array1[number] = residue
            try:
                if -8.1 < residue['CA'].get_bfactor() < 8.1:
                    pdb_array2[number] = residue
            except (KeyError, TypeError):
                # No CA atom, or no usable B-factor: not a 7TM residue.
                pass
        arrays.append([pdb_array1, pdb_array2])

    # Residues of the reference that are missing from (deletes) or present
    # in (keeps) each other model. Index 0 = all residues, 1 = 7TM.
    deletes = [[], []]
    keeps = [[], []]
    for i in range(0, 2):
        for res in arrays[0][i]:
            for m in arrays[1:]:
                if res not in m[i]:
                    deletes[i].append(res)
                else:
                    keeps[i].append(res)

    # num_atoms[i][res] holds, per kept residue, the shortest atom list
    # seen in any model. Residues whose name differs between models are
    # dropped and remembered in `mismatches`.
    num_atoms = [OrderedDict(), OrderedDict()]
    mismatches = []
    for m in arrays:
        for i in range(0, 2):
            # Iterate over a snapshot of the keys: entries are deleted
            # inside the loop, and deleting from an OrderedDict while
            # iterating it raises RuntimeError.
            for res in list(m[i]):
                if res in deletes[i] or res not in keeps[i]:
                    del m[i][res]
                    continue
                try:
                    same_residue = (
                        m[i][res].get_resname() ==
                        num_atoms[i][res][0].get_parent().get_resname())
                except Exception:
                    # No reference atoms recorded yet for this residue.
                    same_residue = True
                if not same_residue:
                    del num_atoms[i][res]
                    mismatches.append(res)
                    continue
                if res in mismatches:
                    continue
                atoms = list(m[i][res])
                if res not in num_atoms[i] or len(atoms) < len(num_atoms[i][res]):
                    num_atoms[i][res] = atoms

    # Per model, four atom lists: all atoms, backbone, 7TM all atoms,
    # 7TM backbone. Only atoms named in the reference list are used.
    atom_lists = []
    for m in arrays:
        this_model = []
        for i in range(0, 2):
            this_list_all = []
            this_list_bb = []
            for res in m[i]:
                if res not in num_atoms[i]:
                    continue
                atoms = [a.get_id() for a in m[i][res].get_list()]
                ref_atoms = [at.get_id() for at in num_atoms[i][res]]
                for atom in sorted(atoms):
                    if atom in ref_atoms:
                        this_list_all.append(m[i][res][atom])
                        if atom in ['N', 'CA', 'C']:
                            this_list_bb.append(m[i][res][atom])
            this_model.append(this_list_all)
            this_model.append(this_list_bb)
        atom_lists.append(this_model)

    # Store counts and RMSDs; the reference has no RMSD to itself.
    for position, m in enumerate(atom_lists, start=1):
        key = 'reference' if position == 1 else 'file{}'.format(str(position))
        for i in range(0, 4):
            j = 0 if i < 2 else 1
            score = self.four_scores[i]
            self.number_of_residues_superposed[key][score] = len(num_atoms[j])
            self.number_of_atoms_superposed[key][score] = len(m[i])
            if position > 1:
                self.rmsds[key][score] = self.calc_RMSD(atom_lists[0][i], m[i])
            else:
                self.rmsds[key][score] = None
def _read_pdb(self):
    """Parse the file at self.path with BioPython and keep the result.

    The parsed structure gets self.receptor_name as its id and is stored
    in self.structure.
    """
    self.structure = bp.PDBParser().get_structure(self.receptor_name,
                                                  self.path)
import Bio.PDB as bpdb import numpy as np import pandas as pd import easygui as eg import multiprocessing.dummy as mp #from time import time parser = bpdb.PDBParser() file = eg.fileopenbox(filetypes=['*.pdb']) structure = parser.get_structure('4A_s2', file) angles_by_frame = pd.DataFrame(columns=np.linspace(1, 4, num=4)) frame = 1 clmns = [] rows = {} for i in range(2): clmns.append('phi' f'{i+2}') clmns.append('psi' f'{i+2}') model_list = bpdb.Selection.unfold_entities(structure, 'M') with mp.Pool(32) as pool: chain_list = pool.map(lambda x: x['A'], model_list) poly_list = pool.map(lambda x: bpdb.Polypeptide.Polypeptide(x), chain_list) angle_list = pool.map(lambda x: x.get_phi_psi_list(), poly_list) rowstuff = pool.map( lambda x: np.reshape(x, [1, len(x) * 2])[0][2:-2] * (180 / np.pi), angle_list) rowlist = list(rowstuff) angles_by_frame = pd.DataFrame(rowlist,
def find_files(self):
    ''' Locates files of one receptor model that can be used as source.

        Scans self.zip_path for a zip archive whose name contains
        self.receptor and one of the other subtypes of the subfamily of
        self.sign_prot, unpacks it next to the archive and loads it.

        Side effects: creates self.zip_path and the model directory if
        missing, sets self.path_to_pdb, and for complex models (fourth
        name field is not a state and fifth is not 'refined') sets
        self.complex = True and self.revise_xtal = False.

        @return: None if self.sign_prot already has templates or no
            archive matches; otherwise the tuple (chain 'R' of the first
            model, lines of the .templates.csv file, lines of the
            .template_similarities.csv file) for the first matching archive.
    '''
    sf = SignprotFunctions()
    other_signprots = sf.get_other_subtypes_in_subfam(self.sign_prot)
    gprots_with_structure = sf.get_subtypes_with_templates()
    if self.sign_prot in gprots_with_structure:
        return None

    os.makedirs(self.zip_path, exist_ok=True)
    for f in os.listdir(self.zip_path):
        if not f.endswith('.zip'):
            continue
        modelname = f.split('.')[0]
        if self.receptor not in modelname:
            continue
        if not any(o in modelname for o in other_signprots):
            continue

        # Unpack the archive into a directory named after the model.
        mod_dir = self.zip_path + modelname
        os.makedirs(mod_dir, exist_ok=True)
        # Context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(self.zip_path + f, 'r') as zip_mod:
            zip_mod.extractall(mod_dir)

        name_list = modelname.split('_')
        if name_list[3] not in ['Inactive', 'Active', 'Intermediate'
                                ] and name_list[4] != 'refined':
            self.complex = True
            self.revise_xtal = False

        # Common prefix of the three files read from the model directory.
        base = os.sep.join([self.zip_path, modelname, modelname])
        self.path_to_pdb = base + '.pdb'
        model = PDB.PDBParser().get_structure('receptor',
                                              self.path_to_pdb)[0]['R']
        with open(base + '.templates.csv', 'r') as templates_file:
            templates = templates_file.readlines()
        with open(base + '.template_similarities.csv', 'r') as sim_file:
            similarities = sim_file.readlines()
        return model, templates, similarities
    return None
rama_pref_values[key][int(float(line.split()[1])) + 180][int(float(line.split()[0])) + 179] = float(line.split()[2]) normals = {} outliers = {} for key, val in rama_preferences.items(): normals[key] = {"x": [], "y": []} outliers[key] = {"x": [], "y": []} # Calculate the torsion angle of the inputs for inp in sys.argv[1:]: if not os.path.isfile(inp): print("{} not found!".format(inp)) continue structure = PDB.PDBParser().get_structure('input_structure', inp) for model in structure: for chain in model: polypeptides = PDB.PPBuilder().build_peptides(chain) for poly_index, poly in enumerate(polypeptides): phi_psi = poly.get_phi_psi_list() for res_index, residue in enumerate(poly): res_name = "{}".format(residue.resname) res_num = residue.id[1] phi, psi = phi_psi[res_index] if phi and psi: aa_type = "" if str(poly[res_index + 1].resname) == "PRO": aa_type = "PRE-PRO" elif res_name == "PRO": aa_type = "PRO"
def structure_from_pdb_file(file_path, name=''):
    '''Parse the PDB file at `file_path`.

    A fresh PDBParser is used; the returned structure has id `name`.
    '''
    return PDB.PDBParser().get_structure(name, file_path)
cofactors_dict[line[1]] = [line[3]] pdbl = PDB.PDBList() Error_out = open("microfolds_out.txt", "w") for subdir, dirs, files in os.walk(rootdir): for file in files: try: line = file protein = line[3:7] #print ('pdb_code:'+protein) protein = protein.lower() Error_out.write('pdb_code:' + protein + '\n') parser = PDB.PDBParser(PERMISSIVE=1, get_header=1, QUIET=1) curdir = os.getcwd() filename = rootdir + protein[1:3] + '/' + file #print(filename) final_file = rootdir + protein[1:3] + '/pdb' + protein + '.ent' #print ('unziping') # unzipping gz file gz = gzip.open(filename, 'rb') with open(final_file, 'wb') as out: out.writelines(gz) gz.close() #print ('unziping done') #os.remove(filename) # openning pdb file structure = parser.get_structure(
def main():
    # Command-line driver: repeatedly picks a stem definition, builds a stem
    # model from its PDB fragment, reconstructs a new chain from a second
    # stem definition aligned onto that model, and prints the RMSD between
    # the original and the reconstructed chain.
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source - confirm against the original file.

    # NOTE(review): 'usage' is built but never handed to OptionParser.
    usage = './align_stems.py [stem_length]'
    usage += 'Do diagnostics on the stem model'
    parser = OptionParser()
    parser.add_option('-i', '--iterations', dest='iterations', default=1,
                      help="The number of times to repeat the alignment",
                      type='int')
    parser.add_option('-l', '--length', dest='length', default=2,
                      help="The length of the stem", type='int')
    parser.add_option('-o', '--output-pdb', dest='output_pdb', default=False,
                      help="Output the structures to pdb files",
                      action='store_true')
    parser.add_option(
        '-f', '--from', dest='from_file', default=None,
        help='Specify a file to align from. Invalidates the -l option.',
        type='str')
    parser.add_option(
        '-t', '--to', dest='to_file', default=None,
        help='Specify a file to align to. Invalidates the -l option.',
        type='str')
    parser.add_option(
        '-m', '--method', dest='method', default='e',
        help=
        'Specify which method to use for the helix fitting. e = estimate (original, least accurate method), a = align (better, more accurate method), t = template (best, most accurate method)'
    )
    parser.add_option(
        '-a', '--average-twist', dest='use_average_method', default=False,
        action='store_true',
        help='Use the average of the two twists to align the stems.')
    (options, args) = parser.parse_args()
    # NOTE(review): len() is never negative, so this help-and-exit branch
    # cannot run.
    if len(args) < 0:
        parser.print_help()
        sys.exit(1)
    # A single positional argument overrides the -l/--length option.
    stem_length = options.length
    if len(args) == 1:
        stem_length = int(args[0])
    # The stem statistics are only needed when at least one side of the
    # alignment is sampled instead of being read from a file.
    if options.from_file == None or options.to_file == None:
        sss = cbs.get_stem_stats(
            os.path.join(cbc.Configuration.base_dir,
                         'fess/stats/temp.1jj2.stats'))
    # Collects [superimposed, unsuperimposed] RMSD pairs, one per iteration.
    rmsds = []
    for i in range(options.iterations):
        if options.from_file != None:
            filename = options.from_file
            stem_def = stem_def_from_filename(filename)
        else:
            stem_def = random.choice(sss[stem_length])
            filename = '%s_%s.pdb' % (stem_def.pdb_name, "_".join(
                map(str, stem_def.define)))
        pdb_file = os.path.join(cbc.Configuration.stem_fragment_dir,
                                filename)
        # Extract the PDB coordinates of the original chain
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                chain = list(bpdb.PDBParser().get_structure(
                    'temp', pdb_file).get_chains())[0]
                chain = cbm.extract_stem_from_chain(chain, stem_def)
            except IOError as ie:
                # NOTE(review): execution continues after the error, so
                # 'chain' may be unbound or left over from the previous
                # iteration.
                cud.pv('ie')
        # Convert the chain into a stem model
        # This is where the method for fitting a helix is applied
        #m = cbm.define_to_stem_model(chain, stem_def.define)
        stem = cbm.StemModel(name=stem_def.define)
        define = stem_def.define
        mids = cgg.get_mids(chain, define, options.method)
        stem.mids = tuple([m.get_array() for m in mids])
        stem.twists = cgg.get_twists(chain, define)
        m = stem
        # Create a new chain by aligning the stem from the sampled define
        # to the model created from the original stem
        new_chain = bpdbc.Chain(' ')
        try:
            if options.to_file != None:
                new_stem_def = stem_def_from_filename(options.to_file)
            else:
                new_stem_def = random.choice(sss[stem_def.bp_length])
            cbm.reconstruct_stem_core(new_stem_def, stem_def.define,
                                      new_chain, dict(), m,
                                      options.use_average_method)
        except IOError as ie:
            cud.pv('ie')
        if options.output_pdb:
            rtor.output_chain(chain, 'out1.pdb')
            rtor.output_chain(new_chain, 'out3.pdb')
        # NOTE(review): the second call passes apply_sup=True, which
        # presumably moves new_chain in place before 'out2.pdb' is written -
        # verify against cup.pdb_rmsd.
        unsuperimposed_rmsd = cup.pdb_rmsd(chain, new_chain,
                                           sidechains=False,
                                           superimpose=False)
        superimposed_rmsd = cup.pdb_rmsd(chain, new_chain, sidechains=False,
                                         superimpose=True, apply_sup=True)
        rmsds += [[superimposed_rmsd[1], unsuperimposed_rmsd[1]]]
        #cud.pv('(superimposed_rmsd, unsuperimposed_rmsd)')
        if options.output_pdb:
            rtor.output_chain(new_chain, 'out2.pdb')
            pp = cvp.PymolPrinter()
            (p, n) = m.mids
            pp.add_stem_like_core(m.mids, m.twists, stem_def.bp_length + 1,
                                  '')
            pp.stem_atoms(m.mids, m.twists, stem_def.bp_length + 1)
            pp.dump_pymol_file('ss')
        # Python 2 print statement: stem length, both RMSD values and the
        # unsuperimposed/superimposed ratio.
        print stem_length, superimposed_rmsd[1], unsuperimposed_rmsd[
            1], unsuperimposed_rmsd[1] / superimposed_rmsd[1]
def setUp(self):
    """Create the PDBList and PDBParser objects used by the tests.

    The PDBList is non-verbose and uses the system temporary directory
    as its obsolete_pdb location.
    """
    obsolete_dir = gettempdir()
    self.pdb_list = biopdb.PDBList(obsolete_pdb=obsolete_dir, verbose=False)
    self.biopdb_parser = biopdb.PDBParser()
def __init__(self, ring, pdb, fragments, pdb_out, v):
    """Parse the two input PDB sources and store the remaining settings.

    ring and pdb are handed to a PDB parser and kept as parsed structures
    (ids 'ring' and 'pdb'); fragments, pdb_out and v are stored unchanged.
    """
    self.fragments = fragments
    # A fresh parser per input, exactly as many parsers as inputs.
    ring_parser = PDB.PDBParser()
    self.ring = ring_parser.get_structure('ring', ring)
    pdb_parser = PDB.PDBParser()
    self.pdb = pdb_parser.get_structure('pdb', pdb)
    self.pdb_out, self.v = pdb_out, v
if sym == 'HOH': if full: seq += '-' continue if sym in d1.keys(): seq += d1[sym] elif len(seq) != 0: # TODO: convert to warning # h.write('ID: %s chain %s: unknown residue %s\n' % (pdbID, chainName, sym)) seq += '-' # continue return seq if __name__ == '__main__': parser = pdb.PDBParser(QUIET=True) output = 'chain_seq.txt' g = open(output, 'w') start_dir = './pdb/' files = os.listdir(start_dir) # print(len(files)) for ifile in range(len(files)): filename = files[ifile] if filename == '.' or filename == '..': continue pdbID = filename.split('.')[0] g.write(str(pdbID) + '\n')
params.read(expandpath(args.iconf), "UTF-8") tmp = tempfile.mkstemp()[1] params["Protein"]["pdb"] = expandpath(params["Protein"]["pdb"]) output_pdb_filepath = args.opdb os.system(f"{REDUCE} -Trim {params['Protein']['pdb']} > {tmp}.noH.pdb") os.system( f"""{GMX} trjconv -f {tmp}.noH.pdb -s {tmp}.noH.pdb -o {tmp}.center.pdb -center -boxcenter zero << EOF Protein System EOF""") os.system( f"{GMX} editconf -f {tmp}.center.pdb -o {output_pdb_filepath} -resnr 1" ) old_pdb = PDB.PDBParser().get_structure("old", params["Protein"]["pdb"]) new_pdb = PDB.PDBParser().get_structure("new", output_pdb_filepath) old_to_new = gen_resis_table(old_pdb, new_pdb) length = len([res for res in new_pdb.get_residues() if res.id[0] == " "]) params["Protein"]["resi_st"] = "1" params["Protein"]["resi_ed"] = f"{length}" params["Protein"]["pdb"] = output_pdb_filepath new_ssbonds = [ old_to_new[int(s)] for s in params["Protein"]["ssbond"].split() ] params["Protein"]["ssbond"] = " ".join([str(s) for s in new_ssbonds]) new_binding_residues = [ old_to_new[int(s)] for s in params["Protein"]["binding_site_residues"].split() ]
import sys import re def join_fasta(*fastas): text = '' for fhand in fastas: with open(fhand) as f1: for line in f1: text += line return text if __name__ == '__main__': directory = sys.argv[1] # Folder with the chain pdbs distance = float(sys.argv[2]) directory_out = os.path.join(directory, 'pairs') # Folder where pairs will be saved PDBparser = PDB.PDBParser(QUIET=True) done = [] for pdb_1 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))): for pdb_2 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))): if (pdb_1 == pdb_2): continue # Not to duplicate itself elif not (pdb_1, pdb_2) in done or not (pdb_2, pdb_1) in done: # Not to duplicate pairs done.append((pdb_1, pdb_2)) structure1 = PDBparser.get_structure(pdb_1[:-4], os.path.join(directory,pdb_1)) structure2 = PDBparser.get_structure(pdb_2[:-4], os.path.join(directory,pdb_2)) for chain in structure2.get_chains(): atoms = list(chain.get_atoms()) ns = PDB.NeighborSearch(atoms) # An object to search chains near an atom for target_atom in structure1.get_atoms(): near = ns.search(target_atom.coord, distance) if(near and (pdb_1[-5:-4],pdb_2[-5:-4]) not in done and (pdb_2[-5:-4],pdb_1[-5:-4]) not in done): # If there's an atom near, they interact
import glob

from Bio import PDB

# Writes, for every file under all_pdbs/, two FASTA-like records to
# casp11.sec: the amino-acid sequence and the secondary-structure string
# reported by DSSP, both under a '>' header holding the file path.

# One parser is enough; it is reused for every input file.
parser = PDB.PDBParser()

# The context manager closes the output file even when parsing or DSSP
# raises part-way through the run.
with open('casp11.sec', 'w') as out_file:
    for count, pdb in enumerate(glob.iglob('all_pdbs/*'), start=1):
        # Progress indicator: running number of the file being processed.
        print(count)
        # The structure id is the path with its last four characters
        # (presumably the '.pdb' extension) removed.
        structure = parser.get_structure(pdb[:-4], pdb)
        model = structure[0]
        dssp = PDB.DSSP(model, pdb)
        records = [dssp[key] for key in dssp.keys()]
        # Field 1 of each DSSP record is appended to the sequence and
        # field 2 to the secondary-structure string.
        seq = ''.join(record[1] for record in records)
        ss = ''.join(record[2] for record in records)
        out_file.write('>{}\n'.format(pdb))
        out_file.write('{}\n'.format(seq))
        out_file.write('>{}\n'.format(pdb))
        out_file.write('{}\n'.format(ss))
pdb_stats = [] for pdb_id in pdb_unique_ids: file_fasta = file_pref + pdb_id + ".fasta" file_pdb = file_pref + pdb_id + ".pdb" sequence_fasta = Seq("") num_monomers = 0 for seq_record in SeqIO.parse(file_fasta, "fasta"): sequence_fasta += seq_record.seq num_monomers += 1 # uncomment the following line to produce gc_stats.pkl #pdb_seqlength.append((pdb_id,len(sequence_fasta)/num_monomers)) # i.e. all residues taken into account, not only ones seen in crystal # comment out calculation of the sequence from the pdb file ppb = PDB.PPBuilder() struct = PDB.PDBParser().get_structure(pdb_id, file_pdb) peptides = ppb.build_peptides(struct) sequence_pdb = Seq("") ref_residues = [] for peptide in peptides: sequence_pdb += peptide.get_sequence() for residue in peptide: ref_residues.append(residue) pdb_seqlength.append((pdb_id, len(ref_residues) / num_monomers)) alignment = pairwise2.align.globalds(sequence_fasta, sequence_pdb, matrix, gap_open, gap_extend, one_alignment_only=True)[0]
def get_rnapuzzle_ready(self, renumber_residues=True):
    """Get rnapuzzle ready structure.

    Submission format @http://ahsoka.u-strasbg.fr/rnapuzzles/

    Does:
    - keep only given atoms,
    - renumber residues from 1, if renumber_residues=True (by default)

    The structure is written to a temporary file, re-read with Biopython,
    rebuilt residue by residue, saved again and loaded back through
    StrucFile; self.lines is replaced by the lines of that result.
    Residues whose (normalised) name is not G, A, U or C are dropped.
    Atoms missing from a residue are reported on stdout as REMARK lines.

    Exits the interpreter via sys.exit when Biopython cannot be imported.
    """
    try:
        from Bio import PDB
        from Bio.PDB import PDBIO
    except ImportError:
        sys.exit('Error: Install biopython to use this function (pip biopython)')
    import os
    import shutil
    import tempfile

    # Atoms kept for each nucleotide: the shared backbone/sugar atoms
    # followed by the base-specific ones.
    backbone = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split()
    atoms_by_resname = {
        'G': backbone + 'N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4'.split(),
        'A': backbone + 'N9 C8 N7 C5 C6 N6 N1 C2 N3 C4'.split(),
        'U': backbone + 'N1 C2 O2 N3 C4 O4 C5 C6'.split(),
        'C': backbone + 'N1 C2 O2 N3 C4 N4 C5 C6'.split(),
    }

    # hack for amber/qrna: map alternative residue names to one letter
    aliases = {}
    for base in 'CUGA':
        for pattern in ('R%s3', '%s3', 'R%s5', '%s5', 'R%s'):
            aliases[pattern % base] = base

    # A private temporary directory replaces the fixed /tmp/out.pdb and
    # /tmp/outout.pdb paths, which collided between concurrent runs and
    # were never removed.
    tmp_dir = tempfile.mkdtemp()
    try:
        ftmp = os.path.join(tmp_dir, 'out.pdb')
        self.write(ftmp, v=False)

        parser = PDB.PDBParser()
        struct = parser.get_structure('', ftmp)
        model = struct[0]

        s2 = PDB.Structure.Structure(struct.id)
        m2 = PDB.Model.Model(model.id)

        chains2 = []
        missing = []
        for chain in model.get_list():
            c2 = PDB.Chain.Chain(chain.id)
            c = 1  # new chain, goes from 1 !!!
            # Snapshot the residues before their names are rewritten.
            for r in list(chain):
                name = r.resname.strip()
                r.resname = aliases.get(name, name)

                r2 = PDB.Residue.Residue(r.id, r.resname, r.segid)
                if renumber_residues:
                    r2.id = (r2.id[0], c, r2.id[2])  # renumber residues

                wanted = atoms_by_resname.get(r.resname)
                if wanted is not None:
                    for an in wanted:
                        try:
                            r2.add(r[an])
                        except KeyError:
                            # Atom absent from the input residue.
                            missing.append([an, chain.id, r, c])
                    c2.add(r2)
                # The counter advances for skipped residues as well.
                c += 1
            chains2.append(c2)

        io = PDBIO()
        s2.add(m2)
        for chain2 in chains2:
            m2.add(chain2)
        io.set_structure(s2)
        fout = os.path.join(tmp_dir, 'outout.pdb')
        io.save(fout)

        if missing:
            print('REMARK 000 Missing atoms:')
            for i in missing:
                print('REMARK 000 +', i[0], i[1], i[2], 'residue #', i[3])
            #raise Exception('Missing atoms in %s' % self.fn)
        # The lines are taken from StrucFile before the temporary
        # directory is removed.
        s = StrucFile(fout)
        self.lines = s.lines
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
def __init__(self, filepath):
    """Build a PDBFile from the pdb file found at filepath.

    Arguments :
    ------------
    filepath : string
        path to the pdb file
    """
    # The id is the four characters preceding the last four characters of
    # the path.
    self.id = filepath[-8:-4]

    struct = PDB.PDBParser().get_structure("", filepath)

    # Copy the header fields; two of them are stored as strings.
    header = struct.header
    self.keywords = header['keywords']
    self.name = header['name']
    self.head = header['head']
    self.deposition_date = header['deposition_date']
    self.release_date = header['release_date']
    self.structure_method = header['structure_method']
    self.resolution = header['resolution']
    self.structure_reference = str(header['structure_reference'])
    self.journal_reference = header['journal_reference']
    self.author = header['author']
    self.compound = str(header['compound'])

    # Sequence, chain boundaries and phi/psi angles, one polypeptide at a
    # time.
    builder = PDB.CaPPBuilder()
    self.seq = ""
    atom_idx = 0
    start = 0
    end = 0
    # NOTE(review): polypeptides are paired positionally with chains, which
    # presumably assumes one polypeptide per chain - confirm.
    # NOTE(review): self.chains and self.angles are not created in this
    # method; they must already exist (e.g. on the class) - verify.
    peptides = builder.build_peptides(struct)
    for pp, chain in zip(peptides, struct.get_chains()):
        print (pp)
        seq = str(pp.get_sequence())
        print (seq)
        self.seq += seq
        # Boundaries are running 1-based positions in the concatenated
        # sequence, not the residue ids stored in the file.
        start = end + 1
        print (start)
        end = start + len(seq) - 1
        print (end)
        self.chains.append(Chain(chain.id, self.id, start, end))
        # Entries may be None where an angle cannot be computed (first
        # residue has no phi, last has no psi, or atoms are missing).
        angles = pp.get_phi_psi_list()
        print(angles)
        for atom_idx, (phi, psi) in enumerate(angles, atom_idx + 1):
            self.angles.append(Angle(self.id, atom_idx, phi, psi))
def fetch_residues_from_pdb(self, structure, generic_numbers,
                            modify_bulges=False, just_nums=False):
    ''' Fetches specific lines from pdb file by generic number (if generic number is
    not available then by residue number). Returns nested OrderedDict() with generic
    numbers as keys in the outer dictionary, and atom names as keys in the inner
    dictionary.

    @param structure: Structure, Structure object where residues should be fetched from \n
    @param generic_numbers: list, list of generic numbers to be fetched \n
    @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you
    want a 5x461 residue to be considered a 5x46 residue. \n
    @param just_nums: boolean, when true, entries given as sequence numbers are
    not converted to generic numbers and are stored under str(number).

    Keys of the returned dictionary use '.' in place of 'x'; each value is the
    list of atoms parsed from the selected rotamer.

    Raises IndexError when no rotamer can be selected for an entry.
    '''
    output = OrderedDict()
    for gn in generic_numbers:
        if 'x' in str(gn):
            rotamers = list(
                Rotamer.objects.filter(
                    structure__protein_conformation=structure.
                    protein_conformation,
                    residue__display_generic_number__label=dgn(
                        gn, structure.protein_conformation),
                    structure__preferred_chain=structure.preferred_chain))
        else:
            rotamers = list(
                Rotamer.objects.filter(
                    structure__protein_conformation=structure.
                    protein_conformation,
                    residue__sequence_number=gn,
                    structure__preferred_chain=structure.preferred_chain))
            if not just_nums:
                # Best effort: keep the sequence number when the residue
                # has no usable generic number.
                try:
                    gn = ggn(
                        Residue.objects.get(
                            protein_conformation=structure.
                            protein_conformation,
                            sequence_number=gn).display_generic_number.label)
                except Exception:
                    pass

        if not rotamers:
            raise IndexError(
                'No rotamer found for {} in {}'.format(gn, structure))
        if len(rotamers) > 1:
            # Take the first entry whose pdb text does not start with a
            # COMPND record and whose character at index 21 is part of
            # the preferred chain.
            rotamer = None
            for candidate in rotamers:
                pdb_text = candidate.pdbdata.pdb
                if (not pdb_text.startswith('COMPND')
                        and pdb_text[21] in structure.preferred_chain):
                    rotamer = candidate
                    break
            if rotamer is None:
                raise IndexError(
                    'No rotamer of {} in {} matches the preferred '
                    'chain'.format(gn, structure))
        else:
            rotamer = rotamers[0]

        # Integer sequence numbers have neither replace() nor len(), so
        # they are keyed by their string form.
        if isinstance(gn, str):
            key = gn.replace('x', '.')
            if modify_bulges and len(gn) == 5:
                key = key[:-1]
        else:
            key = str(gn)

        rota_struct = PDB.PDBParser(QUIET=True).get_structure(
            'structure', StringIO(rotamer.pdbdata.pdb))[0]
        # NOTE(review): a rotamer entry presumably holds a single residue;
        # with several, the last one parsed wins - confirm.
        for chain in rota_struct:
            for residue in chain:
                output[key] = list(residue)
    return output