def calculate_residue_depth(aromatic_residues, model, cutoff=3.03):
    """Return the labels of residues whose depth is at most ``cutoff``.

    Parameters
    ----------
    aromatic_residues: iterable of residue objects
        Residues included in the analysis. Each one is passed to
        ``residue_depth`` and must expose a ``node_label`` attribute.
    model: :class:`Bio.PDB.Model.Model`
        BioPython object containing a model of a PDB or MMCIF file; it is
        handed to ``get_surface`` to build the molecular surface.
    cutoff: float, optional
        Largest residue depth that still counts as surface exposed
        (inclusive). Defaults to 3.03, the value previously hard-coded.

    Returns
    -------
    surface_exposed_res: list
        ``node_label`` of every residue whose depth is ``<= cutoff``, in
        input order. An empty list is returned (and a ``RuntimeWarning`` is
        issued) if the surface or any depth calculation fails.
    """
    try:
        surface = get_surface(model)
        return [
            residue.node_label
            for residue in aromatic_residues
            if residue_depth(residue, surface) <= cutoff
        ]
    except Exception:
        # Deliberate best-effort: any failure while computing the surface or
        # the depths degrades to "no surface exposed residues" plus a warning.
        warnings.warn(
            "Unable to calculate residue depth. Check that MSMS is installed. "
            "Please note that MSMS is not compatible with MacOS Catalina.",
            RuntimeWarning,
            stacklevel=2)
        return []
def GetResidueDepPDB(pdb, pdbfile):
    """Append residue-depth records for one PDB entry to the ``OUT`` file.

    The structure is obtained with ``GetStructure(pdb)`` and only its first
    model is used. For every amino-acid residue one tab-separated line is
    produced: ``pdb, chain id, residue number, residue name, residue depth,
    C-alpha depth``. All lines are appended to ``OUT`` in a single write
    while ``mutex_writefile`` is held.

    :param pdb: identifier passed to ``GetStructure`` and written as the
        first column of every record
    :param pdbfile: path of the PDB file handed to ``get_surface``
    :returns: None. If the surface cannot be computed a message is printed
        and nothing is written.
    """
    s = GetStructure(pdb)
    model = s[0]
    residuelist = Selection.unfold_entities(model, 'R')
    try:
        surface = get_surface(pdbfile, PDBTOXYZ, MSMS)
    except Exception:
        print("cannot get surface for " + pdbfile)
        return

    lines = []
    for residue in residuelist:
        if not is_aa(residue):
            continue
        resid = residue.get_id()
        resname = residue.get_resname()
        chainid = residue.get_parent().get_id()
        try:
            # average depth over all atoms of the residue
            rd = residue_depth(residue, surface)
        except Exception:
            # residues whose depth cannot be computed are skipped
            continue
        ca_rd = ca_depth(residue, surface)
        info = [pdb, chainid, resid[1], resname, str(rd), str(ca_rd)]
        lines.append("\t".join(map(str, info)) + "\n")

    # Hold the lock only for the write, and always release it: a failed
    # open()/write() must not leave other writers blocked forever.
    mutex_writefile.acquire()
    try:
        with open(OUT, "a") as outobj:
            outobj.write("".join(lines))
    finally:
        mutex_writefile.release()
def get_residue_depths(pdb_file):
    """
    Compute three per-residue depth measures for a PDB file.

    Parameters
    ----------
    pdb_file
        Passed unchanged to ``read_pdb``, which supplies the structure and
        the residues to measure.

    Returns
    -------
    dict
        ``"depth_mean"``: ``residue_depth`` of each residue,
        ``"depth_cb"``: ``min_dist`` between each residue's beta coordinates
        and the surface,
        ``"depth_ca"``: ``ca_depth`` of each residue.
        Every value is a numpy array with one entry per residue, in the
        order the residues are returned by ``read_pdb``.
    """
    structure, residues, _, _, _ = read_pdb(pdb_file)
    surface = get_surface(structure)

    depths = {}
    depths["depth_mean"] = np.array(
        [residue_depth(res, surface) for res in residues])
    depths["depth_cb"] = np.array(
        [min_dist(get_beta_coordinates(res), surface) for res in residues])
    depths["depth_ca"] = np.array(
        [ca_depth(res, surface) for res in residues])
    return depths
def get_structure_stat(structure, surface):
    """Print a per-model, per-chain summary of ``structure`` to stdout.

    For every chain the number of residues, the number of water residues
    (hetero flag ``'W'``) and the number of other hetero residues (hetero
    flag neither ``'W'`` nor ``' '``) are printed, followed by one line per
    non-water hetero residue giving its depth, occupancy, B-factor, name and
    a classification tag.

    :param structure: object exposing ``id`` and ``get_list()`` (models);
        each model exposes ``get_list()`` (chains); each chain exposes
        ``id``, ``child_list`` and iteration over residues
    :param surface: surface passed to ``residue_depth`` for hetero residues
    :returns: None; all output goes to stdout

    The single-argument ``print(...)`` form is used so the function behaves
    identically on Python 2 and Python 3.
    """
    print("* " + structure.id)
    print(" number of models:" + str(len(structure.get_list())))
    for modelnum, model in enumerate(structure.get_list(), 1):
        print(" Model " + str(modelnum))
        print(" number of chains:" + str(len(model.get_list())))
        for chain in model.get_list():
            print(" chain " + chain.id)
            print(" number of residues : " + str(len(chain.child_list)))
            water_count = 0
            hetero_lines = []
            for res in chain:
                res_id = res.id
                if res_id[0] == 'W':
                    water_count += 1
                elif res_id[0] != ' ':
                    resname = res.resname
                    # The name-based checks are independent (not elif): when
                    # several match, the last matching tag wins.
                    kind = '?'
                    if Is_carbohydrate_residue(resname):
                        kind = ' carbohydrate '
                    if Is_cofactor_residue(resname):
                        kind = ' cofactor '
                    if Is_protein_residue(resname):
                        kind = ' protein '
                    if Is_ion_residue(resname):
                        kind = ' ion '
                    if Is_not_lig_residue(res):
                        kind = ' notLig '
                    # The remaining checks append to the tag, not replace it.
                    if Has_backbone(res):
                        kind = kind + ' has backbone: amino_acid??'
                    if Is_lig_residue(res):
                        kind = kind + ' maybe Lig? '
                    rd = residue_depth(res, surface)
                    o = calc_occupancy(res)
                    b = calc_bfactor(res)
                    hetero_lines.append(
                        " " + res_id[0] + " " + str(res_id[1]) +
                        " (barial = " + str(rd) + ";" +
                        " occupancy = " + str(o) + ";" +
                        " bfactor = " + str(b) + ";" + resname + "==" + kind +
                        ")" + '\n')
            print(" number that are water residues : " + str(water_count))
            print(" number that are other hetro-residues : " +
                  str(len(hetero_lines)))
            print("".join(hetero_lines))
def get_depth_contact_res(model, contact_res):
    """Map each contact residue key to its residue depth.

    Args:
        model: object indexable by chain key whose chains are indexable by
            residue key; also passed to ``get_surface``.
        contact_res: iterable of hashable, indexable keys where ``key[0]``
            selects the chain and ``key[1]`` selects the residue.

    Returns:
        dict: ``{key: residue_depth(model[key[0]][key[1]], surface)}`` for
        every key in ``contact_res``.
    """
    surface = get_surface(model)
    return {
        key: residue_depth(model[key[0]][key[1]], surface)
        for key in contact_res
    }
def get_depth_contact_res(model, contact_res):
    """Compute the residue depth of every contact residue.

    Args:
        model (bio model): model of the structure; used both to build the
            surface and to look up chains/residues
        contact_res (list): contact residue keys; ``key[0]`` indexes the
            chain in ``model`` and ``key[1]`` indexes the residue in that
            chain

    Returns:
        dict: residue depth keyed by the entries of ``contact_res``
    """
    surface = get_surface(model)

    def depth_of(entry):
        # entry[0] -> chain key, entry[1] -> residue key within that chain
        return residue_depth(model[entry[0]][entry[1]], surface)

    return {entry: depth_of(entry) for entry in contact_res}
def get_structure_eval(structure, surface):
    """Score a structure from its size and summed residue depth.

    :param structure: object exposing ``get_list()`` (models), each model
        exposing ``get_list()`` (chains), each chain iterable over residues
    :param surface: surface passed to ``residue_depth``
    :returns: tuple ``(depth_total, occupancy_total, bfactor_total, score)``
        where ``score = depth_total + eval_size(structure)``

    NOTE: the three totals are reset at the start of every chain, so the
    returned values describe only the last chain that was iterated (all
    zeros when there is no chain).
    """
    size_score = eval_size(structure)
    depth_total = occupancy_total = bfactor_total = 0
    for model in structure.get_list():
        for chain in model.get_list():
            depth_total, occupancy_total, bfactor_total = 0, 0, 0
            for res in chain:
                depth_total += residue_depth(res, surface)
                occupancy_total += calc_occupancy(res)
                bfactor_total += calc_bfactor(res)
    # Occupancy and B-factor are reported but do not enter the score.
    # TODO: consider penalizing the ligand if not all atoms are represented
    # in the pdb.
    score = depth_total + size_score
    return depth_total, occupancy_total, bfactor_total, score
def readPDBFile(structName, fileName):
    """Collect a numeric feature list for residues of a PDB structure.

    The file is parsed with ``Bio.PDB.PDBParser``; for every model, chain and
    polypeptide, each residue whose sequence number is not yet a key of
    ``result`` gets the feature list::

        [resCode, phi, psi, depth, n_neighbour_residues, n_charged,
         n_polar, n_non_polar, secondary] + energyList

    and is stored as ``result[residue_number]`` provided no feature is
    ``None``.

    :param structName: structure id passed to ``get_structure``
    :param fileName: path of the PDB file to parse
    :returns: ``result``

    NOTE(review): ``result`` and ``dssp`` are read here but never assigned
    inside this function (the only ``dssp`` assignment is commented out);
    presumably they are module-level objects -- confirm before relying on
    this function.
    """
    charged = {"ARG", "HIS", "LYS", "ASP", "GLU"}
    polar = {"SER", "THR", "TYR", "ASN", "GLN"}
    # NOTE(review): nonPolar is not read below; any neighbour that is neither
    # charged nor polar is counted as non-polar by the else branch.
    nonPolar = {
        "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "PRO", "TRP", "GLY", "CYS"
    }
    # code from https://warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/calculate/#BioPython
    structure = Bio.PDB.PDBParser().get_structure(structName, fileName)
    for model in structure:
        # numpy array of all surface vertices of folded protein
        surface = get_surface(
            model)
        for chain in model:
            polypeptides = Bio.PDB.PPBuilder().build_peptides(chain)
            # dssp = DSSP(chain, fileName, dssp = "mkdssp")
            atoms = Bio.PDB.Selection.unfold_entities(chain, "A")
            # neighbour search is restricted to atoms of the current chain
            ns = Bio.PDB.NeighborSearch(atoms)
            for poly_index, poly in enumerate(polypeptides):
                # startIndex / endIndex are computed but not used afterwards
                startIndex = poly[0].id[1]
                endIndex = poly[-1].id[1]
                phiPsi = poly.get_phi_psi_list()
                for residue in range(len(poly)):
                    # takes first data point for overlapping chains
                    if poly[residue].id[1] not in result:
                        res = poly[residue].resname
                        # amino-acid index scaled by 1/20
                        resCode = Bio.PDB.Polypeptide.three_to_index(
                            poly[residue].resname) / 20
                        phi, psi = phiPsi[residue]
                        # phi/psi cannot be calculated at edges (they are
                        # None there); otherwise both are scaled by 1/360
                        if phi != None:
                            phi /= 360
                        if psi != None:
                            psi /= 360
                        # average depth of all atoms in residue from surface
                        depth = residue_depth(poly[residue], surface)
                        # collect atoms within searchRadius of the residue's
                        # alpha carbon and group their parent residues
                        searchRadius = 5
                        closeAtoms = ns.search(poly[residue]["CA"].coord,
                                               searchRadius)
                        numCloseAtoms = len(closeAtoms)
                        residues, curCharged, curPolar, curNonPolar = set(
                        ), set(), set(), set()
                        for atom in closeAtoms:
                            currentRes = atom.get_parent()
                            curName = currentRes.resname
                            residues.add(currentRes)
                            if curName in charged:
                                curCharged.add(currentRes)
                            elif curName in polar:
                                curPolar.add(currentRes)
                            else:
                                curNonPolar.add(currentRes)
                        # only residues present in dssp contribute features
                        if (chain, poly[residue].id) in dssp:
                            # element 2 of the dssp entry, mapped to an index
                            # and scaled by 1/7
                            secondary = (Bio.PDB.DSSP.ss_to_index(
                                dssp[(chain, poly[residue].id)][2])) / 7
                            # everything from element 6 onwards of the entry
                            energyList = list(dssp[(chain,
                                                    poly[residue].id)][6:])
                            featuresList = [
                                resCode, phi, psi, depth,
                                len(residues),
                                len(curCharged),
                                len(curPolar),
                                len(curNonPolar)
                            ] + [secondary] + energyList
                            # removes residues for which phi/psi cannot be
                            # calculated
                            if None not in featuresList:
                                result[poly[residue].id[1]] = featuresList
    return result
def get_residue_depth(pdb_fh, msms_fh):
    """
    Extracts Residue depth from PDB structure

    The MSMS vertex file ``<dirname(msms_fh)>/<basename(pdb_fh)>.msms.vert``
    is generated on first use (via ``pdb_to_xyzr`` and the MSMS executable)
    and reused afterwards. Depths are collected for amino-acid residues of
    chain "A" in the first model; collection stops at the first amino-acid
    residue that belongs to another chain.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable; ``pdb_to_xyzr`` and all
        generated files live in the same directory
    :returns data_depth: pandas table indexed by residue number ("aasi") with
        columns "Residue depth" and "Residue (C-alpha) depth"; an empty table
        with the same columns when no chain "A" residue was collected
    """
    from Bio.PDB import Selection, PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import _read_vertex_array, residue_depth, ca_depth

    surface_fh = "%s/%s.msms.vert" % (dirname(msms_fh), basename(pdb_fh))
    if not exists(surface_fh):
        pdb_to_xyzr_fh = "%s/pdb_to_xyzr" % dirname(msms_fh)
        xyzr_fh = "%s/%s.xyzr" % (dirname(msms_fh), basename(pdb_fh))
        pdb_to_xyzr_com = "%s %s > %s" % (pdb_to_xyzr_fh, pdb_fh, xyzr_fh)
        msms_com = "%s -probe_radius 1.5 -if %s -of %s > %s.log" % (
            msms_fh, xyzr_fh, splitext(surface_fh)[0], splitext(surface_fh)[0])
        log_fh = "%s.log" % msms_fh
        # "with" guarantees the log is closed even if the call below raises.
        with open(log_fh, 'a') as log_f:
            log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com, msms_com))
            # Flush so the command header precedes the child's output, which
            # is written straight to the same file descriptor.
            log_f.flush()
            # NOTE(review): the command is a shell string built from the
            # given paths; paths containing spaces or shell metacharacters
            # will break it. Only pass trusted paths.
            subprocess.call("%s;%s" % (pdb_to_xyzr_com, msms_com),
                            shell=True,
                            stdout=log_f,
                            stderr=subprocess.STDOUT)

    surface = _read_vertex_array(surface_fh)
    pdb_parser = PDBParser()
    pdb_data = pdb_parser.get_structure("pdb_name", pdb_fh)
    model = pdb_data[0]

    depth_dict = {}
    for residue in Selection.unfold_entities(model, 'R'):
        if not is_aa(residue):
            continue
        chain_id = residue.get_parent().get_id()
        if chain_id != "A":
            # Only chain "A" is reported; stop at the first other chain.
            break
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        depth_dict[(chain_id, residue.get_id())] = (rd, ca_rd)
        # Update xtra information
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    columns = ["Residue depth", "Residue (C-alpha) depth"]
    if not depth_dict:
        # Without this guard the reshaping below fails with an obscure
        # KeyError on "level_0".
        return pd.DataFrame(columns=columns, index=pd.Index([], name="aasi"))

    # Rows are keyed by (chain_id, res_id); after reset_index the chain id is
    # "level_0" and the residue id tuple is "level_1".
    depth_df = pd.DataFrame(depth_dict).T.reset_index()
    depth_df = depth_df.drop("level_0", axis=1)
    # Keep a row only when its residue number differs from the previous
    # row's; other rows get no "aasi" value and are dropped below. Because
    # the comparison starts from 0, a leading residue numbered 0 is dropped.
    aasi_prev = 0
    for i in range(len(depth_df)):
        if depth_df.loc[i, "level_1"][1] != aasi_prev:
            depth_df.loc[i, "aasi"] = depth_df.loc[i, "level_1"][1]
            aasi_prev = depth_df.loc[i, "level_1"][1]
    depth_df = depth_df.drop("level_1", axis=1)
    depth_df = depth_df.loc[~pd.isnull(depth_df.loc[:, "aasi"]), :]
    depth_df = depth_df.set_index("aasi", drop=True)
    depth_df.columns = columns
    return depth_df
def _ensure_msms_surface(pdb_fh, msms_fh):
    """Return the MSMS vertex file path for pdb_fh, generating it if absent."""
    msms_dir = dirname(msms_fh)
    surface_fh = "%s/%s.msms.vert" % (msms_dir, basename(pdb_fh))
    if exists(surface_fh):
        return surface_fh

    surface_stem = splitext(surface_fh)[0]
    xyzr_fh = "%s/%s.xyzr" % (msms_dir, basename(pdb_fh))
    pdb_to_xyzr_com = "%s %s > %s" % ("%s/pdb_to_xyzr" % msms_dir, pdb_fh, xyzr_fh)
    msms_com = "%s -probe_radius 1.5 -if %s -of %s > %s.log" % (
        msms_fh, xyzr_fh, surface_stem, surface_stem)

    # The context manager closes the log even when the call below raises.
    with open("%s.log" % msms_fh, 'a') as log_f:
        log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com, msms_com))
        # The child writes directly to the descriptor, so flush first to keep
        # the command header ahead of its output.
        log_f.flush()
        # NOTE(review): shell command assembled from the supplied paths;
        # spaces or shell metacharacters in them will break it. Use trusted
        # paths only.
        subprocess.call("%s;%s" % (pdb_to_xyzr_com, msms_com),
                        shell=True, stdout=log_f, stderr=subprocess.STDOUT)
    return surface_fh


def get_residue_depth(pdb_fh,msms_fh):
    """
    Extracts Residue depth from PDB structure

    The MSMS surface for the structure is generated once next to the MSMS
    executable and reused on later calls. Only amino-acid residues of chain
    "A" in the first model are reported; iteration stops at the first
    amino-acid residue of any other chain.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable (``pdb_to_xyzr`` is expected
        in the same directory)
    :returns data_depth: pandas table indexed by residue number ("aasi") with
        columns "Residue depth" and "Residue (C-alpha) depth"; empty (same
        columns) when no chain "A" residue was collected
    """
    from Bio.PDB import Selection,PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import _read_vertex_array,residue_depth,ca_depth

    surface = _read_vertex_array(_ensure_msms_surface(pdb_fh, msms_fh))
    model = PDBParser().get_structure("pdb_name", pdb_fh)[0]

    depth_dict = {}
    for residue in Selection.unfold_entities(model, 'R'):
        if not is_aa(residue):
            continue
        chain_id = residue.get_parent().get_id()
        if chain_id != "A":
            # Reporting is limited to chain "A"; stop at the first other one.
            break
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        depth_dict[(chain_id, residue.get_id())] = (rd, ca_rd)
        # Update xtra information
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    column_names = ["Residue depth","Residue (C-alpha) depth"]
    if not depth_dict:
        # Avoids the KeyError on "level_0" that the reshaping below would
        # raise for an empty selection.
        return pd.DataFrame(columns=column_names, index=pd.Index([], name="aasi"))

    # Index after transpose is (chain_id, res_id); reset_index exposes them
    # as "level_0" and "level_1".
    depth_df=pd.DataFrame(depth_dict).T.reset_index()
    depth_df=depth_df.drop("level_0",axis=1)
    # A row receives an "aasi" value only if its residue number differs from
    # the preceding row's; rows without one are removed afterwards. The
    # comparison starts at 0, so a leading residue numbered 0 is removed too.
    aasi_prev=0
    for i in range(len(depth_df)):
        if depth_df.loc[i,"level_1"][1]!=aasi_prev:
            depth_df.loc[i,"aasi"]=depth_df.loc[i,"level_1"][1]
            aasi_prev=depth_df.loc[i,"level_1"][1]
    depth_df=depth_df.drop("level_1",axis=1)
    depth_df=depth_df.loc[~pd.isnull(depth_df.loc[:,"aasi"]),:]
    depth_df=depth_df.set_index("aasi",drop=True)
    depth_df.columns=column_names
    return depth_df