def sumAccesbilityBS(ppocketatom, proteinASA, proteinRSA):
    """Sum accessibility values over the atoms and residues of a binding site.

    ppocketatom -- path of the PDB file holding the binding-site atoms
    proteinASA  -- path of a PDB-formatted file whose per-atom ``Bfact``
                   attribute is summed into ``sumASA``
    proteinRSA  -- path of the file parsed by ``parseRSAfile.RSA``

    Returns a dict with the keys ``"sumASA"``, ``"sumRSAabs"`` and
    ``"sumRSArel"``.
    """
    asa_pdb = PDB.PDB(proteinASA)
    asa_pdb.get_lAtoms()
    rsa = parseRSAfile.RSA(proteinRSA)
    bs_atoms = PDB.PDB(ppocketatom).get_lAtoms()

    def _same_atom(bs_atom, prot_atom):
        # an atom is identified by chain, atom name, residue name and serial
        return (bs_atom.chainID == prot_atom.chainID
                and bs_atom.name == prot_atom.name
                and bs_atom.resName == prot_atom.resName
                and bs_atom.serial == prot_atom.serial)

    def _in_residue(bs_atom, res):
        # a residue is identified by chain, residue name and residue number
        return (bs_atom.chainID == res.chainID
                and bs_atom.resName == res.resName
                and bs_atom.resSeq == res.resSeq)

    # per-atom part: only the first matching protein atom is counted
    sum_asa = 0.0
    for bs_atom in bs_atoms:
        hit = next((prot_atom for prot_atom in asa_pdb.latom
                    if _same_atom(bs_atom, prot_atom)), None)
        if hit is not None:
            sum_asa += float(hit.Bfact)

    # per-residue part: a residue counts once as soon as one binding-site
    # atom belongs to it; "N/A" entries are ignored
    sum_abs = 0.0
    sum_rel = 0.0
    for res in rsa.lres:
        if not any(_in_residue(bs_atom, res) for bs_atom in bs_atoms):
            continue
        if res.ABSall != "N/A":
            sum_abs += float(res.ABSall)
        if res.RELall != "N/A":
            sum_rel += float(res.RELall)

    return {"sumASA": sum_asa, "sumRSAabs": sum_abs, "sumRSArel": sum_rel}
def computeRMSFresBS(self, pr_MDout):
    """Rewrite the per-residue RMSD table with a binding-site flag column.

    Every file found in ``<pr_MDout>BSs/`` is parsed and the residue numbers
    it contains (second ``_``-separated field of each ``get_byres`` key) are
    collected.  The table ``<pr_MDout>RMSDs/residues/resRMSD`` is then copied
    to ``resRMSD_BS`` with an extra ``BS`` column set to 1 when the row's
    ``NameRes`` is one of the collected residue numbers, 0 otherwise.

    Returns the path of the file written.
    """
    pr_BS = pr_MDout + "BSs/"

    # a set gives constant-time membership tests when flagging the rows below
    bs_residues = set()
    for pBS in listdir(pr_BS):
        cBS = PDB.PDB(pr_BS + pBS)
        for res in cBS.get_byres():
            bs_residues.add(res.split("_")[1])

    presRMSF = pr_MDout + "RMSDs/residues/resRMSD"
    ldresRMSF = toolbox.matrixToList(presRMSF)

    pfilout = pr_MDout + "RMSDs/residues/resRMSD_BS"
    # context manager: the handle is closed even if a row lacks a column
    with open(pfilout, "w") as filout:
        filout.write("NameRes\tall\tCa\tDmax\tBS\n")
        for dresRMSF in ldresRMSF:
            BS = 1 if dresRMSF["NameRes"] in bs_residues else 0
            filout.write("%s\t%s\t%s\t%s\t%s\n" % (
                dresRMSF["NameRes"], dresRMSF["all"], dresRMSF["Ca"],
                dresRMSF["Dmax"], BS))

    return pfilout
def computeFPI(self, clean=0):
    """Compute the FPI table for ``self.plig`` / ``self.pBS`` and return its path.

    The frame identifier is taken from the ligand file name (text between the
    first ``_`` and the first following ``.``); the result is written to
    ``<self.prout>FPI_<frameID>.csv``.

    clean -- when 0 and the output file already exists, the existing file is
             reused and nothing is recomputed.

    If the computation fails, the partially written file is removed before
    the exception propagates, so that a later call does not mistake a
    truncated file for a cached result.
    """
    from os import remove  # only needed on the failure path

    frameID = self.plig.split("/")[-1].split("_")[1].split(".")[0]
    pfileFPI = self.prout + "FPI_" + frameID + ".csv"

    # existing result and no recomputation requested => reuse it
    if path.exists(pfileFPI) and clean == 0:
        return pfileFPI

    # define the pocket residues before touching the output file, so that a
    # parsing failure cannot leave a header-only file behind
    cPocket = PDB.PDB(self.pBS, hydrogen=1)
    cPocket.get_byres(onlyres=1)
    lres = cPocket.getListResForFPI()

    try:
        with open(pfileFPI, "w") as fileFPI:
            # header
            fileFPI.write(
                "Ligand and pocket res\tList residues in pocket\tFPI\n")
            pyplif.get_FPI(pligPDB=self.plig, ppocketPDB=self.pBS,
                           lres=lres, filout=fileFPI)
    except Exception:
        if path.exists(pfileFPI):
            remove(pfileFPI)
        raise

    return pfileFPI
def __init__(self, ppocket, pPDB):
    """Keep the pocket/protein paths and parse the pocket file.

    ppocket -- path of the pocket PDB file
    pPDB    -- path of the protein PDB file (stored, not parsed here)
    """
    self.ppocket = ppocket
    self.pprotein = pPDB

    # parsed without the hydrogen option
    pocket = PDB.PDB(ppocket)
    self.latoms = pocket.get_lAtoms()
    self.byresall = pocket.get_byres(onlyres=1)
def RMSDProt(self):
    """Write the protein RMSD of every sampled frame against frame 0.

    The table goes to ``<prRMSD>protein/protRMSD``; when that file already
    exists nothing is recomputed.  Frames are visited from ``self.stepFrame``
    in steps of ``self.stepFrame`` while the index is below
    ``MDtime / timeframe``.  Each frame is transformed with its stored
    superimposition matrix before the RMSD is computed, and the resulting
    table is handed to ``runExternalSoft.runscatterplotRMSD``.
    """
    pr_out = self.dMD["prRMSD"] + "protein/"
    pathFolder.createFolder(pr_out)

    # make sure the superimposition matrices are available
    if "prSuperMatrix" not in dir(self):
        self.Superimpose(0)

    pfilout = pr_out + "protRMSD"
    if path.exists(pfilout):
        return

    def _frame_path(tag):
        return self.dMD["prframe"] + "frame_" + tag + ".pdb"

    filout = open(pfilout, "w")
    filout.write("Time\tRMSDall\tRMSDC\tDmax\n0\t0\t0\t0\n")

    # reference frame
    ref_tag = str("%05d" % (0))
    ref_prot = PDB.PDB(PDB_input=_frame_path(ref_tag))
    ref_prot.get_atomProt()

    frame_index = self.stepFrame
    last_index = float(self.MDtime) / float(self.timeframe)
    while frame_index < last_index:
        cur_tag = str("%05d" % (frame_index))
        cur_prot = PDB.PDB(PDB_input=_frame_path(cur_tag))
        cur_prot.get_atomProt()

        # move the current frame onto the reference with the stored matrix
        matrix = toolbox.loadMatrixTMalign(
            self.prSuperMatrix + str(ref_tag) + "_" + str(cur_tag))
        for atom in cur_prot.latomProt:
            atom.applyMatrixRotTransloc(matrix)

        rmsd = calculate.RMSDTwoList(ref_prot.latomProt, cur_prot.latomProt)
        filout.write("%s\t%s\t%s\t%s\n" % (frame_index / 100.0, rmsd[0],
                                           rmsd[1], rmsd[2]))
        frame_index += self.stepFrame

    filout.close()
    runExternalSoft.runscatterplotRMSD(pfilout)
def computeRMSDProt(self, pr_MDout):
    """Compute the RMSD of every frame against frame 0 and write it to a table.

    For each matrix file in ``<pr_MDout>RMSDs/superimpose/`` the frame label
    is the text after the last ``_`` of the file name.  The matching
    ``framesMD/frame_<label>.pdb`` is loaded, transformed with that matrix
    and compared with ``framesMD/frame_00000.pdb``.

    The table is written to ``<pr_MDout>RMSDs/protein/protRMSD_all``; its
    path is returned.

    Raises KeyError when the frame labels are not the consecutive
    zero-padded numbers ``00001`` .. ``N`` (N = number of matrices), because
    the rows are written by counting from 1.
    """
    # reference structure: frame 0
    cfram0 = PDB.PDB(pr_MDout + "framesMD/frame_00000.pdb")
    cfram0.get_lAtoms()

    pr_TMalign = pr_MDout + "RMSDs/superimpose/"
    dRMSD = {}
    for pTMalign in listdir(pr_TMalign):
        frame = pTMalign.split("_")[-1]
        dmatrixTMalign = toolbox.loadMatrixTMalign(pr_TMalign + pTMalign)

        cFrame = PDB.PDB("%sframesMD/frame_%s.pdb" % (pr_MDout, frame))
        cFrame.get_lAtoms()
        for atom in cFrame.latom:
            atom.applyMatrixRotTransloc(dmatrixTMalign)

        dRMSD[frame] = calculate.RMSDTwoList(cfram0.latom, cFrame.latom)

    # write the protein RMSD file; the context manager closes the handle
    # even when a frame label is missing
    pfilout = pr_MDout + "RMSDs/protein/protRMSD_all"
    with open(pfilout, "w") as filout:
        filout.write("Time\tRMSDall\tRMSDC\tDmax\n")
        filout.write("0.0\t0.0\t0.0\t0.0\n")
        for i in range(1, len(dRMSD) + 1):
            lRMSD = dRMSD["%05d" % i]
            filout.write("%.2f\t%s\t%s\t%s\n" % (i / 100.0, lRMSD[0],
                                                 lRMSD[1], lRMSD[2]))

    return pfilout
def computeRMSDLig(self, pr_MDout):
    """Compute the ligand RMSD of every frame against frame 0.

    For each matrix file in ``<pr_MDout>RMSDs/superimpose/`` the frame label
    is the text after the last ``_`` of the file name.  The matching
    ``lig/LGD_<label>.pdb`` is loaded, transformed with that matrix and
    compared with ``lig/LGD_00000.pdb``; only the first value returned by
    ``calculate.RMSDTwoList`` is kept.

    The table is written to ``<pr_MDout>RMSDs/ligand/ligRMSD``; its path is
    returned.

    Raises KeyError when the frame labels are not the consecutive
    zero-padded numbers ``00001`` .. ``N`` (N = number of matrices), because
    the rows are written by counting from 1.
    """
    # reference structure: ligand in frame 0
    clig0 = PDB.PDB(pr_MDout + "lig/LGD_00000.pdb")
    clig0.get_lAtoms()

    pr_TMalign = pr_MDout + "RMSDs/superimpose/"
    dRMSD = {}
    for pTMalign in listdir(pr_TMalign):
        frame = pTMalign.split("_")[-1]
        dmatrixTMalign = toolbox.loadMatrixTMalign(pr_TMalign + pTMalign)

        cligFrame = PDB.PDB("%slig/LGD_%s.pdb" % (pr_MDout, frame))
        cligFrame.get_lAtoms()
        for atomLig in cligFrame.latom:
            atomLig.applyMatrixRotTransloc(dmatrixTMalign)

        dRMSD[frame] = calculate.RMSDTwoList(clig0.latom, cligFrame.latom)[0]

    # write the ligand RMSD file; the context manager closes the handle
    # even when a frame label is missing
    pfilout = pr_MDout + "RMSDs/ligand/ligRMSD"
    with open(pfilout, "w") as filout:
        filout.write("Time\tRMSD\n")
        filout.write("0.0\t0.0\n")
        for i in range(1, len(dRMSD) + 1):
            filout.write("%.2f\t%s\n" % (i / 100.0, dRMSD["%05d" % i]))

    return pfilout
def ASAHydrophobicityPolarity(ppdbasa, pBS):
    """Return ``[polarityASA, hydrophobicityASA]`` for a binding site.

    ppdbasa -- path of a PDB-formatted file whose per-atom ``Bfact``
               attribute is used as the value to accumulate
    pBS     -- path of the binding-site PDB file

    Each binding-site atom is matched to the first atom of ``ppdbasa`` with
    the same chain, atom name, residue name and serial.  Its value is put in
    a bucket chosen from the element: ``C``, ``O``, ``N``, or for sulfur
    ``Scys`` (CYS) / ``Smet`` (MET).

    polarity       = (O + N + Scys) / (O + N + Scys + Smet + C)
    hydrophobicity = (C + Smet)     / (O + N + Scys + Smet + C)

    Raises:
        ValueError -- a matched sulfur atom belongs to a residue other than
                      CYS or MET (previously a deliberate NameError).
        KeyError -- a matched atom has an element other than C, O, N or S.
        ZeroDivisionError -- the accumulated total is zero.
    """
    latomASA = PDB.PDB(ppdbasa).get_lAtoms()
    latomBS = PDB.PDB(pBS).get_lAtoms()

    dcompute = {"C": [], "O": [], "N": [], "Scys": [], "Smet": []}

    for atomBS in latomBS:
        for atomProt in latomASA:
            if not (atomBS.chainID == atomProt.chainID
                    and atomBS.name == atomProt.name
                    and atomBS.resName == atomProt.resName
                    and atomBS.serial == atomProt.serial):
                continue

            if atomBS.element != "S":
                bucket = atomBS.element
            elif atomBS.resName == "CYS":
                bucket = "Scys"
            elif atomBS.resName == "MET":
                bucket = "Smet"
            else:
                # explicit error instead of referencing an undefined name
                raise ValueError(
                    "Sulfur atom outside CYS/MET in binding site: %s"
                    % (atomBS,))

            dcompute[bucket].append(float(atomProt.Bfact))
            break  # only the first matching atom is used

    polar = sum(dcompute["O"]) + sum(dcompute["N"]) + sum(dcompute["Scys"])
    sum_smet = sum(dcompute["Smet"])
    sum_c = sum(dcompute["C"])
    # same addition order as the original denominator
    total = polar + sum_smet + sum_c

    polarityASA = polar / total
    hydrophobicityASA = (sum_c + sum_smet) / total

    return [polarityASA, hydrophobicityASA]
def extractLigBSbyFrame(self, BSCutoff, namelig, clean=0):
    """Write, for every frame of every job, the ligand and its binding site.

    BSCutoff -- value passed as ``dpocket`` to ``get_BSfromlig``
    namelig  -- ligand name passed to ``get_lig``
    clean    -- forwarded to ``pathFolder.createFolder`` for the output folders

    For each job of ``self.lMD`` that has a ``"prframe"`` entry, the folders
    ``<pranalysis><job>/BSs/`` and ``<pranalysis><job>/lig/`` are created and
    recorded under ``"prBSs"`` / ``"prLig"``.  A job is skipped when both
    folders already hold at least as many entries as there are frames.
    """
    c = 1
    for jobname in self.lMD.keys():
        print("%s %s" % (c, jobname))
        job = self.lMD[jobname]

        if "prframe" in job:
            pr_job = self.pranalysis + str(jobname)
            job["prBSs"] = pr_job + "/BSs/"
            pathFolder.createFolder(job["prBSs"], clean=clean)
            job["prLig"] = pr_job + "/lig/"
            pathFolder.createFolder(job["prLig"], clean=clean)

            lpframe = [job["prframe"] + fname
                       for fname in listdir(job["prframe"])]
            nb_frame = len(lpframe)

            # already extracted => nothing to do for this job
            if (len(listdir(job["prLig"])) >= nb_frame
                    and len(listdir(job["prBSs"])) >= nb_frame):
                c += 1
                print("=> pass")
                continue

            for pframe in lpframe:
                cPDB = PDB.PDB(pframe, hydrogen=1)
                latomlig = cPDB.get_lig(namelig)
                cPDB.get_BSfromlig(dpocket=BSCutoff)

                # output names reuse the frame suffix of the input file
                suffix = pframe.split("_")[-1]
                pLGD = job["prLig"] + "LGD_" + suffix
                pBS = job["prBSs"] + "BS_" + suffix

                cPDB.writePDB(pLGD, latomlig, conect=1)
                # "UNK_900_A": default in schrodinger
                cPDB.writePDB(pBS, cPDB.pocketsRES["UNK_900_A"])
        c += 1
def superimposedPoseCluster(self):
    """Write one multi-model PDB file per cluster with its docking poses.

    For every docking folder of ``self.lprdockingpose`` a ``Superimposed/``
    folder is created under ``<self.prout><docking folder name>/``.  For each
    cluster, every ``sdf`` pose whose file name matches (``search``) one of
    the cluster's compounds is converted to PDB, its atoms are renamed, and
    it is written to ``<cluster>.pdb`` with ``model=1``.
    """
    for prdocking in self.lprdockingpose:
        pr_super = (self.prout + prdocking.split("/")[-2] + "/"
                    + "Superimposed/")
        # only the sdf poses are candidates; list the folder once
        sdf_poses = [fname for fname in listdir(prdocking)
                     if fname.endswith("sdf")]
        pathFolder.createFolder(pr_super)

        for cluster in self.clusters.keys():
            pclusterpose = pr_super + str(cluster) + ".pdb"
            for compound in self.clusters[cluster]:
                for pose in sdf_poses:
                    if not search(compound, pose):
                        continue
                    pposePDB = runExternalSoft.babelConvertSDFtoPDB(
                        prdocking + pose)
                    cpose = PDB.PDB(PDB_input=pposePDB)
                    cpose.renameAtom()
                    cpose.writePDB(pfilout=pclusterpose, conect=1, model=1)
try: solsol_neighbours[sol2].append(sol1) except KeyError: solsol_neighbours.update({sol2: [sol1]}) # end try # end if # end for for key in solsol_neighbours: solsol_neighbours[key] = list(set(solsol_neighbours[key])) # end for solvents = mol_neighbours.keys() residues = sol_neighbours.keys() mdl = PDB(fname) Pred = dict([[str(mdl.resSeq(i)) + ':' + mdl.chainID(i), max(mdl.T()[i], 0)] for i in range(len(mdl))]) def separate_by_chains(S): keys = list(set([res.split(':')[-1] for res in S])) chains = dict([[key, []] for key in keys]) for res in S: key = res.split(':')[-1] chains[key].append(res) # end for return chains # end def
# end for content.append(seq[cuts[-1]:]) content = '\n'.join(content).strip() + '\n' out = out + content return out # end def # read input fname = sys.argv[1] out_root = sys.argv[2] # get sequence mdl = PDB(fname) try: mdl.write("tmp.pdb") except: pass seq = extract_sequence(mdl) # write output chains = seq.keys() for chain in chains: fasta_lines = fasta_format(seq[chain], out_root + '_' + chain) outfile = out_root + '_' + chain + '.fasta' print outfile fout = open(outfile, 'w') fout.writelines(fasta_lines + '\n')
def parse(self):
    """Read PDB records from ``self._handle`` and build a ``PDB.PDB`` object.

    The file is read line by line until ``readline()`` returns ``''``.
    Records are dispatched on their leading characters:

    * ``MODEL``  -- remembers the model number (second whitespace field).
    * ``ENDMDL`` -- stores the chains accumulated so far as
      ``models[curr_model_number]`` and resets the per-model state.
    * ``SEQRES`` -- appends the one-letter codes of columns 19-70 to the
      sequence of the chain found at column 11.
    * ``ATOM`` / ``HETATM`` -- builds a ``PDB.Atom`` and accumulates it into
      the current residue / chain; atoms of a chain that already had a
      ``TER`` record are ignored.
    * ``TER``    -- closes the current residue and chain.
    * ``END `` / ``CONECT`` -- ignored.
    * anything else -- kept verbatim in ``extra_records``.

    Returns ``PDB.PDB(models, extra_records)``.
    """
    extra_records = []
    ## data structures that are saved after parsing
    models = {}
    seqres_chains = {}
    ## running data structures used to accumulate per-chain, per-residue, and per-atom data
    curr_seqres_chain = None
    chains = {}
    curr_model_number = None
    curr_res_num = None
    curr_res_name = None
    curr_atom_list = []
    curr_residue_list = []
    curr_chain = None
    chain_ter_seen = {}
    while 1:
        line = self._handle.readline()
        if Common.debug: print "PDBParser line: %s" % line,
        ## end of file
        if line == '':
            if Common.debug:
                print "PDBParser end of file"
                print "curr_atom_list is", curr_atom_list
                print "curr_residue_list is", curr_residue_list
                print "curr_chain is", curr_chain
            ## flush whatever residue / chain is still pending
            if curr_atom_list != [] and curr_res_name not in Common.residue_skip_list:
                addResidueToResidueList(curr_residue_list, curr_res_num,
                                        curr_res_name, curr_atom_list)
            if curr_residue_list != [] and curr_res_name not in Common.residue_skip_list:
                addChainToChainList(chains, curr_chain, curr_residue_list)
            ## in a single-model file without MODEL and ENDMDL records, we must set this:
            if curr_model_number == None:
                if Common.debug: print "PDBParser end of file, autosetting model 1"
                models[1] = PDB.Model(chains)
            break
        ## if we see a new model record, record its number
        elif line[:5] == 'MODEL':
            ## the official format:
            ## curr_model_number = int(line[10:14])
            ## fix/hack due to TAB stored in pdbstyle files:
            curr_model_number = int(string.split(line)[1])
            if Common.debug: print "PDBParse: model entry", curr_model_number
            continue
        ## at the end of a new model, clear away state data.
        ## The TER record before ENDMDL would have dealt with adding the model to the chain
        elif line[:6] == 'ENDMDL':
            if Common.debug: print "PDBParse end of model entry", curr_model_number
            models[curr_model_number] = PDB.Model(chains)
            ## reset the chain_ter_seen for the new model
            chains = {}
            curr_res_num = None
            curr_res_name = None
            curr_atom_list = []
            curr_residue_list = []
            curr_chain = None
            chain_ter_seen = {}
            continue
        elif line[:6] == 'SEQRES':
            chain_id = line[11]
            chain_data = string.split(line[19:70])
            if Common.debug: print "PDBParser SEQRES:", chain_id, chain_data
            if Common.debug: print "SEQRES", chain_id, chain_data
            ## first SEQRES line of a chain: start an empty sequence for it
            if curr_seqres_chain == None or curr_seqres_chain != chain_id:
                if Common.debug: print "Setup new chain", chain_id
                curr_seqres_chain = chain_id
                seqres_chains[curr_seqres_chain] = ""
            if Common.debug: print "Store SEQRES chain '%s' (%s)" % (chain_id, chain_data)
            for res in chain_data:
                seqres_chains[curr_seqres_chain] += Common.three_to_one(
                    res)
        elif line[:4] == 'ATOM' or line[:6] == 'HETATM':
            ## fixed-column fields of the ATOM / HETATM record
            atom_num = int(line[6:11])
            atom_name = string.strip(line[12:16])
            alternate = line[16]
            res_name = string.strip(line[17:20])
            chain_id = line[21]
            ## slice 22:27 also takes the column right after the residue number
            res_num = string.strip(line[22:27])
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            ## occupancy, B-factor and element are optional: fall back to defaults
            try:
                occupancy = float(line[55:60])
            except:
                occupancy = 0.0
            try:
                bfactor = float(line[60:66])
            except:
                bfactor = 0.0
            try:
                element = line[77]
            except IndexError:
                element = ' '
            ## if Common.debug: print "PDBParser ATOM:", atom_num, atom_name, res_name, chain_id, res_num, x, y, z, occupancy, bfactor, element
            ## if we've seen a TER record for this chain already, ignore any further entries with that
            ## chain_id, because these are just associated ions which will not contribute to the sequence
            ## this is broken if people re-use chain IDs, but that would be... stupid!
            if chain_ter_seen.has_key(chain_id):
                continue
            ## new chain; store previously accumulated residues
            if curr_chain != chain_id and curr_res_name not in Common.residue_skip_list:
                if Common.debug: print "New chain", chain_id, "curr chain", curr_chain
                if curr_chain != None:
                    ## build a new chain object
                    ## store all the accumulated residues into this chain
                    if curr_atom_list != None and curr_res_num != None and curr_res_name != None:
                        addResidueToResidueList(curr_residue_list,
                                                curr_res_num, curr_res_name,
                                                curr_atom_list)
                    if Common.debug: print "Adding residues to chain %s: %s" % (
                        curr_chain,
                        map(lambda x: (x.getName(), x.getNumber()),
                            curr_residue_list))
                    addChainToChainList(chains, curr_chain, curr_residue_list)
                ## store the new current chain
                curr_chain = chain_id
                ## clear the accumulated residue
                curr_residue_list = []
                ## clear the current residue number
                curr_res_num = None
                curr_res_name = None
            ## new residue; store the previously accumulated atoms
            if curr_res_num != res_num:
                ## finished parsing a residue, so build a new
                ## residue object and store all the accumulated
                ## atoms into this residue
                if curr_res_num != None and curr_res_name not in Common.residue_skip_list:
                    addResidueToResidueList(curr_residue_list, curr_res_num,
                                            curr_res_name, curr_atom_list)
                if Common.debug: print "New residue", res_num
                ## remember the new current residue
                curr_res_num = res_num
                curr_res_name = res_name
                ## clear the accumulated atom list
                curr_atom_list = []
            ## build a new atom object
            if Common.debug: print "New atom: '%s', '%s', '%s', '%s', '%s'" % (
                atom_num, atom_name, alternate, res_name, res_num)
            isHetatm = 0
            if line[:6] == 'HETATM': isHetatm = 1
            atom = PDB.Atom(serial=atom_num,
                            name=atom_name,
                            alternate=alternate,
                            residue=None,
                            coords=(x, y, z),
                            occupancy=occupancy,
                            bfactor=bfactor,
                            element=element,
                            isHetatm=isHetatm)
            curr_atom_list.append(atom)
        ## when we see a TER record, save the chain it came from
        ## because some PDB files put ions 'associated' with the chain
        ## after the TER record:
        ## ATOM 3660 OXT GLN B 502 129.488 87.534 67.598 1.00168.80 O
        ## TER 3661 GLN B 502
        ## HETATM 3662 S SO4 B 1 99.307 73.882 58.307 1.00 63.92 S
        elif line[:3] == 'TER':
            ## NOTE(review): chain_id here is the value left by the last
            ## SEQRES/ATOM record; it looks unbound if TER comes first and
            ## debug is on -- confirm
            if Common.debug: print "PDBParser: TER on chain", chain_id
            ## a short TER line has no column 21: fall back to the current chain
            try:
                chain_id = line[21]
                chain_ter_seen[chain_id] = 1
            except:
                chain_ter_seen[curr_chain] = 1
            addResidueToResidueList(curr_residue_list, curr_res_num,
                                    curr_res_name, curr_atom_list)
            addChainToChainList(chains, curr_chain, curr_residue_list)
            curr_res_num = None
            curr_res_name = None
            curr_atom_list = []
            curr_residue_list = []
        elif line[:4] == 'END ' or line[:6] == 'CONECT':
            pass
        else:
            extra_records.append(line)
    ## Build the sequences after everything else is done
    ## (I did this in case the SEQRES came after ATOM records, even though that's illegal)
    ## NOTE(review): `chains` is reset to {} on every ENDMDL, so for a file
    ## ending with ENDMDL this loop appears to have nothing to iterate over
    ## -- confirm whether models stored earlier are meant to get SEQRES data
    for chain_id in chains.keys():
        ## store the per-chain seqres data in the chain
        if seqres_chains.has_key(chain_id):
            if Common.debug: print "Storing SEQRES chain '%s' (%s)" % (
                chain_id, seqres_chains[chain_id])
            chains[chain_id].setSeqres(seqres_chains[chain_id])
    return PDB.PDB(models, extra_records)
return mean(v) # end def def find_quartiles(S): mid = median(S) lower = [t for t in S if t < mid] upper = [t for t in S if t > mid] return median(lower), mid, median(upper) # end def root = pred_fname.split('/')[-1] # 1. check if length of pdb file and entropy file match entropy_data = open(entropy_fname).read().strip().split('\n')[1:] entropy_data = [line.split() for line in entropy_data] mdl = PDB(pred_fname) # 1.1 read depth prediction prediction = [mdl.T(i) for i in range(len(mdl)) if mdl.name(i) == 'CA'] # 1.2 read entropies <- # entropies = [float(t[1]) for t in entropy_data] # following 10 lines make up for NA value in entropies (for benchmarking set only <- with unannotated residues) buffer_entropy = [t[1] for t in entropy_data] u = mean_float(buffer_entropy) entropies = [] for t in buffer_entropy: try: entropies.append(float(t)) except ValueError: entropies.append(u) # end try
CF.cleanexit('PDB code not found.') args.FILENAME = '{}.fasta'.format(args.PDB) else: CF.cleanexit('Your PDB code should be four letters long.') if args.PDB != "None" and args.FILENAME != "None.fasta": message = 'Entered both a PDB code and a default file name. Using the PDB code, {}'.format( args.PDB) print(message) programstart = time.time() #If command line variables are specified, use those. if len(sys.argv) > 1: MainProgram = CF.CF(settings=args) if args.PDB != "None": PDBProgram = PDB.PDB(settings=args) #If no command line variables are specified, use config file and defaults else: print 'reading settings from configfile (' + configfile + ')' MainProgram = CF.CF(defaults=defaults, configfile=configfile) if args.PDB != "None": PDBProgram = PDB.PDB(defaults=defaults, configfile=configfile) programend = time.time() print '\nConsensus Finder Completed.' os.rename(HOME + '/uploads/' + MainProgram.settings.FILENAME, HOME + '/completed/' + MainProgram.settings.FILENAME) for i in MainProgram.output[:]: print(i) print 'Your results are in the ./completed/ directory.' print ''.join(MainProgram.warnings) print 'Process took ' + str(int(programend - programstart)) + ' seconds'
def ligRMSFShaEP(self, RMSF=1, ShaEPScore=1):
    """Compute per-atom ligand RMSF and/or ShaEP scores along the trajectory.

    RMSF       -- when 1, write ``<prRMSD>ligand/ligRMSF`` (columns
                  ``Atom``, ``RMSF``) and call ``runExternalSoft.RMSFLig``
    ShaEPScore -- when 1, write ``<prRMSD>ligand/ligShaEP`` (columns
                  ``Time``, ``ESPscore``, ``Shape``) and call
                  ``runExternalSoft.scatterplotShaEP``

    When both options are 1 and both output files already exist, nothing is
    recomputed.  Frames are visited from ``self.stepFrame`` in steps of
    ``self.stepFrame`` while the index is below ``MDtime / timeframe``; each
    ligand frame is transformed with its stored superimposition matrix
    before being compared with the ligand of frame 0.
    """
    prLig = self.dMD["prRMSD"] + "ligand/"
    pathFolder.createFolder(prLig)
    # printing the attribute doubles as an existence check: any failure
    # triggers the superimposition
    try:
        print self.prSuperMatrix
    except:
        print self.Superimpose(0)
    # open reference frame
    nframeref = str("%05d" % (0))
    pframeref = self.dMD["prLig"] + "LGD_" + nframeref + ".pdb"
    cligref = PDB.PDB(PDB_input=pframeref)
    cligref.get_lAtoms()
    # both outputs requested and both already on disk => nothing to do
    if RMSF == 1 and ShaEPScore == 1:
        pfiloutRMSF = prLig + "ligRMSF"
        pfiloutShaEP = prLig + "ligShaEP"
        if path.exists(pfiloutShaEP) and path.exists(pfiloutRMSF):
            return
    if RMSF == 1:
        pfiloutRMSF = prLig + "ligRMSF"
        filoutRMSF = open(pfiloutRMSF, "w")
        filoutRMSF.write("Atom\tRMSF\n")
    if ShaEPScore == 1:
        pfiloutShaEP = prLig + "ligShaEP"
        filoutShaEP = open(pfiloutShaEP, "w")
        filoutShaEP.write("Time\tESPscore\tShape\n")
    # scratch folder used for the ShaEP runs
    prtemp = prLig + "temp/"
    pathFolder.createFolder(prtemp, clean=1)
    # atom name -> list of per-frame deviations from the reference atom
    dRMSF = {}
    i = self.stepFrame
    imax = float(self.MDtime) / float(self.timeframe)
    while i < imax:
        print i, imax, "frame"
        nframe2 = str("%05d" % (i))
        pframe2 = self.dMD["prLig"] + "LGD_" + nframe2 + ".pdb"
        clig2 = PDB.PDB(PDB_input=pframe2)
        clig2.get_lAtoms()
        pmatrix = self.prSuperMatrix + str(nframeref) + "_" + str(nframe2)
        # apply matrix on frame 2
        matrixload = toolbox.loadMatrixTMalign(pmatrix)
        for atomlig2 in clig2.latom:
            atomlig2.applyMatrixRotTransloc(matrixload)
        if RMSF == 1:
            print len(cligref.latom), len(clig2.latom)
            # atoms are paired by name; only the first atom of the frame
            # with a matching name is used
            for atomRef in cligref.latom:
                name = str(atomRef.name)
                for atomframe in clig2.latom:
                    if name == atomframe.name:
                        RMSD = calculate.RMSDTwoList([atomRef], [atomframe])
                        if not name in dRMSF.keys():
                            dRMSF[name] = []
                        dRMSF[name].append(RMSD[0])
                        break
        if ShaEPScore == 1:
            # ShaEP is run on copies placed in the scratch folder, which is
            # emptied before and after each run
            pathFolder.cleanFolder(prtemp)
            ptempref = prtemp + pframeref.split("/")[-1]
            ptempframe2 = prtemp + pframe2.split("/")[-1]
            copyfile(pframeref, ptempref)
            # the transformed frame is written out, not the original file
            clig2.writePDB(ptempframe2, conect=1)
            runExternalSoft.runShaep(ptempref, ptempframe2,
                                     prtemp + "shaep-out")
            doutShaep = parseShaep.parseOutputShaep(prtemp + "shaep-out")
            filoutShaEP.write(
                str(i / 100.0) + "\t" +
                str(doutShaep["ESP_similarity"]) + "\t" +
                str(doutShaep["shape_similarity"]) + "\n")
            pathFolder.cleanFolder(prtemp)
        i += self.stepFrame
    if ShaEPScore == 1:
        filoutShaEP.close()
        runExternalSoft.scatterplotShaEP(pfiloutShaEP)
    if RMSF == 1:
        # one row per atom name: average of the per-frame deviations
        for natom in dRMSF.keys():
            linew = str(natom) + "\t" + str(average(dRMSF[natom])) + "\n"
            filoutRMSF.write(linew)
        filoutRMSF.close()
        runExternalSoft.RMSFLig(pfiloutRMSF)
def protResRMSF(self):
    """Write the per-residue average deviation from frame 0.

    The table goes to ``<prRMSD>residues/resRMSD`` (columns ``NameRes``,
    ``all``, ``Ca``, ``Dmax``); when that file already exists nothing is
    recomputed.  Frames are visited from ``self.stepFrame`` in steps of
    ``self.stepFrame`` while the index is below ``MDtime / timeframe``.
    Only residues whose name is in ``PDB.LRESSHORT`` are kept, keyed by
    residue number.  The table is finally handed to
    ``runExternalSoft.runScatterplotRMSF``.
    """
    pr_residues = self.dMD["prRMSD"] + "residues/"
    pathFolder.createFolder(pr_residues)

    # printing the attribute doubles as an existence check
    try:
        print(self.prSuperMatrix)
    except:
        print(self.Superimpose(0))

    pfilout = pr_residues + "resRMSD"
    if path.exists(pfilout):
        return

    filout = open(pfilout, "w")
    filout.write("NameRes\tall\tCa\tDmax\n")

    def _frame_path(tag):
        return self.dMD["prframe"] + "frame_" + tag + ".pdb"

    # reference frame, split by residue
    ref_tag = str("%05d" % (0))
    ref_prot = PDB.PDB(PDB_input=_frame_path(ref_tag))
    ref_prot.get_byres()

    per_residue = {}
    frame_index = self.stepFrame
    last_index = float(self.MDtime) / float(self.timeframe)
    while frame_index < last_index:
        print("%s %s frame" % (frame_index, last_index))
        cur_tag = str("%05d" % (frame_index))
        cur_prot = PDB.PDB(PDB_input=_frame_path(cur_tag))
        cur_prot.get_lAtoms()

        # transform the atoms first, then group them by residue
        matrix = toolbox.loadMatrixTMalign(
            self.prSuperMatrix + str(ref_tag) + "_" + str(cur_tag))
        for atom in cur_prot.latom:
            atom.applyMatrixRotTransloc(matrix)
        cur_prot.get_byres()

        for resname in cur_prot.byres:
            fields = resname.split("_")
            if fields[0] not in PDB.LRESSHORT:
                continue
            numres = int(fields[1])
            if numres not in per_residue:
                per_residue[numres] = {"all": [], "Ca": [], "Dmax": []}
            values = calculate.RMSDTwoList(ref_prot.byres[resname],
                                           cur_prot.byres[resname])
            per_residue[numres]["all"].append(values[0])
            per_residue[numres]["Ca"].append(values[1])
            # NOTE(review): Dmax is read from index 3 here; confirm against
            # the layout returned by calculate.RMSDTwoList
            per_residue[numres]["Dmax"].append(values[3])

        frame_index += self.stepFrame

    for num in sorted(per_residue):
        row = [str(num),
               str(average(per_residue[num]["all"])),
               str(average(per_residue[num]["Ca"])),
               str(average(per_residue[num]["Dmax"]))]
        filout.write("\t".join(row) + "\n")

    filout.close()
    runExternalSoft.runScatterplotRMSF(pfilout)
def FPIbycluster(self, pprot):
    """Compute the FPI of every docking pose, grouped by cluster.

    pprot -- path of the protein PDB file each pose is added to

    For every cluster of ``self.clusters`` the poses of ``self.prdocking``
    whose file name matches (``search``) one of the cluster's compounds and
    contains ``pdb`` are collected.  Each pose is added to a freshly parsed
    protein, its FPI is computed, and one table
    ``<self.prout>FPI/fpi_out_<cluster>`` is written.  A summary across
    clusters is written to ``<self.prout>FPI/meansInteraction``.
    """
    d_FPI = {}
    prFPI = self.prout + "FPI/"
    pathFolder.createFolder(prFPI, clean=0)
    prFPItemp = self.prout + "FPItemp/"
    pathFolder.createFolder(prFPItemp, clean=1)
    lposesAll = listdir(self.prdocking)
    # same folder as prFPI above
    proutFPI = self.prout + "FPI/"
    pathFolder.createFolder(proutFPI)
    pfiloutaveragecluster = proutFPI + "meansInteraction"
    filoutaveragecluster = open(pfiloutaveragecluster, "w")
    # every residue seen in any pose of any cluster, in order of appearance
    lresglobal = []
    for cluster in self.clusters.keys():
        if not cluster in d_FPI.keys():
            d_FPI[cluster] = {}
        # collect the pose files belonging to this cluster, without duplicates
        lposecluster = []
        for compound in self.clusters[cluster]:
            for poseAll in lposesAll:
                pposetemp = self.prdocking + poseAll
                if search(compound, poseAll) and search("pdb", poseAll):
                    if not pposetemp in lposecluster:
                        lposecluster.append(pposetemp)
        # add pose in protein file
        lResBS = []
        for posecluster in lposecluster:
            cProt = PDB.PDB(pprot, hydrogen=1)
            cProt.addLigand(posecluster)
            CFPI = FPI.ligFPI(cProt, prFPItemp, ligID="LIG")
            CFPI.computeFPI(clean=1)
            print CFPI
            d_FPI[cluster][posecluster] = CFPI
            # first key of the FPI dict is used as the ligand identifier
            ligID = CFPI.FPI.keys()[0]
            for resBS in CFPI.FPI[ligID]:
                if not resBS in lResBS:
                    lResBS.append(resBS)
                if not resBS in lresglobal:
                    lresglobal.append(resBS)
        # write output
        filout = open(proutFPI + "fpi_out_" + str(cluster), "w")
        filout.write("pose\t" + " ".join(lResBS) + "\n")
        for pose in lposecluster:
            filout.write(pose.split("/")[-1])
            for resBS in lResBS:
                # residues absent from this pose get an all-zero bit string
                if not resBS in d_FPI[cluster][pose].FPI[ligID].keys():
                    filout.write("\t0000000")
                else:
                    filout.write(
                        "\t" + str(d_FPI[cluster][pose].FPI[ligID][resBS]))
            filout.write("\n")
        filout.close()
    # average interaction
    filoutaveragecluster.write("Clusters\t" + "\t".join(lresglobal) + "\n")
    for cluster in d_FPI.keys():
        filoutaveragecluster.write(str(cluster))
        for resglobal in lresglobal:
            lbit = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            # NOTE(review): the loop over the cluster's poses below is
            # commented out, so `pose` and `ligID` are the values left over
            # from the per-cluster loop above; this looks unintended (only
            # one pose is read, and it may not belong to `cluster`) -- confirm
            # for pose in d_FPI[cluster].keys():
            if resglobal in d_FPI[cluster][pose].FPI[ligID].keys():
                for i in range(0, 7):
                    print lbit[i], d_FPI[cluster][pose].FPI[ligID][
                        resglobal][i]
                    lbit[i] = float(lbit[i]) + float(
                        d_FPI[cluster][pose].FPI[ligID][resglobal][i])
                # divide each bit count by the number of poses in the cluster
                for i in range(0, 7):
                    lbit[i] = str(lbit[i] / len(d_FPI[cluster].keys()))
            else:
                lbit = [str(i) for i in lbit]
            filoutaveragecluster.write("\t" + " ".join(lbit))
        filoutaveragecluster.write("\n")
    filoutaveragecluster.close()
os.mkdir(downloaded_pdb) os.mkdir(required_chains) os.mkdir(filter_output) os.mkdir(good_chains) os.mkdir(multi_ion_chains) os.mkdir(potential_profile_output) os.mkdir(simplex_output) os.mkdir(final_data) os.mkdir(FreeSASA_Input) os.mkdir(FreeSASA_Output) Information = [] chainGroups = [] for item in PDBPool: item = item.lower() ClassPDB = PDB(item, False, downloaded_pdb, 'pdb', True, abbr) ClassPDB.GetPDB() entry = 'pdb' + item + '.ent' RawPDBPath = downloaded_pdb + item chains = ClassPDB.GetChainNames(abbr) reqChains, chainGroup = ClassPDB.GetUniqueChains( downloaded_pdb, item, chains) chainGroups.append(chainGroup) info = item, chains, reqChains, chainGroup Information.append(info) for chain in reqChains: chain = chain.split('_') pdb, chain = chain[0], chain[1] outfname = pdb + "_" + chain + '.pdb' outPDB = required_chains + outfname ClassPDB = PDB(pdb, False, downloaded_pdb, 'pdb', True, abbr)