def applyTMAlignList(l_pr_ref, pr_out):
    """Run TM-align on every unordered pair of entries of l_pr_ref.

    Each pair (i < j) is aligned into the folder
    ``pr_out + PDB1 + "__" + PDB2 + "/"``, where PDB1 / PDB2 are the first
    four characters of the last "/"-separated component of each entry.

    Args:
        l_pr_ref: list of "/"-separated path strings to align pairwise.
        pr_out: output prefix, concatenated as-is with the pair name
            (presumably ends with "/" -- TODO confirm with callers).

    Returns:
        dict of dict holding the parsed TM-align output of each pair, stored
        as d[PDB1][PDB2], or as d[PDB2][PDB1] when PDB1 has no entry yet but
        PDB2 already has one.
    """
    pathManage.generatePath(pr_out)

    d_out = {}
    for i, pr_ref1 in enumerate(l_pr_ref):
        PDB1 = pr_ref1.split("/")[-1][0:4]

        # only the entries after i: every unordered pair is run exactly once
        for pr_ref2 in l_pr_ref[i + 1:]:
            PDB2 = pr_ref2.split("/")[-1][0:4]

            # folder TM align
            pr_alignement = pr_out + PDB1 + "__" + PDB2 + "/"

            # RUN
            out_file = runOtherSoft.runTMalign(pr_ref1, pr_ref2, pr_alignement)

            # Cleaning stays best effort (original note: problem with several
            # runs, folder cleaned too fast). Only ordinary errors are
            # ignored, so KeyboardInterrupt / SystemExit still propagate.
            try:
                CleanResultTMalign(pr_alignement)
            except Exception:
                pass

            # parse result, the last element of out_file is the file parsed
            d_align = parseTMalign.parseOutputTMalign(out_file[-1])
            if PDB1 in d_out:
                d_out[PDB1][PDB2] = d_align
            elif PDB2 in d_out:
                d_out[PDB2][PDB1] = d_align
            else:
                d_out[PDB1] = {PDB2: d_align}

    return d_out
def Superimpose(self, refFrame):
    """Align frames on a reference frame with TM-align.

    Frames stepFrame, 2 * stepFrame, ... (while below MDtime / timeframe)
    are aligned on frame refFrame. For each aligned frame, the second to
    last file returned by runTMalign is moved to
    ``<prRMSD>superimpose/<ref>_<frame>`` and one "Frame / RMSD / TMscore"
    line is appended to ``<prRMSD>TMalignScore``. Frames whose output file
    already exists and is not empty are skipped, so an interrupted run can
    be resumed.

    Args:
        refFrame: number of the reference frame, formatted with "%05d".

    Returns:
        None. Sets self.prSuperMatrix to the superimpose folder.
    """
    prSuperimposed = self.dMD["prRMSD"] + "superimpose/"
    pathFolder.createFolder(prSuperimposed)
    self.prSuperMatrix = prSuperimposed

    # control to superimposition file exist: stop when the folder already
    # holds the expected number of files
    imax = float(self.MDtime) / float(self.timeframe)
    nbmatrix = float(len(listdir(prSuperimposed)))
    nbth = imax / float(self.stepFrame) - 1
    if nbmatrix == nbth:
        # single-argument print(...) prints the same in Python 2 and 3
        print(prSuperimposed)
        print("Full folder")
        return

    prtemp = self.dMD["prRMSD"] + "temp/"
    pathFolder.createFolder(prtemp)

    # the header is written only when the score file is created, otherwise
    # new lines are appended to the existing file
    pTMalignScore = self.dMD["prRMSD"] + "TMalignScore"
    newScoreFile = not path.exists(pTMalignScore)

    # "with" closes the score file even when an alignment, the parsing or
    # the move fails inside the loop
    with open(pTMalignScore, "w" if newScoreFile else "a") as TMalignScore:
        if newScoreFile:
            TMalignScore.write("Frame\tRMSD\tTMscore\n")

        i = self.stepFrame
        while i < imax:
            print("%s %s" % (i, imax))
            nframe1 = "%05d" % (refFrame)
            nframe2 = "%05d" % (i)
            pframe1 = self.dMD["prframe"] + "frame_" + nframe1 + ".pdb"
            pframe2 = self.dMD["prframe"] + "frame_" + nframe2 + ".pdb"

            # control if existing
            pmatrix = prSuperimposed + nframe1 + "_" + nframe2
            if not (path.exists(pmatrix) and path.getsize(pmatrix) > 0):
                pathFolder.createFolder(prtemp, clean=1)  # clean folder temp
                lsuperimposed = runExternalSoft.runTMalign(
                    pframe2, pframe1, prtemp)
                dalign = parseTMalign.parseOutputTMalign(lsuperimposed[-1])
                TMalignScore.write(
                    nframe2 + "\t" + str(dalign["RMSD"]) + "\t" +
                    str(dalign["TMscore1"]) + "\n")
                move(lsuperimposed[-2], pmatrix)

            i += self.stepFrame
def applyTMAlignList(l_pr_ref, pr_out):
    """Run TM-align on every unordered pair of entries of l_pr_ref.

    Each pair (i < j) is aligned into the folder
    ``pr_out + PDB1 + "__" + PDB2 + "/"``, where PDB1 / PDB2 are the first
    four characters of the last "/"-separated component of each entry.

    Args:
        l_pr_ref: list of "/"-separated path strings to align pairwise.
        pr_out: output prefix, concatenated as-is with the pair name
            (presumably ends with "/" -- TODO confirm with callers).

    Returns:
        dict of dict holding the parsed TM-align output of each pair, stored
        as d[PDB1][PDB2], or as d[PDB2][PDB1] when PDB1 has no entry yet but
        PDB2 already has one.
    """
    pathManage.generatePath(pr_out)

    d_out = {}
    for i, pr_ref1 in enumerate(l_pr_ref):
        PDB1 = pr_ref1.split("/")[-1][0:4]

        # only the entries after i: every unordered pair is run exactly once
        for pr_ref2 in l_pr_ref[i + 1:]:
            PDB2 = pr_ref2.split("/")[-1][0:4]

            # folder TM align
            pr_alignement = pr_out + PDB1 + "__" + PDB2 + "/"

            # RUN
            out_file = runOtherSoft.runTMalign(pr_ref1, pr_ref2, pr_alignement)

            # Cleaning stays best effort (original note: problem with several
            # runs, folder cleaned too fast). Only ordinary errors are
            # ignored, so KeyboardInterrupt / SystemExit still propagate.
            try:
                CleanResultTMalign(pr_alignement)
            except Exception:
                pass

            # parse result, the last element of out_file is the file parsed
            d_align = parseTMalign.parseOutputTMalign(out_file[-1])
            if PDB1 in d_out:
                d_out[PDB1][PDB2] = d_align
            elif PDB2 in d_out:
                d_out[PDB2][PDB1] = d_align
            else:
                d_out[PDB1] = {PDB2: d_align}

    return d_out
def analysisBS(name_lig, ID_seq='0.0', debug=1):
    """Write binding-site RMSD tables for one ligand and plot histograms.

    For every 4-character folder found in the result directory of name_lig,
    each query structure is paired with the reference PDB and its TM-align
    output is parsed. Pairs whose "IDseq" reaches ID_seq get one row per
    reference substructure (file ``<name_lig>_<substructure>_``) and one row
    for the ligand itself (file ``<name_lig>_``). Counters are written to
    ``summary.txt`` and unreadable TM-align outputs are listed in
    ``log.txt``. All files are created in the "sameBS" result folder.

    Args:
        name_lig: ligand name; also selects the max_RMSD used for the
            histograms (ATP 5.0, ADP 4.0, AMP 4.0, otherwise 3.5).
        ID_seq: minimal "IDseq" value, number or numeric string.
        debug: when true, print the folders and queries being processed.

    Returns:
        1 once every file is written and the histograms were requested.
    """
    header = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"

    def _row(p_ref, p_query, RMSD_prot, l_RMSD):
        # one table line, values in the order of the header columns
        name_bs = (p_ref.split("/")[-1][0:-4] + "_*_" +
                   p_query.split("/")[-1][0:-4])
        l_val = [RMSD_prot, l_RMSD[1], l_RMSD[0], l_RMSD[2], l_RMSD[-2],
                 l_RMSD[-1]]
        return name_bs + "\t" + "\t".join([str(val) for val in l_val]) + "\n"

    # The default threshold is the string '0.0'. Comparing a number with a
    # string is ordered by type in Python 2 and is a TypeError in Python 3,
    # so both sides are compared as floats.
    # NOTE(review): assumes parseOutputTMalign gives a numeric "IDseq" --
    # confirm against the parser.
    ID_seq_min = float(ID_seq)

    pr_result = pathManage.result(name_lig)
    pr_out = pathManage.result(name_lig + "/sameBS")
    pr_dataset = pathManage.dataset(name_lig)

    nb_BS = 0
    nb_BS_filtered = 0
    nb_same_BS = 0

    # dictionary with files, key -> open file ("global", "summary", then one
    # key per substructure name)
    d_file_BS = {}
    # log files
    filout_log = open(pr_out + "log.txt", "w")
    try:
        d_file_BS["global"] = open(pr_out + name_lig + "_", "w")
        d_file_BS["global"].write(header)
        d_file_BS["summary"] = open(pr_out + "summary.txt", "w")

        for PDB_ref in listdir(pr_result):
            # single-argument print(...) prints the same in Python 2 and 3
            if debug:
                print(PDB_ref)
            if len(PDB_ref) != 4:
                continue

            p_pdb_ref = pathManage.findPDBRef(pr_dataset + PDB_ref + "/")
            l_p_query = pathManage.findPDBQueryTransloc(
                pathManage.result(name_lig) + PDB_ref + "/")
            if debug:
                print(l_p_query)

            for p_query in l_p_query:
                # read TM Align
                if debug:
                    print(p_query.split("/")[-1][7:-4])
                p_TMalign = (pathManage.alignmentOutput(name_lig) +
                             p_pdb_ref.split("/")[-1][0:-4] + "__" +
                             p_query.split("/")[-1][7:-4] + "/RMSD")
                # an unreadable alignment is logged and skipped; Exception
                # (not a bare except) lets Ctrl-C stop the run
                try:
                    score_align = parseTMalign.parseOutputTMalign(p_TMalign)
                except Exception:
                    filout_log.write("ERROR TM align " + p_TMalign + "\n")
                    continue

                nb_BS = nb_BS + 1
                if float(score_align["IDseq"]) < ID_seq_min:
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS
                l_p_substruct_ref = pathManage.findSubstructRef(
                    pr_dataset + PDB_ref + "/", name_lig)
                for p_substruct_ref in l_p_substruct_ref:
                    struct_substitued = p_substruct_ref.split("_")[-2]
                    # write header when the substructure file is created
                    if struct_substitued not in d_file_BS:
                        d_file_BS[struct_substitued] = open(
                            pr_out + name_lig + "_" + struct_substitued + "_",
                            "w")
                        d_file_BS[struct_substitued].write(header)

                    RMSD_bs = analysis.computeRMSDBS(
                        p_pdb_ref, p_query, p_substruct_ref, pr_out)
                    if RMSD_bs != []:
                        d_file_BS[struct_substitued].write(
                            _row(p_substruct_ref, p_query,
                                 score_align["RMSD"], RMSD_bs))

                # BS of the complete ligand
                p_ligand_ref = pathManage.findligandRef(
                    pr_dataset + PDB_ref + "/", name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS(
                    p_pdb_ref, p_query, p_ligand_ref, pr_out)
                if RMSD_bs_lig != []:
                    d_file_BS["global"].write(
                        _row(p_ligand_ref, p_query, score_align["RMSD"],
                             RMSD_bs_lig))
                    # last value is the one written in the "identic" column
                    if RMSD_bs_lig[-1] == 1:
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write("BS global: " + str(nb_BS) + "\n")
        d_file_BS["summary"].write("BS - IDseq " + str(ID_seq) + "%: " +
                                   str(nb_BS_filtered) + "\n")
        d_file_BS["summary"].write("BS - same atom number: " +
                                   str(nb_same_BS) + "\n")
    finally:
        # every file is closed, also when the analysis fails half-way
        filout_log.close()
        for filout in d_file_BS.values():
            filout.close()

    # run histograms on the closed files
    # NOTE(review): summary.txt is in d_file_BS and is therefore also given
    # to RhistogramRMSD, as before -- confirm this is wanted.
    d_max_RMSD = {"ATP": 5.0, "ADP": 4.0, "AMP": 4.0}
    max_RMSD = d_max_RMSD.get(name_lig, 3.5)
    for filout in d_file_BS.values():
        runOtherSoft.RhistogramRMSD(filout.name, max_RMSD=max_RMSD)

    return 1
def analysisBS(name_lig, ID_seq='0.0', debug=1):
    """Write binding-site RMSD tables for one ligand and plot histograms.

    For every 4-character folder found in the result directory of name_lig,
    each query structure is paired with the reference PDB and its TM-align
    output is parsed. Pairs whose "IDseq" reaches ID_seq get one row per
    reference substructure (file ``<name_lig>_<substructure>_``) and one row
    for the ligand itself (file ``<name_lig>_``). Counters are written to
    ``summary.txt`` and unreadable TM-align outputs are listed in
    ``log.txt``. All files are created in the "sameBS" result folder.

    Args:
        name_lig: ligand name; also selects the max_RMSD used for the
            histograms (ATP 5.0, ADP 4.0, AMP 4.0, otherwise 3.5).
        ID_seq: minimal "IDseq" value, number or numeric string.
        debug: when true, print the folders and queries being processed.

    Returns:
        1 once every file is written and the histograms were requested.
    """
    header = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"

    def _row(p_ref, p_query, RMSD_prot, l_RMSD):
        # one table line, values in the order of the header columns
        name_bs = (p_ref.split("/")[-1][0:-4] + "_*_" +
                   p_query.split("/")[-1][0:-4])
        l_val = [RMSD_prot, l_RMSD[1], l_RMSD[0], l_RMSD[2], l_RMSD[-2],
                 l_RMSD[-1]]
        return name_bs + "\t" + "\t".join([str(val) for val in l_val]) + "\n"

    # The default threshold is the string '0.0'. Comparing a number with a
    # string is ordered by type in Python 2 and is a TypeError in Python 3,
    # so both sides are compared as floats.
    # NOTE(review): assumes parseOutputTMalign gives a numeric "IDseq" --
    # confirm against the parser.
    ID_seq_min = float(ID_seq)

    pr_result = pathManage.result(name_lig)
    pr_out = pathManage.result(name_lig + "/sameBS")
    pr_dataset = pathManage.dataset(name_lig)

    nb_BS = 0
    nb_BS_filtered = 0
    nb_same_BS = 0

    # dictionary with files, key -> open file ("global", "summary", then one
    # key per substructure name)
    d_file_BS = {}
    # log files
    filout_log = open(pr_out + "log.txt", "w")
    try:
        d_file_BS["global"] = open(pr_out + name_lig + "_", "w")
        d_file_BS["global"].write(header)
        d_file_BS["summary"] = open(pr_out + "summary.txt", "w")

        for PDB_ref in listdir(pr_result):
            # single-argument print(...) prints the same in Python 2 and 3
            if debug:
                print(PDB_ref)
            if len(PDB_ref) != 4:
                continue

            p_pdb_ref = pathManage.findPDBRef(pr_dataset + PDB_ref + "/")
            l_p_query = pathManage.findPDBQueryTransloc(
                pathManage.result(name_lig) + PDB_ref + "/")
            if debug:
                print(l_p_query)

            for p_query in l_p_query:
                # read TM Align
                if debug:
                    print(p_query.split("/")[-1][7:-4])
                p_TMalign = (pathManage.alignmentOutput(name_lig) +
                             p_pdb_ref.split("/")[-1][0:-4] + "__" +
                             p_query.split("/")[-1][7:-4] + "/RMSD")
                # an unreadable alignment is logged and skipped; Exception
                # (not a bare except) lets Ctrl-C stop the run
                try:
                    score_align = parseTMalign.parseOutputTMalign(p_TMalign)
                except Exception:
                    filout_log.write("ERROR TM align " + p_TMalign + "\n")
                    continue

                nb_BS = nb_BS + 1
                if float(score_align["IDseq"]) < ID_seq_min:
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS
                l_p_substruct_ref = pathManage.findSubstructRef(
                    pr_dataset + PDB_ref + "/", name_lig)
                for p_substruct_ref in l_p_substruct_ref:
                    struct_substitued = p_substruct_ref.split("_")[-2]
                    # write header when the substructure file is created
                    if struct_substitued not in d_file_BS:
                        d_file_BS[struct_substitued] = open(
                            pr_out + name_lig + "_" + struct_substitued + "_",
                            "w")
                        d_file_BS[struct_substitued].write(header)

                    RMSD_bs = analysis.computeRMSDBS(
                        p_pdb_ref, p_query, p_substruct_ref, pr_out)
                    if RMSD_bs != []:
                        d_file_BS[struct_substitued].write(
                            _row(p_substruct_ref, p_query,
                                 score_align["RMSD"], RMSD_bs))

                # BS of the complete ligand
                p_ligand_ref = pathManage.findligandRef(
                    pr_dataset + PDB_ref + "/", name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS(
                    p_pdb_ref, p_query, p_ligand_ref, pr_out)
                if RMSD_bs_lig != []:
                    d_file_BS["global"].write(
                        _row(p_ligand_ref, p_query, score_align["RMSD"],
                             RMSD_bs_lig))
                    # last value is the one written in the "identic" column
                    if RMSD_bs_lig[-1] == 1:
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write("BS global: " + str(nb_BS) + "\n")
        d_file_BS["summary"].write("BS - IDseq " + str(ID_seq) + "%: " +
                                   str(nb_BS_filtered) + "\n")
        d_file_BS["summary"].write("BS - same atom number: " +
                                   str(nb_same_BS) + "\n")
    finally:
        # every file is closed, also when the analysis fails half-way
        filout_log.close()
        for filout in d_file_BS.values():
            filout.close()

    # run histograms on the closed files
    # NOTE(review): summary.txt is in d_file_BS and is therefore also given
    # to RhistogramRMSD, as before -- confirm this is wanted.
    d_max_RMSD = {"ATP": 5.0, "ADP": 4.0, "AMP": 4.0}
    max_RMSD = d_max_RMSD.get(name_lig, 3.5)
    for filout in d_file_BS.values():
        runOtherSoft.RhistogramRMSD(filout.name, max_RMSD=max_RMSD)

    return 1