def applyTMAlign(substruct):
    """Run TM-align between each reference PDB and the other PDB files of its folder.

    The dataset directory of *substruct* (``pathManage.dataset``) is scanned.
    Only entries whose name is exactly 4 characters long are treated as
    reference folders (presumably PDB identifiers -- TODO confirm).  Inside
    such a folder every file whose name starts with a 4-character token
    (before the first ``_``) and contains ``.pdb`` is aligned against the
    reference structure returned by ``pathManage.findPDBRef``.

    NOTE(review): the visible part of this file defines ``applyTMAlign`` a
    second time further down; Python keeps the later binding, so this copy is
    shadowed at import time.

    Args:
        substruct: name handed to ``pathManage.dataset`` and used as the first
            component of the alignment output path.

    Returns:
        Always 1.
    """
    p_dir_dataset = pathManage.dataset(substruct)

    for ref_folder in listdir(p_dir_dataset):
        if len(ref_folder) != 4:
            continue

        pr_ref = p_dir_dataset + ref_folder + "/"
        l_pdbfile = listdir(pr_ref)
        p_pdb_ref = pathManage.findPDBRef(pr_ref)

        for pdbfile in l_pdbfile:
            # keep only PDB files whose name starts with a 4-character token;
            # the dot is escaped so that it no longer matches any character
            # (assumes `search` is re.search -- TODO confirm)
            if len(pdbfile.split("_")[0]) != 4 or not search(r"\.pdb", pdbfile):
                continue

            p_file_pdb = pr_ref + pdbfile
            # never align the reference against itself
            if p_file_pdb == p_pdb_ref:
                continue

            # output folder: <substruct>/<reference name>__<query name>,
            # both names without their 4-character extension
            p_dir_align = pathManage.alignmentOutput(
                substruct + "/" + p_pdb_ref.split("/")[-1][:-4] + "__" + pdbfile[:-4])

            # superimpose
            runOtherSoft.runTMalign(p_file_pdb, p_pdb_ref, p_dir_align)

    return 1
def applyTMAlign(substruct):
    """Run TM-align between each reference PDB and the other PDB files of its folder.

    The dataset directory of *substruct* (``pathManage.dataset``) is scanned.
    Only entries whose name is exactly 4 characters long are treated as
    reference folders (presumably PDB identifiers -- TODO confirm).  Inside
    such a folder every file whose name starts with a 4-character token
    (before the first ``_``) and contains ``.pdb`` is aligned against the
    reference structure returned by ``pathManage.findPDBRef``.

    NOTE(review): the visible part of this file also defines
    ``applyTMAlign`` earlier; this later definition is the one that stays
    bound after import.

    Args:
        substruct: name handed to ``pathManage.dataset`` and used as the first
            component of the alignment output path.

    Returns:
        Always 1.
    """
    p_dir_dataset = pathManage.dataset(substruct)

    for ref_folder in listdir(p_dir_dataset):
        if len(ref_folder) != 4:
            continue

        pr_ref = p_dir_dataset + ref_folder + "/"
        l_pdbfile = listdir(pr_ref)
        p_pdb_ref = pathManage.findPDBRef(pr_ref)

        for pdbfile in l_pdbfile:
            # keep only PDB files whose name starts with a 4-character token;
            # the dot is escaped so that it no longer matches any character
            # (assumes `search` is re.search -- TODO confirm)
            if len(pdbfile.split("_")[0]) != 4 or not search(r"\.pdb", pdbfile):
                continue

            p_file_pdb = pr_ref + pdbfile
            # never align the reference against itself
            if p_file_pdb == p_pdb_ref:
                continue

            # output folder: <substruct>/<reference name>__<query name>,
            # both names without their 4-character extension
            p_dir_align = pathManage.alignmentOutput(
                substruct + "/" + p_pdb_ref.split("/")[-1][:-4] + "__" + pdbfile[:-4])

            # superimpose
            runOtherSoft.runTMalign(p_file_pdb, p_pdb_ref, p_dir_align)

    return 1
def analysisBS(name_lig, ID_seq='0.0', debug=1):
    """Write binding-site RMSD tables for one ligand and plot their histograms.

    For every 4-character entry of the result folder of *name_lig*, each
    translocated query PDB is paired with the reference PDB, the TM-align
    output of the pair is parsed, and ``analysis.computeRMSDBS`` is called for
    the whole ligand ("global" table) and for each reference substructure
    (one table per substructure).  Tables, a ``summary.txt`` and a ``log.txt``
    are written in ``<result>/sameBS``; ``runOtherSoft.RhistogramRMSD`` is
    then run on every file kept in the table dictionary.

    NOTE(review): the visible part of this file defines ``analysisBS`` a
    second time further down; Python keeps the later binding, so this copy is
    shadowed at import time.
    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm that the RMSD computations belong under the IDseq filter.

    Args:
        name_lig: ligand name, used to locate dataset/result/alignment folders.
        ID_seq: threshold compared with ``score_align["IDseq"]`` using ``>=``.
            NOTE(review): the default is a string; if the parsed score is a
            number this is a mixed-type comparison -- confirm the type
            returned by ``parseTMalign.parseOutputTMalign``.
        debug: when truthy, folder names and query lists are printed.

    Returns:
        Always 1.
    """
    header = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"
    # upper bound handed to the histogram script; 3.5 for any other ligand
    d_max_RMSD = {"ATP": 5.0, "ADP": 4.0, "AMP": 4.0}

    def formatRow(p_ref, p_query, RMSD_prot, l_RMSD):
        # column order follows `header`: values 1, 0, 2, -2, -1 of l_RMSD
        name_bs = p_ref.split("/")[-1][0:-4] + "_*_" + p_query.split("/")[-1][0:-4]
        l_col = [name_bs, RMSD_prot, l_RMSD[1], l_RMSD[0], l_RMSD[2], l_RMSD[-2], l_RMSD[-1]]
        return "\t".join([str(col) for col in l_col]) + "\n"

    pr_result = pathManage.result(name_lig)
    pr_out = pathManage.result(name_lig + "/sameBS")

    nb_BS = 0
    nb_BS_filtered = 0
    nb_same_BS = 0

    # log file and dictionary of output files (closed in `finally`)
    filout_log = open(pr_out + "log.txt", "w")
    d_file_BS = {}
    try:
        d_file_BS["global"] = open(pr_out + name_lig + "_", "w")
        d_file_BS["global"].write(header)
        d_file_BS["summary"] = open(pr_out + "summary.txt", "w")

        pr_dataset = pathManage.dataset(name_lig)

        for PDB_ref in listdir(pr_result):
            if debug:
                print(PDB_ref)
            if len(PDB_ref) != 4:
                continue

            pr_ref = pr_dataset + PDB_ref + "/"
            p_pdb_ref = pathManage.findPDBRef(pr_ref)
            l_p_query = pathManage.findPDBQueryTransloc(pr_result + PDB_ref + "/")
            if debug:
                print(l_p_query)

            for p_query in l_p_query:
                # query name without its first 7 characters and its extension
                name_query = p_query.split("/")[-1][7:-4]
                if debug:
                    print(name_query)

                # read TM Align
                p_TMalign = (pathManage.alignmentOutput(name_lig)
                             + p_pdb_ref.split("/")[-1][0:-4] + "__" + name_query + "/RMSD")
                try:
                    score_align = parseTMalign.parseOutputTMalign(p_TMalign)
                except Exception:
                    # best effort: an unreadable alignment is logged and skipped
                    filout_log.write("ERROR TM align " + p_TMalign + "\n")
                    continue

                nb_BS = nb_BS + 1
                if not (score_align["IDseq"] >= ID_seq):
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS: one table per substructure of the reference
                for p_substruct_ref in pathManage.findSubstructRef(pr_ref, name_lig):
                    struct_substitued = p_substruct_ref.split("_")[-2]
                    if struct_substitued not in d_file_BS:
                        d_file_BS[struct_substitued] = open(
                            pr_out + name_lig + "_" + struct_substitued + "_", "w")
                        d_file_BS[struct_substitued].write(header)

                    RMSD_bs = analysis.computeRMSDBS(p_pdb_ref, p_query, p_substruct_ref, pr_out)
                    if RMSD_bs != []:
                        d_file_BS[struct_substitued].write(
                            formatRow(p_substruct_ref, p_query, score_align["RMSD"], RMSD_bs))

                # global BS: whole reference ligand
                p_ligand_ref = pathManage.findligandRef(pr_ref, name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS(p_pdb_ref, p_query, p_ligand_ref, pr_out)
                if RMSD_bs_lig != []:
                    d_file_BS["global"].write(
                        formatRow(p_ligand_ref, p_query, score_align["RMSD"], RMSD_bs_lig))
                    if RMSD_bs_lig[-1] == 1:
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write("BS global: " + str(nb_BS) + "\n")
        d_file_BS["summary"].write("BS - IDseq " + str(ID_seq) + "%: " + str(nb_BS_filtered) + "\n")
        d_file_BS["summary"].write("BS - same atom number: " + str(nb_same_BS) + "\n")
    finally:
        # release every handle even when the analysis stops on an error
        filout_log.close()
        for filout in d_file_BS.values():
            filout.close()

    # run histograms on the closed files
    # NOTE(review): "summary.txt" is part of d_file_BS and is therefore also
    # handed to RhistogramRMSD, as in the original code -- confirm intent.
    max_RMSD = d_max_RMSD.get(name_lig, 3.5)
    for filout in d_file_BS.values():
        runOtherSoft.RhistogramRMSD(filout.name, max_RMSD=max_RMSD)

    return 1
def globalArrangement(pr_orgin, p_smile, p_family, name_ligand, l_ligand_out):
    """Arrange ligand, substructure (LSR) and binding-site files in a folder tree.

    Each line of *p_smile* is split on tabs: the first field is a SMILES, the
    last three fields are space-separated lists of query PDB ids, reference
    PDB ids and ligand names.  For every (reference, query, ligand) triple of
    a line, the files found through ``pathManage`` are written or copied to

        <pr_orgin><first_folder>/<replacement>/<family>-<group>_<PDB ref>/{LGD,BS,LSR}/

    where ``first_folder`` depends on the substructure name (third token from
    the end of *p_smile* split on ``_``) and on ``smileAnalysis.smallLSR``.

    An entry for which no reference fragment or no binding-site file matches
    is now skipped with a message; previously the path found for the previous
    entry was silently reused (or a NameError was raised on the first entry).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm that the per-entry loop belongs inside the per-line loop.

    Args:
        pr_orgin: root output directory (expected to end with a separator).
        p_smile: path of the tab-separated SMILES file.
        p_family: not used by this function.
        name_ligand: reference ligand name.
        l_ligand_out: ligand names to exclude.

    Returns:
        Always 1.
    """

    def firstMatch(l_pattern, l_path):
        # first path matched by every pattern, None when there is none
        for p_candidate in l_path:
            if all(search(pattern, p_candidate) for pattern in l_pattern):
                return p_candidate
        return None

    subst = p_smile.split("_")[-3]

    with open(p_smile, "r") as filin:
        l_line_smile = filin.readlines()

    for line_smile in l_line_smile:
        # search substructure
        l_field = line_smile.split("\t")
        l_PDB_query = l_field[-3].split(" ")
        l_PDB_ref = l_field[-2].split(" ")
        l_ligand = line_smile.strip().split("\t")[-1].split(" ")

        # search replacement
        smile = l_field[0]

        # search if LSR is small -> threshold < 3
        small_LSR = smileAnalysis.smallLSR(smile)
        first_folder = "ribose" if subst == "ribose" else "Pi"
        if small_LSR == 1:
            first_folder = first_folder + "_small"

        print(" ".join([str(v) for v in (smile, l_PDB_query, l_PDB_ref, l_ligand, subst, small_LSR)]))

        replacement, metal = smileAnalysis.searchReplacement(
            smile, l_PDB_query[0], l_PDB_ref[0], name_ligand)

        # case with cycle -> search replacement 2, used as a sub-folder
        if replacement == "cycle":
            replacement2, metal = smileAnalysis.searchReplacement(
                smile, l_PDB_query[0], l_PDB_ref[0], name_ligand, in_cycle=1)
            replacement = replacement + "/" + replacement2

        # case metal
        if replacement == "metal":
            print(" ".join([str(v) for v in (metal, l_PDB_query, l_PDB_ref, name_ligand)]))

        for i in range(len(l_PDB_ref)):
            # exclusion of ligand out
            if l_ligand[i] in l_ligand_out:
                continue

            group, family = analysis.findFamilyAndGroup(l_PDB_ref[i])

            # folder reference
            pr_dataset = pathManage.dataset(name_ligand + "/" + l_PDB_ref[i])
            PDB_ref = pathManage.findPDBRef(pr_dataset)
            p_ligand_ref = pathManage.findligandRef(pr_dataset, name_ligand)
            p_frag_ref = firstMatch([subst], pathManage.findSubstructRef(pr_dataset, name_ligand))

            # folder query
            pr_result = pathManage.result(name_ligand + "/" + l_PDB_ref[i])
            # only needed by the (currently disabled) copy of the query protein
            p_protein_query = firstMatch([l_ligand[i], l_PDB_query[i]],
                                         pathManage.findPDBQueryTransloc(pr_result))

            if replacement != "metal":
                p_lig_query = pathManage.findligandQuery(pr_dataset, l_ligand[i], l_PDB_query[i])
            else:
                p_lig_query = pathManage.findligandQuery(pr_dataset, metal, l_PDB_query[i])

            # need apply transloc matrix
            matrix_transloc = pathManage.findMatrix(p_ligand_ref, p_lig_query, name_ligand)
            lig_query_parsed = parsePDB.loadCoordSectionPDB(p_lig_query)
            try:
                superposeStructure.applyMatrixLigand(lig_query_parsed, matrix_transloc)
            except Exception:
                # best effort: an entry whose matrix cannot be applied is skipped
                continue

            p_lig_substituate = pathManage.findSubstructFind(
                pr_result, l_ligand[i], l_PDB_query[i], subst)
            p_BS = firstMatch([l_ligand[i]], pathManage.findFileBS(pr_result, l_PDB_query[i]))

            # checked before anything is written so no partial folder is left
            if p_frag_ref is None or p_BS is None:
                print("WARNING: missing reference fragment or binding-site file for "
                      + l_PDB_ref[i] + " / " + l_ligand[i] + ", entry skipped")
                continue

            # output tree: family and group prefix the reference PDB id
            pr_final = (pr_orgin + first_folder + "/" + replacement + "/"
                        + str(family) + "-" + str(group) + "_" + l_PDB_ref[i] + "/")
            pr_ligand = pr_final + "LGD/"
            pr_BS = pr_final + "BS/"
            pr_sust = pr_final + "LSR/"
            for pr_create in (pr_final, pr_ligand, pr_BS, pr_sust):
                if not path.isdir(pr_create):
                    makedirs(pr_create)

            # list file: append mode creates the file when it does not exist
            with open(pr_sust + "list.smile", "a") as file_smile_queries:
                file_smile_queries.write(str(smile) + "\n")

            # query ligand (translocated coordinates) + smile
            name_lig_query = p_lig_query.split("/")[-1]
            writePDBfile.coordinateSection(pr_ligand + "LGD_" + name_lig_query, lig_query_parsed,
                                           recorder="HETATM", header="LCG_" + name_lig_query,
                                           connect_matrix=1)
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_" + name_lig_query, clean_smi=1)

            # reference ligand + smile
            name_lig_ref = p_ligand_ref.split("/")[-1]
            copy2(p_ligand_ref, pr_ligand + "LGD_REF_" + name_lig_ref)
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_REF_" + name_lig_ref)

            # reference LSR
            copy2(p_frag_ref, pr_sust + "LSR_REF_" + name_ligand + "_" + l_PDB_ref[i] + ".pdb")

            # query LSR -> query ligand file name is only used for naming
            copy2(p_lig_substituate, pr_sust + "LSR_" + subst + "_" + name_lig_query)

            # query BS
            copy2(p_BS, pr_BS)

            # BS from reference
            l_atom_BS = parsePDB.computeBS(PDB_ref, p_ligand_ref, thresold=4.50, option_onlyATOM=0)
            writePDBfile.coordinateSection(
                pr_BS + "BS_REF_" + name_ligand + "_" + PDB_ref.split("/")[-1], l_atom_BS,
                recorder="ATOM", header="BS_REF_" + name_ligand + "_" + PDB_ref,
                connect_matrix=0)

    return 1
def analysisBS(name_lig, ID_seq='0.0', debug=1):
    """Write binding-site RMSD tables for one ligand and plot their histograms.

    For every 4-character entry of the result folder of *name_lig*, each
    translocated query PDB is paired with the reference PDB, the TM-align
    output of the pair is parsed, and ``analysis.computeRMSDBS`` is called for
    the whole ligand ("global" table) and for each reference substructure
    (one table per substructure).  Tables, a ``summary.txt`` and a ``log.txt``
    are written in ``<result>/sameBS``; ``runOtherSoft.RhistogramRMSD`` is
    then run on every file kept in the table dictionary.

    NOTE(review): the visible part of this file also defines ``analysisBS``
    earlier; this later definition is the one that stays bound after import.
    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm that the RMSD computations belong under the IDseq filter.

    Args:
        name_lig: ligand name, used to locate dataset/result/alignment folders.
        ID_seq: threshold compared with ``score_align["IDseq"]`` using ``>=``.
            NOTE(review): the default is a string; if the parsed score is a
            number this is a mixed-type comparison -- confirm the type
            returned by ``parseTMalign.parseOutputTMalign``.
        debug: when truthy, folder names and query lists are printed.

    Returns:
        Always 1.
    """
    header = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"
    # upper bound handed to the histogram script; 3.5 for any other ligand
    d_max_RMSD = {"ATP": 5.0, "ADP": 4.0, "AMP": 4.0}

    def formatRow(p_ref, p_query, RMSD_prot, l_RMSD):
        # column order follows `header`: values 1, 0, 2, -2, -1 of l_RMSD
        name_bs = p_ref.split("/")[-1][0:-4] + "_*_" + p_query.split("/")[-1][0:-4]
        l_col = [name_bs, RMSD_prot, l_RMSD[1], l_RMSD[0], l_RMSD[2], l_RMSD[-2], l_RMSD[-1]]
        return "\t".join([str(col) for col in l_col]) + "\n"

    pr_result = pathManage.result(name_lig)
    pr_out = pathManage.result(name_lig + "/sameBS")

    nb_BS = 0
    nb_BS_filtered = 0
    nb_same_BS = 0

    # log file and dictionary of output files (closed in `finally`)
    filout_log = open(pr_out + "log.txt", "w")
    d_file_BS = {}
    try:
        d_file_BS["global"] = open(pr_out + name_lig + "_", "w")
        d_file_BS["global"].write(header)
        d_file_BS["summary"] = open(pr_out + "summary.txt", "w")

        pr_dataset = pathManage.dataset(name_lig)

        for PDB_ref in listdir(pr_result):
            if debug:
                print(PDB_ref)
            if len(PDB_ref) != 4:
                continue

            pr_ref = pr_dataset + PDB_ref + "/"
            p_pdb_ref = pathManage.findPDBRef(pr_ref)
            l_p_query = pathManage.findPDBQueryTransloc(pr_result + PDB_ref + "/")
            if debug:
                print(l_p_query)

            for p_query in l_p_query:
                # query name without its first 7 characters and its extension
                name_query = p_query.split("/")[-1][7:-4]
                if debug:
                    print(name_query)

                # read TM Align
                p_TMalign = (pathManage.alignmentOutput(name_lig)
                             + p_pdb_ref.split("/")[-1][0:-4] + "__" + name_query + "/RMSD")
                try:
                    score_align = parseTMalign.parseOutputTMalign(p_TMalign)
                except Exception:
                    # best effort: an unreadable alignment is logged and skipped
                    filout_log.write("ERROR TM align " + p_TMalign + "\n")
                    continue

                nb_BS = nb_BS + 1
                if not (score_align["IDseq"] >= ID_seq):
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS: one table per substructure of the reference
                for p_substruct_ref in pathManage.findSubstructRef(pr_ref, name_lig):
                    struct_substitued = p_substruct_ref.split("_")[-2]
                    if struct_substitued not in d_file_BS:
                        d_file_BS[struct_substitued] = open(
                            pr_out + name_lig + "_" + struct_substitued + "_", "w")
                        d_file_BS[struct_substitued].write(header)

                    RMSD_bs = analysis.computeRMSDBS(p_pdb_ref, p_query, p_substruct_ref, pr_out)
                    if RMSD_bs != []:
                        d_file_BS[struct_substitued].write(
                            formatRow(p_substruct_ref, p_query, score_align["RMSD"], RMSD_bs))

                # global BS: whole reference ligand
                p_ligand_ref = pathManage.findligandRef(pr_ref, name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS(p_pdb_ref, p_query, p_ligand_ref, pr_out)
                if RMSD_bs_lig != []:
                    d_file_BS["global"].write(
                        formatRow(p_ligand_ref, p_query, score_align["RMSD"], RMSD_bs_lig))
                    if RMSD_bs_lig[-1] == 1:
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write("BS global: " + str(nb_BS) + "\n")
        d_file_BS["summary"].write("BS - IDseq " + str(ID_seq) + "%: " + str(nb_BS_filtered) + "\n")
        d_file_BS["summary"].write("BS - same atom number: " + str(nb_same_BS) + "\n")
    finally:
        # release every handle even when the analysis stops on an error
        filout_log.close()
        for filout in d_file_BS.values():
            filout.close()

    # run histograms on the closed files
    # NOTE(review): "summary.txt" is part of d_file_BS and is therefore also
    # handed to RhistogramRMSD, as in the original code -- confirm intent.
    max_RMSD = d_max_RMSD.get(name_lig, 3.5)
    for filout in d_file_BS.values():
        runOtherSoft.RhistogramRMSD(filout.name, max_RMSD=max_RMSD)

    return 1
def analyseIons(pr_dataset, name_ligand, p_filout, thresold_max_interaction=4.0):
    """Tabulate distances and angles between ligand phosphate atoms and ions.

    Every 4-character entry of *pr_dataset* is treated as one complex.  For
    each HETATM record of the complex whose ``resName`` is in the module-level
    ``l_ions``, the distances to the atoms returned by
    ``retrieveTwoAtomForAngle`` are computed; records with both d1 and d2
    below 10 are written to *p_filout* and counted.  Three files are written:
    *p_filout* (table), ``<prefix>count.txt`` (counters) and
    ``<prefix>byIons_<name_ligand>`` (number of complexes per ion name), where
    ``<prefix>`` is *p_filout* without its last 4 characters.
    ``runOtherSoft.barplot`` is finally run on the per-ion file.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; in particular confirm that "BS + ions" is counted once per ion
    record rather than once per complex.

    Args:
        pr_dataset: dataset directory (expected to end with a separator).
        name_ligand: ligand name; "ATP" adds a third atom (D3, second angle).
        p_filout: path of the output table.
        thresold_max_interaction: distance below which an ion is counted as
            lying between two phosphate atoms.

    Returns:
        None.
    """
    is_ATP = name_ligand == "ATP"
    p_prefix = p_filout[0:-4]

    # dictionary of counting
    d_count = {"CX": 0, "CX + ions": 0, "BS + ions": 0, "BS + 1-ion": 0,
               "BS + 2-ions": 0, "BS + more-ions": 0, "Interact-1": 0, "Interact-2": 0}
    # number of complexes in which each ion name appears
    d_ions = {}

    l_folder_ref = listdir(pr_dataset)

    with open(p_filout, "w") as filout:
        if is_ATP:
            filout.write("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else:
            filout.write("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref:
            if len(ref_folder) != 4:
                continue

            d_count["CX"] += 1
            only_one = 0
            # ion names already counted in d_ions for this complex
            s_ion_seen = set()

            # path and complex
            p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
            p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # retrieve phosphate
            l_pi = retrieveTwoAtomForAngle(lig_ref_parsed, name_ligand)
            if l_pi == []:
                # case ligand without phosphate
                continue

            flag_interact = 0
            flag_between_1 = 0
            flag_between_2 = 0

            for het_parsed in l_het_parsed:
                name_ion = het_parsed["resName"]
                if name_ion not in l_ions:
                    continue

                d_count["CX + ions"] += 1
                if name_ion not in s_ion_seen:
                    d_ions[name_ion] = d_ions.get(name_ion, 0) + 1
                    s_ion_seen.add(name_ion)

                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if is_ATP:
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                if not (d1 < 10 and d2 < 10):
                    continue

                # per-ion-name counter, incremented at most once per complex
                d_count.setdefault(name_ion, 0)
                if only_one == 0:
                    d_count[name_ion] += 1
                    only_one = 1

                d_count["BS + ions"] += 1
                flag_interact += 1

                if d1 < thresold_max_interaction and d2 < thresold_max_interaction:
                    flag_between_1 += 1

                if is_ATP:
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction:
                        flag_between_2 += 1
                    l_col = [ref_folder, name_ion, d1, d2, d3, angle, angle_bis,
                             l_pi[0]["serial"], l_pi[1]["serial"], l_pi[2]["serial"]]
                else:
                    l_col = [ref_folder, name_ion, d1, d2, angle,
                             l_pi[0]["serial"], l_pi[1]["serial"]]
                filout.write("\t".join([str(col) for col in l_col]) + "\n")

            # per-complex classification by number of ions close to the ligand
            if flag_interact == 1:
                d_count["BS + 1-ion"] += 1
            elif flag_interact == 2:
                d_count["BS + 2-ions"] += 1
            elif flag_interact > 2:
                d_count["BS + more-ions"] += 1

            if flag_between_1 >= 1:
                d_count["Interact-1"] += flag_between_1
            if flag_between_2 >= 1:
                d_count["Interact-2"] += flag_between_2

    with open(p_prefix + "count.txt", "w") as filout_count:
        filout_count.write("CX: " + str(d_count["CX"]) + "\n")
        filout_count.write("CX + ions: " + str(d_count["CX + ions"]) + "\n")
        filout_count.write("BS + ions: " + str(d_count["BS + ions"]) + "\n")
        filout_count.write("BS + 1-ion: " + str(d_count["BS + 1-ion"]) + "\n")
        filout_count.write("BS + 2-ions: " + str(d_count["BS + 2-ions"]) + "\n")
        filout_count.write("BS + more-ions: " + str(d_count["BS + more-ions"]) + "\n")
        filout_count.write("Interact Pi-alpha + Pi-beta: " + str(d_count["Interact-1"]) + "\n")
        filout_count.write("Interact Pi-beta + Pi-gama: " + str(d_count["Interact-2"]) + "\n")

    p_by_ion = p_prefix + "byIons_" + name_ligand
    with open(p_by_ion, "w") as filout_by_ion:
        for name_ion in d_ions:
            filout_by_ion.write(str(name_ion.capitalize()) + "\t" + str(d_ions[name_ion]) + "\n")

    runOtherSoft.barplot(p_by_ion)