def countingSubstituent (name_final, debug = 1):
    """
    Walk the result folder "final_<name_final>" and count what it contains,
    then write the counting tables and plots in its "counting/" sub-folder.

    The tree is read as <type_subref>/<sub>/<PDBref>/ with the three
    sub-folders LSR/, LGD/ and BS/. A "cycle" entry at the <sub> level is
    replaced by its own content ("cycle/<x>"). Entries that cannot be listed
    as folders at the two first levels are skipped.

    Files written (in the folder returned by pathManage.generatePath):
      - one file per ligand substituent: number of LSR files by <sub> folder,
        followed by a call to runOtherSoft.piePlot
      - "count_ligand": occurrences, clusters and families by query ligand,
        followed by a call to runOtherSoft.barplot
      - "CountByLigandRef": number of reference LSR files by reference ligand
        and by type (first "_" field of <type_subref>)
      - "count_pr": reference proteins, query proteins, query ligands and
        family counts by reference ligand

    args: -> name_final: suffix of the result folder name
          -> debug: when true, print the paths/files being processed
    return: None
    """

    # NOTE: every print uses one pre-formatted string so that the output is
    # the same with the Python 2 print statement and the Python 3 function.

    pr_final_folder = pathManage.result("final_" + name_final)

    d_count = {}      # ligand substituent -> {sub folder -> nb of LSR files}
    d_lig = {}        # query ligand -> {"count", "group", "family"}
    d_by_ref = {}     # reference ligand -> {type -> nb of reference LSR files}
    d_count_pr = {}   # reference ligand -> {"pr ref", "pr queries", "lig queries", family -> nb}

    # last reference ligand seen; None until a reference file has been read
    lig_ref = None

    l_file_final = listdir(pr_final_folder)
    if debug : print ("1 %s" % pr_final_folder)

    for pr_type_subref in l_file_final :
        pr_type = pr_final_folder + pr_type_subref + "/"

        # case where pr type is a file not a folder
        try : l_pr_sub = listdir(pr_type)
        except OSError : continue
        if debug : print ("2 %s" % pr_type)

        # case cycle: go down one directory more
        if "cycle" in l_pr_sub :
            l_pr_sub.remove ("cycle")
            for second_sub in listdir (pr_type + "cycle/") :
                l_pr_sub.append ("cycle/" + second_sub)

        for pr_sub in l_pr_sub :
            # case where the substituent entry is a file not a folder
            try : l_pr_PDBref = listdir(pr_type + pr_sub + "/")
            except OSError : continue
            if debug : print ("3 %s %s" % (pr_final_folder + pr_type_subref, pr_sub))

            for pr_PDBref in l_pr_PDBref :
                family_ref = pr_PDBref.split ("-")[0]
                group_ref = pr_PDBref.split ("_")[0].split ("-")[-1]

                pr_ref_folder = pr_type + pr_sub + "/" + pr_PDBref
                pr_LGD = pr_ref_folder + "/LGD/"
                pr_LSR = pr_ref_folder + "/LSR/"
                pr_BS = pr_ref_folder + "/BS/"

                if debug :
                    print ("4 %s" % pr_LGD)
                    print ("4 %s" % pr_BS)
                    print ("4 %s" % pr_LSR)

                ################
                #  folder LSR  #
                ################
                for file_LSR in listdir (pr_LSR) :
                    # -> count by type sub reference
                    if "LSR_" in file_LSR and file_LSR.split ("_")[1] != "REF" :
                        ligand_sub = file_LSR.split ("_")[1]
                        if debug : print ("5 %s" % file_LSR)
                        d_sub = d_count.setdefault (ligand_sub, {})
                        d_sub[pr_sub] = d_sub.get (pr_sub, 0) + 1

                    # -> complete LSR, only the reference ones are counted
                    elif "LSR" in file_LSR :
                        if "REF_" in file_LSR :
                            lig_ref = file_LSR.split ("_")[2][:3]
                            type_ref = pr_type_subref.split ("_")[0]
                            d_type = d_by_ref.setdefault (lig_ref, {})
                            d_type[type_ref] = d_type.get (type_ref, 0) + 1

                #################
                #  folder LGD   #
                #################
                for file_LGD in listdir(pr_LGD) :
                    if not "LGD" in file_LGD :
                        continue
                    ligand = file_LGD.split ("_")[1]
                    if ligand == "REF" :
                        continue
                    if not ligand in d_lig :
                        d_lig[ligand] = {"count": 0, "group": [], "family": []}
                    d_lig[ligand]["count"] = d_lig[ligand]["count"] + 1
                    d_lig[ligand]["family"].append (str(family_ref))
                    d_lig[ligand]["group"].append (str(group_ref))

                ###############
                #  folder BS  #
                ###############
                l_file_BS = listdir(pr_BS)

                # BS -> reference
                for file_BS in l_file_BS :
                    if not "BS_REF" in file_BS :
                        continue
                    l_elem = file_BS.split ("_")
                    lig_ref = l_elem[2]
                    pr_ref = l_elem[3].split (".")[0]
                    print ("%s %s *****" % (lig_ref, pr_ref))

                    if not lig_ref in d_count_pr :
                        d_count_pr[lig_ref] = {"pr ref": [], "pr queries": [], "lig queries": []}

                    # a reference protein and its family are counted only once
                    if not pr_ref in d_count_pr[lig_ref]["pr ref"] :
                        d_count_pr[lig_ref]["pr ref"].append (pr_ref)
                        # best effort: a reference without family information
                        # is kept in "pr ref" but not counted in any family
                        try :
                            family = analysis.findFamily (pr_ref, pathManage.dataset (lig_ref) + "family_PDB.txt")
                        except Exception :
                            if debug : print ("No family found for %s" % pr_ref)
                        else :
                            d_count_pr[lig_ref][family] = d_count_pr[lig_ref].get (family, 0) + 1

                # BS -> query
                for file_BS in l_file_BS :
                    if "BS_REF" in file_BS :
                        continue
                    l_elem = file_BS.split ("_")
                    lig_querie = l_elem[1]
                    prot_querie = l_elem[2][0:4]
                    print ("%s %s *******" % (prot_querie, lig_querie))

                    # the query is attached to the last reference ligand read;
                    # without a registered reference there is nothing to attach
                    # it to (this case used to stop the run on an error)
                    if not lig_ref in d_count_pr :
                        print ("WARNING: no reference ligand for %s, query skipped" % file_BS)
                        continue
                    d_count_pr[lig_ref]["pr queries"].append (prot_querie)
                    d_count_pr[lig_ref]["lig queries"].append (lig_querie)

    ##################
    # write and plot #
    ##################
    pr_result = pathManage.generatePath(pr_final_folder + "counting/")

    # one table by ligand substituent: header = sub folders, second line = counts
    for ligand_sub in d_count :
        p_filout = pr_result + ligand_sub
        l_sub = list (d_count[ligand_sub])
        with open (p_filout, "w") as filout :
            filout.write ("\t".join(l_sub) + "\n")
            filout.write ("\t".join([str(d_count[ligand_sub][sub]) for sub in l_sub]) + "\n")
        runOtherSoft.piePlot(p_filout)

    # every ligand is written: the former filter compared the whole ligand
    # dictionary with 1, which never excluded anything
    with open (pr_result + "count_ligand", "w") as filout_lig :
        filout_lig.write ("Ligand ID\tNumber of occurences in the dataset\tNumber of different clusters\tList of clusters\tList of protein families\n")
        for lig in d_lig :
            l_col = [str (lig),
                     str (d_lig[lig]["count"]),
                     str (len (set (d_lig[lig]["group"]))),
                     " ".join (d_lig[lig]["group"]),
                     " ".join (d_lig[lig]["family"])]
            filout_lig.write ("\t".join (l_col) + "\n")

    with open (pr_result + "CountByLigandRef", "w") as filout_LSR_lig :
        for lig in d_by_ref :
            filout_LSR_lig.write ("====" + str (lig) + "====\n")
            for sub_ref in d_by_ref[lig] :
                filout_LSR_lig.write (str (sub_ref) + ": " + str (d_by_ref[lig][sub_ref]) + "\n")

    # keys of d_count_pr[lig] that are lists, every other key is a family
    l_key_list = ["pr ref", "pr queries", "lig queries"]
    with open (pr_result + "count_pr", "w") as filout_pr_count :
        for lig in d_count_pr :
            filout_pr_count.write ("====" + str (lig) + "====\n")
            filout_pr_count.write ("nb ref pr: " + str (len (d_count_pr[lig]["pr ref"])) + "\n")
            filout_pr_count.write ("nb querie pr: " + str (len (d_count_pr[lig]["pr queries"])) + "\n")
            filout_pr_count.write ("nb ligand queries: " + str (len (d_count_pr[lig]["lig queries"])) + "\n")
            for family in d_count_pr[lig] :
                if not family in l_key_list :
                    filout_pr_count.write ("Ref " + str (family) + ": " + str (d_count_pr[lig][family]) + "\n")

    runOtherSoft.barplot(pr_result + "count_ligand")
def analyseIons (pr_dataset, name_ligand, p_filout, thresold_max_interaction = 4.0) :
    """
    Measure, for every reference folder of a dataset, the distances and angles
    between the ions of the complex and the atoms retrieved on the reference
    ligand by retrieveTwoAtomForAngle, and write the results.

    Only the entries of pr_dataset with a name of 4 characters are analysed.
    A HETATM of the complex is considered as an ion when its resName is in
    l_ions. An ion is reported (one line in p_filout) when its distances to
    the two first retrieved atoms are both below 10.

    Files written:
      - p_filout: one line by reported ion (PDB, ion, distances, angle(s),
        serial of the ligand atoms); a third atom is used when name_ligand
        is "ATP"
      - p_filout[0:-4] + "count.txt": global counts
      - p_filout[0:-4] + "byIons_" + name_ligand: number of folders where each
        ion type is found, followed by a call to runOtherSoft.barplot

    args: -> pr_dataset: path of the dataset folder, with final separator
          -> name_ligand: name of the reference ligand
          -> p_filout: path of the result file
          -> thresold_max_interaction: distance below which an ion is counted
             as interacting with a pair of ligand atoms
    return: None
    """

    is_ATP = name_ligand == "ATP"
    # number of ligand atoms used by the measures below
    nb_atom_needed = 3 if is_ATP else 2

    l_folder_ref = listdir(pr_dataset)

    # dictionary of counting
    d_count = {}
    for k_count in ["CX", "CX + ions", "BS + ions", "BS + 1-ion", "BS + 2-ions",
                    "BS + more-ions", "Interact-1", "Interact-2"] :
        d_count[k_count] = 0

    # dictionary by ions: ion name -> number of folders where it is found
    d_ions = {}

    with open (p_filout, "w") as filout :
        if is_ATP :
            filout.write ("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else :
            filout.write ("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref :
            only_one = 0
            if len (ref_folder) != 4 :
                continue
            d_count["CX"] = d_count["CX"] + 1
            l_ion_folder = []   # ion types already counted for this folder

            # path and complex
            p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
            p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # retrieve phosphate
            l_pi = retrieveTwoAtomForAngle (lig_ref_parsed, name_ligand)
            # case ligand without phosphate, or with less atoms than the
            # measures need (l_pi[2] is read for ATP)
            if not l_pi or len (l_pi) < nb_atom_needed :
                continue

            flag_interact = 0
            flag_between_1 = 0
            flag_between_2 = 0

            for het_parsed in l_het_parsed :
                name_ion = het_parsed["resName"]
                if not name_ion in l_ions :
                    continue

                d_count["CX + ions"] = d_count["CX + ions"] + 1

                # an ion type is counted only once by folder
                if not name_ion in d_ions :
                    d_ions[name_ion] = 0
                if not name_ion in l_ion_folder :
                    d_ions[name_ion] = d_ions[name_ion] + 1
                    l_ion_folder.append (name_ion)

                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if is_ATP :
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                # ion too far from the ligand atoms -> not reported
                if not (d1 < 10 and d2 < 10) :
                    continue

                # NOTE: count by ion name, incremented only for the first ion
                # reported in the folder; it is not written in any file
                if not name_ion in d_count :
                    d_count[name_ion] = 0
                if only_one == 0 :
                    d_count[name_ion] = d_count[name_ion] + 1
                    only_one = 1

                d_count["BS + ions"] = d_count["BS + ions"] + 1
                flag_interact = flag_interact + 1

                if d1 < thresold_max_interaction and d2 < thresold_max_interaction :
                    flag_between_1 = flag_between_1 + 1

                if is_ATP :
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction :
                        flag_between_2 = flag_between_2 + 1
                    l_col = [ref_folder, name_ion, d1, d2, d3, angle, angle_bis,
                             l_pi[0]["serial"], l_pi[1]["serial"], l_pi[2]["serial"]]
                else :
                    l_col = [ref_folder, name_ion, d1, d2, angle,
                             l_pi[0]["serial"], l_pi[1]["serial"]]
                filout.write ("\t".join ([str (col) for col in l_col]) + "\n")

            # counts by folder, from the number of ions reported
            if flag_interact == 1 :
                d_count["BS + 1-ion"] = d_count["BS + 1-ion"] + 1
            elif flag_interact == 2 :
                d_count["BS + 2-ions"] = d_count["BS + 2-ions"] + 1
            elif flag_interact > 2 :
                d_count["BS + more-ions"] = d_count["BS + more-ions"] + 1

            if flag_between_1 >= 1 :
                d_count["Interact-1"] = d_count["Interact-1"] + flag_between_1
            if flag_between_2 >= 1 :
                d_count["Interact-2"] = d_count["Interact-2"] + flag_between_2

    # global counts: (label written, key in d_count)
    l_line_count = [("CX: ", "CX"),
                    ("CX + ions: ", "CX + ions"),
                    ("BS + ions: ", "BS + ions"),
                    ("BS + 1-ion: ", "BS + 1-ion"),
                    ("BS + 2-ions: ", "BS + 2-ions"),
                    ("BS + more-ions: ", "BS + more-ions"),
                    ("Interact Pi-alpha + Pi-beta: ", "Interact-1"),
                    ("Interact Pi-beta + Pi-gama: ", "Interact-2")]
    with open (p_filout[0:-4] + "count.txt", "w") as filout_count :
        for label, k_count in l_line_count :
            filout_count.write (label + str (d_count[k_count]) + "\n")

    # count by ion type
    p_by_ion = p_filout[0:-4] + "byIons_" + name_ligand
    with open (p_by_ion, "w") as filout_by_ion :
        for name_ion in d_ions :
            filout_by_ion.write (str (name_ion.capitalize()) + "\t" + str (d_ions[name_ion]) + "\n")

    runOtherSoft.barplot (p_by_ion)