from os import listdir, path
from re import search

import numpy as np

import analysis
import arrangeResult
import pathManage
import runOtherSoft
import tool


def GroupRef(d_matrix, k_in, p_filout, thresold_group, l_lig):
    """Group PDB references whose pairwise value for key k_in reaches
    thresold_group, then write a PDB / group / family table.
    Returns p_filout."""

    d_group = {}
    d_group[1] = []

    # list of unique PDB codes (rows and columns of the matrix)
    l_PDB = list(d_matrix.keys())
    for PDB1 in d_matrix.keys():
        for PDB2 in d_matrix[PDB1].keys():
            if not PDB2 in l_PDB:
                l_PDB.append(PDB2)

    for PDB_in in l_PDB:
        f = 0
        for group in d_group.keys():
            # already placed in a group
            if f == 1:
                break
            # first PDB: group 1 is still empty
            if group == 1 and d_group[group] == []:
                d_group[group].append(PDB_in)
                f = 1
                break
            for PDB_classed in d_group[group]:
                val = 0.0
                # the matrix stores each pair only once, so try both orders
                try:
                    val = float(d_matrix[PDB_classed][PDB_in][k_in])
                except KeyError:
                    val = float(d_matrix[PDB_in][PDB_classed][k_in])
                if val >= thresold_group:
                    d_group[group].append(PDB_in)
                    f = 1  # flag -> placed in a group
                    break
        # not similar enough to any existing group -> open a new one
        if f == 0:
            d_group[max(d_group.keys()) + 1] = [PDB_in]

    filout = open(p_filout, "w")
    filout.write("PDB\tGroup\tFamily\n")
    for group in d_group:
        for pdb in d_group[group]:
            for lig in l_lig:
                family = analysis.findFamily(pdb, pathManage.findFamilyFile(lig))
                family = family[-1]
                if family != "None":
                    filout.write(str(pdb) + "\t" + str(group) + "\t" + str(family) + "\n")
                    break
    filout.close()

    return p_filout
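# Minimal, self-contained sketch of the grouping rule used in GroupRef above,
# without the family lookup or file output. The matrix content, the "identity"
# key and the threshold below are invented for illustration: a PDB joins the
# first group containing a member whose pairwise value reaches the threshold,
# otherwise it opens a new group.
def _exampleGrouping(thresold_group=80.0):
    # hypothetical pairwise matrix: d_matrix[pdb1][pdb2]["identity"] = % identity
    d_matrix = {"1abc": {"2xyz": {"identity": 92.0}, "3def": {"identity": 10.0}},
                "2xyz": {"3def": {"identity": 12.0}}}
    l_PDB = ["1abc", "2xyz", "3def"]
    d_group = {1: []}
    for PDB_in in l_PDB:
        placed = 0
        for group in list(d_group.keys()):
            if d_group[group] == []:
                d_group[group].append(PDB_in)
                placed = 1
                break
            for PDB_classed in d_group[group]:
                try:
                    val = float(d_matrix[PDB_classed][PDB_in]["identity"])
                except KeyError:
                    val = float(d_matrix[PDB_in][PDB_classed]["identity"])
                if val >= thresold_group:
                    d_group[group].append(PDB_in)
                    placed = 1
                    break
            if placed == 1:
                break
        if placed == 0:
            d_group[max(d_group.keys()) + 1] = [PDB_in]
    return d_group  # -> {1: ["1abc", "2xyz"], 2: ["3def"]}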
def manageResult(l_ligand, name_final, l_out=[]):
    """Arrange the SMILES result files of each ligand into the final result folder."""
    pr_result = pathManage.result("final_" + name_final)  # remove the folder
    # pr_pi = pathManage.result("final/phosphates")
    # pr_ribose = pathManage.result("final/ribose")

    for name_lig in l_ligand:
        l_p_smile = pathManage.findListSmileFile(name_lig)
        p_file_famile = pathManage.findFamilyFile(name_lig)
        for p_smile in l_p_smile:
            # ribose SMILES tables
            if search("ribose", p_smile) and search(".txt", p_smile) and search("smile", p_smile):
                arrangeResult.globalArrangement(pr_result, p_smile, p_file_famile, name_lig, l_out)
            # every other SMILES table
            elif search("smile", p_smile) and search(".txt", p_smile):
                arrangeResult.globalArrangement(pr_result, p_smile, p_file_famile, name_lig, l_out)
    return 1
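# Short, runnable sketch of the filename filter in manageResult above (the file
# names are invented for illustration): only SMILES .txt tables pass, the ribose
# table simply matching the first branch instead of the second.
def _exampleSmileFilter():
    l_files = ["smile_ribose.txt", "smile_pi.txt", "reference.pdb"]
    l_kept = []
    for p_smile in l_files:
        if search("ribose", p_smile) and search(".txt", p_smile) and search("smile", p_smile):
            l_kept.append(p_smile)
        elif search("smile", p_smile) and search(".txt", p_smile):
            l_kept.append(p_smile)
    return l_kept  # -> ["smile_ribose.txt", "smile_pi.txt"]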
def qualityExtraction(l_ligand, name_folder, p_list_ligand, thresold_sheap):
    pr_result = pathManage.result("final_" + name_folder)
    filout = open(pr_result + "quality_extraction.txt", "w")

    # number of PDB by ligand, without filter
    filout.write("Number PDB by ligand:\n")
    d_dataset = tool.parseLigandPDBList(p_list_ligand)
    for ligand in l_ligand:
        filout.write(str(ligand) + ": " + str(len(d_dataset[ligand])) + "\n")

    # number of references
    filout.write("\n*************\n\nNumber references by ligands:\n")
    for ligand in l_ligand:
        pr_result_ligand = pathManage.result(ligand)
        nb_ref = -2
        l_file = listdir(pr_result_ligand)
        for f in l_file:
            if path.isdir(pr_result_ligand + "/" + f):
                nb_ref = nb_ref + 1
        filout.write(ligand + ": " + str(nb_ref) + "\n")

    # number of queries by reference: sum, mean and max (after BLAST)
    filout.write("\n*************\n\nNumber means queries by references:\n")
    p_family_all = pathManage.result() + "reference_family_all.txt"
    filout_family_all = open(p_family_all, "w")
    d_family_all = {}
    for ligand in l_ligand:
        d_nb_query = {}
        d_family = {}
        p_filout_family = pathManage.result() + "reference_family_" + ligand + ".txt"
        p_filout_family_count = pathManage.result() + "count_family_" + ligand + ".txt"
        filout_family = open(p_filout_family, "w")
        filout_family_count = open(p_filout_family_count, "w")
        pr_result_ligand = pathManage.result(ligand)
        nb_ref = 0
        l_file = listdir(pr_result_ligand)
        for f in l_file:
            if path.isdir(pr_result_ligand + "/" + f) and len(f) == 4:
                # count by family
                family_ref = analysis.findFamily(f, pathManage.findFamilyFile(ligand))
                filout_family.write("\t".join(family_ref) + "\n")
                if not family_ref[-1] in d_family.keys():
                    d_family[family_ref[-1]] = 0
                d_family[family_ref[-1]] = d_family[family_ref[-1]] + 1
                # same count, all ligands together
                if not family_ref[-1] in d_family_all.keys():
                    d_family_all[family_ref[-1]] = 0
                d_family_all[family_ref[-1]] = d_family_all[family_ref[-1]] + 1
                # count the number of references and their queries
                nb_ref = nb_ref + 1
                d_nb_query[f] = 0
                l_file_queries = listdir(pr_result_ligand + "/" + f + "/")
                for file_query in l_file_queries:
                    if search("CX", file_query):
                        d_nb_query[f] = d_nb_query[f] + 1

        l_nb_query = list(d_nb_query.values())
        filout.write(ligand + ": " + str(np.sum(l_nb_query)) + "\n")
        filout.write(ligand + ": " + str(np.mean(l_nb_query)) + "+/-" + str(np.std(l_nb_query)) + "\n")
        filout.write("MAX " + str(ligand) + ": " + str(max(l_nb_query)) + " "
                     + str(list(d_nb_query.keys())[l_nb_query.index(max(l_nb_query))]) + "\n")

        # family counts for this ligand
        filout_family_count.write("\t".join(d_family.keys()) + "\n")
        l_values = [str(x) for x in d_family.values()]
        filout_family_count.write("\t".join(l_values) + "\n")
        filout_family.close()
        filout_family_count.close()
        runOtherSoft.piePlot(p_filout_family_count)

    # family counts over all ligands
    filout_family_all.write("\t".join(d_family_all.keys()) + "\n")
    l_values = [str(x) for x in d_family_all.values()]
    filout_family_all.write("\t".join(l_values) + "\n")
    filout_family_all.close()
    runOtherSoft.piePlot(p_family_all)

    # number of substituents considered by ligand
    filout.write("\n*************\n\nNumber of subref considered:\n")
    for ligand in l_ligand:
        d_nb_sub = {}
        d_nb_sub_sheap = {}
        pr_result_ligand = pathManage.result(ligand)
        l_ref = listdir(pr_result_ligand)
        for ref in l_ref:
            if path.isdir(pr_result_ligand + "/" + ref) and len(ref) == 4:
                l_file_queries = listdir(pr_result_ligand + "/" + ref + "/")
                for file_query in l_file_queries:
                    if search("substituent", file_query) and search(".pdb", file_query):
                        atom_substituate = file_query.split("_")[-2]
                        # ShaEP score is the last field of the file name, without ".pdb"
                        try:
                            value_sheap = float(file_query.split("_")[-1][:-4])
                        except ValueError:
                            continue
                        if not atom_substituate in d_nb_sub.keys():
                            d_nb_sub[atom_substituate] = 0
                        d_nb_sub[atom_substituate] = d_nb_sub[atom_substituate] + 1
                        if value_sheap > thresold_sheap:
                            if not atom_substituate in d_nb_sub_sheap:
                                d_nb_sub_sheap[atom_substituate] = 0
                            d_nb_sub_sheap[atom_substituate] = d_nb_sub_sheap[atom_substituate] + 1

        filout.write("\n" + ligand + "\n")
        for atom_substituate in d_nb_sub.keys():
            filout.write(atom_substituate + ": " + str(d_nb_sub[atom_substituate]) + "\n")
            try:
                filout.write(atom_substituate + " ShaEP: " + str(d_nb_sub_sheap[atom_substituate]) + "\n")
            except KeyError:
                filout.write(atom_substituate + " ShaEP: 0\n")

    filout.close()
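# Self-contained sketch of the substituent-file counting done at the end of
# qualityExtraction. The file-name layout "..._<atom>_<ShaEP>.pdb" is an
# assumption inferred from the split("_") calls above, and the names below are
# invented for illustration.
def _exampleSubstituentCount(thresold_sheap=0.6):
    l_file_queries = ["substituent_ADP_N6_0.72.pdb",
                      "substituent_ADP_O2B_0.41.pdb",
                      "reference_2hyd.pdb"]
    d_nb_sub = {}
    d_nb_sub_sheap = {}
    for file_query in l_file_queries:
        if search("substituent", file_query) and search(".pdb", file_query):
            atom_substituate = file_query.split("_")[-2]              # atom name, e.g. "N6"
            try:
                value_sheap = float(file_query.split("_")[-1][:-4])   # strip ".pdb"
            except ValueError:
                continue
            d_nb_sub[atom_substituate] = d_nb_sub.get(atom_substituate, 0) + 1
            if value_sheap > thresold_sheap:
                d_nb_sub_sheap[atom_substituate] = d_nb_sub_sheap.get(atom_substituate, 0) + 1
    # -> d_nb_sub = {"N6": 1, "O2B": 1}, d_nb_sub_sheap = {"N6": 1}
    return d_nb_sub, d_nb_sub_sheap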