def runTMalign(path_pr1, path_pr2, path_dir_out, debug=1):
    """Run TMalign on two structures and return the expected output paths.

    The alignment is launched only when ``path_dir_out`` does not exist yet:
    an existing output folder is taken to mean the pair was already
    processed (the original comments mention a "multi run" case).

    Args:
        path_pr1: path of the first structure file.
        path_pr2: path of the second structure file.
        path_dir_out: output folder, used as a plain string prefix, so it is
            expected to end with "/".
        debug: when true, print the command line before running it.

    Returns:
        List of five paths built from ``path_dir_out``, in this order:
        "align.out", "align.out_all", "align.out_atm", "align.out_all_atm"
        and "RMSD". Their existence is not checked.

    Raises:
        OSError: if the TMalign executable cannot be launched.
    """
    # Function-scope import so the block does not depend on the file header.
    import subprocess

    if not os.path.exists(path_dir_out):
        pathManage.generatePath(path_dir_out)
        # TMalign is run on the files returned by tool.removeChain.
        p_pr1 = tool.removeChain(path_pr1, path_dir_out)
        p_pr2 = tool.removeChain(path_pr2, path_dir_out)

        p_rmsd = path_dir_out + "RMSD"
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to TMalign untouched.
        l_cmd = [
            TMalign,
            str(p_pr1),
            str(p_pr2),
            "-o",
            path_dir_out + "align.out",
            "-m",
            path_dir_out + "matrix.out",
        ]
        if debug:
            print(" ".join(l_cmd) + " > " + p_rmsd)

        # Standard output of TMalign is stored in the RMSD file; the exit
        # status is ignored, as it was with os.system.
        with open(p_rmsd, "w") as f_rmsd:
            subprocess.call(l_cmd, stdout=f_rmsd)

    return [
        path_dir_out + "align.out",
        path_dir_out + "align.out_all",
        path_dir_out + "align.out_atm",
        path_dir_out + "align.out_all_atm",
        path_dir_out + "RMSD",
    ]
def applyTMAlignList(l_pr_ref, pr_out):
    """Run TMalign on every unordered pair of reference structures.

    Each pair gets its own folder ``pr_out + PDB1 + "__" + PDB2 + "/"``,
    where the PDB identifiers are the first four characters of the file
    names.

    Args:
        l_pr_ref: list of structure file paths.
        pr_out: output folder, used as a string prefix (expected to end
            with "/").

    Returns:
        Two-level dictionary ``d_out[PDB_a][PDB_b]`` holding the value
        returned by ``parseTMalign.parseOutputTMalign`` for the last file
        reported by ``runOtherSoft.runTMalign``.
    """
    pathManage.generatePath(pr_out)

    d_out = {}
    nb_pr_ref = len(l_pr_ref)
    for i in range(nb_pr_ref):
        PDB1 = l_pr_ref[i].split("/")[-1][0:4]
        for j in range(i + 1, nb_pr_ref):
            PDB2 = l_pr_ref[j].split("/")[-1][0:4]
            pr_alignement = pr_out + PDB1 + "__" + PDB2 + "/"

            out_file = runOtherSoft.runTMalign(l_pr_ref[i], l_pr_ref[j], pr_alignement)

            # Cleaning is best effort: with several concurrent runs the
            # folder may be cleaned "too fast" (original comment). Only
            # ordinary errors are ignored, so Ctrl-C still interrupts.
            try:
                CleanResultTMalign(pr_alignement)
            except Exception:
                pass

            d_result = parseTMalign.parseOutputTMalign(out_file[-1])

            # Same key layout as before: file under PDB1 when it is already
            # a first-level key, else under PDB2 when that one is, else
            # create the PDB1 entry.
            if PDB1 in d_out:
                d_out[PDB1][PDB2] = d_result
            elif PDB2 in d_out:
                d_out[PDB2][PDB1] = d_result
            else:
                d_out[PDB1] = {PDB2: d_result}

    return d_out
def classifRefProtein(pr_dataset, l_lig, thresold_identity=30.0, thresold_similarity=30.0,
                      p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """Group the reference proteins by pairwise sequence identity/similarity.

    For every ligand of ``l_lig`` the dataset folder given by
    ``pathManage.dataset`` is scanned; for each sub-folder, the first file
    whose name starts with the folder name provides the PDB identifier
    (with chain) whose FASTA is imported. The sequences are then compared
    with ``applyNeedleList``; identity and similarity matrices and group
    files are written in the "clasifRef" result folder.

    Args:
        pr_dataset: kept for interface compatibility; it is not used (the
            dataset folder is always looked up per ligand, as before).
        l_lig: list of ligand names.
        thresold_identity: threshold passed to ``GroupRef`` for the
            identity groups; also part of the output file name.
        thresold_similarity: same, for the similarity groups.
        p_fasta_global: path of the global FASTA file handed to
            ``downloadFile.importFasta``; defaults to the previously
            hard-coded location.
    """
    pr_out = pathManage.result("clasifRef")
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")

    l_p_fasta = []
    for lig in l_lig:
        # Local name: the pr_dataset argument is no longer overwritten.
        pr_dataset_lig = pathManage.dataset(lig)
        for name_ref in listdir(pr_dataset_lig):
            pr_ref_by_lig = pr_dataset_lig + name_ref
            PDB_folder = pr_ref_by_lig.split("/")[-1]
            try:
                l_file = listdir(pr_ref_by_lig)
            except OSError:
                # entry is a plain file, not a reference folder
                continue

            for file_ref in l_file:
                if search("^" + PDB_folder, file_ref):
                    # drop the 4-character extension, lower-case the PDB
                    # code and keep the chain suffix untouched
                    PDB_ID = file_ref[0:-4]
                    PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]
                    p_fasta = downloadFile.importFasta(
                        PDB_ID, pr_align_seq, dir_by_PDB=0, debug=1,
                        fastaGlobal=p_fasta_global)
                    l_p_fasta.append(p_fasta)
                    break  # one FASTA per reference folder

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # matrices
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # groups of references
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
def runTMalign(path_pr1, path_pr2, path_dir_out, debug=1):
    """Run TMalign on two structures and return the expected output paths.

    The alignment is launched only when ``path_dir_out`` does not exist yet:
    an existing output folder is taken to mean the pair was already
    processed (the original comments mention a "multi run" case).

    Args:
        path_pr1: path of the first structure file.
        path_pr2: path of the second structure file.
        path_dir_out: output folder, used as a plain string prefix, so it is
            expected to end with "/".
        debug: when true, print the command line before running it.

    Returns:
        List of five paths built from ``path_dir_out``, in this order:
        "align.out", "align.out_all", "align.out_atm", "align.out_all_atm"
        and "RMSD". Their existence is not checked.

    Raises:
        OSError: if the TMalign executable cannot be launched.
    """
    # Function-scope import so the block does not depend on the file header.
    import subprocess

    l_p_out = [
        path_dir_out + "align.out",
        path_dir_out + "align.out_all",
        path_dir_out + "align.out_atm",
        path_dir_out + "align.out_all_atm",
        path_dir_out + "RMSD",
    ]

    # Output folder already there: do not run again.
    if os.path.exists(path_dir_out):
        return l_p_out

    pathManage.generatePath(path_dir_out)
    # TMalign is run on the files returned by tool.removeChain.
    p_pr1 = tool.removeChain(path_pr1, path_dir_out)
    p_pr2 = tool.removeChain(path_pr2, path_dir_out)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to TMalign untouched.
    l_cmd = [
        TMalign,
        str(p_pr1),
        str(p_pr2),
        "-o",
        path_dir_out + "align.out",
        "-m",
        path_dir_out + "matrix.out",
    ]
    if debug:
        print(" ".join(l_cmd) + " > " + l_p_out[-1])

    # Standard output of TMalign is stored in the RMSD file; the exit status
    # is ignored, as it was with os.system.
    with open(l_p_out[-1], "w") as f_rmsd:
        subprocess.call(l_cmd, stdout=f_rmsd)

    return l_p_out
def applyTMAlignList (l_pr_ref, pr_out):
    """Run TMalign on every unordered pair of reference structures.

    Each pair gets its own folder ``pr_out + PDB1 + "__" + PDB2 + "/"``,
    where the PDB identifiers are the first four characters of the file
    names.

    Args:
        l_pr_ref: list of structure file paths.
        pr_out: output folder, used as a string prefix (expected to end
            with "/").

    Returns:
        Two-level dictionary ``d_out[PDB_a][PDB_b]`` holding the value
        returned by ``parseTMalign.parseOutputTMalign`` for the last file
        reported by ``runOtherSoft.runTMalign``.
    """
    pathManage.generatePath(pr_out)

    d_out = {}
    nb_pr_ref = len(l_pr_ref)
    for i in range(nb_pr_ref):
        PDB1 = l_pr_ref[i].split("/")[-1][0:4]
        for j in range(i + 1, nb_pr_ref):
            PDB2 = l_pr_ref[j].split("/")[-1][0:4]
            pr_alignement = pr_out + PDB1 + "__" + PDB2 + "/"

            out_file = runOtherSoft.runTMalign(l_pr_ref[i], l_pr_ref[j], pr_alignement)

            # Cleaning is best effort: with several concurrent runs the
            # folder may be cleaned "too fast" (original comment). Only
            # ordinary errors are ignored, so Ctrl-C still interrupts.
            try:
                CleanResultTMalign(pr_alignement)
            except Exception:
                pass

            d_result = parseTMalign.parseOutputTMalign(out_file[-1])

            # Same key layout as before: file under PDB1 when it is already
            # a first-level key, else under PDB2 when that one is, else
            # create the PDB1 entry.
            if PDB1 in d_out:
                d_out[PDB1][PDB2] = d_result
            elif PDB2 in d_out:
                d_out[PDB2][PDB1] = d_result
            else:
                d_out[PDB1] = {PDB2: d_result}

    return d_out
def classifRefProtein (pr_dataset, l_lig, thresold_identity = 30.0, thresold_similarity = 30.0,
                       p_fasta_global = "/home/borrel/Yue_project/pdb_seqres.txt"):
    """Group the reference proteins by pairwise sequence identity/similarity.

    For every ligand of ``l_lig`` the dataset folder given by
    ``pathManage.dataset`` is scanned; for each sub-folder, the first file
    whose name starts with the folder name provides the PDB identifier
    (with chain) whose FASTA is imported. The sequences are then compared
    with ``applyNeedleList``; identity and similarity matrices and group
    files are written in the "clasifRef" result folder.

    Args:
        pr_dataset: kept for interface compatibility; it is not used (the
            dataset folder is always looked up per ligand, as before).
        l_lig: list of ligand names.
        thresold_identity: threshold passed to ``GroupRef`` for the
            identity groups; also part of the output file name.
        thresold_similarity: same, for the similarity groups.
        p_fasta_global: path of the global FASTA file handed to
            ``downloadFile.importFasta``; defaults to the previously
            hard-coded location.
    """
    pr_out = pathManage.result("clasifRef")
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")

    l_p_fasta = []
    for lig in l_lig:
        # Local name: the pr_dataset argument is no longer overwritten.
        pr_dataset_lig = pathManage.dataset(lig)
        for name_ref in listdir(pr_dataset_lig):
            pr_ref_by_lig = pr_dataset_lig + name_ref
            PDB_folder = pr_ref_by_lig.split("/")[-1]
            try:
                l_file = listdir(pr_ref_by_lig)
            except OSError:
                # entry is a plain file, not a reference folder
                continue

            for file_ref in l_file:
                if not search("^" + PDB_folder, file_ref):
                    continue
                # drop the 4-character extension, lower-case the PDB code
                # and keep the chain suffix untouched
                PDB_ID = file_ref[0:-4]
                PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]
                p_fasta = downloadFile.importFasta(PDB_ID, pr_align_seq, dir_by_PDB = 0, debug = 1, fastaGlobal = p_fasta_global)
                l_p_fasta.append(p_fasta)
                break  # one FASTA per reference folder

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # matrices
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # groups of references
    p_group_id = GroupRef(d_outNeedle, "identity", pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt", thresold_identity, l_lig)
    p_group_sim = GroupRef(d_outNeedle, "similarity", pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt", thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
def downloadPDB (pr_in):
    """Prepare the folder ``pr_in`` and format the PDB database in it.

    Calls ``pathManage.generatePath(pr_in)`` and then
    ``managePDB.formatPDBDatabase(pr_in)``; nothing is returned.
    NOTE(review): despite the name, no download is visible here --
    presumably handled by managePDB; confirm.
    """
    pathManage.generatePath(pr_in)
    managePDB.formatPDBDatabase(pr_in)
def superpositionAllRef (l_ligand, name_folder_final, debug = 1):
    """Superimpose the reference ligands of a final result folder.

    The folder ``pathManage.result("final_" + name_folder_final)`` is
    walked (type / substructure / reference / "LGD"); every file matching
    "LGD_REF_A" and ".pdb" is loaded once per PDB identifier. The first
    ligand met for each ligand name is the reference and is written as is;
    the following ones are transformed with the rigid transformation
    computed between their adenine atoms and those of the reference
    (``superimpose.rigid_transform_3D``), written to
    "<ligand>_superimposed.pdb", and their RMSE against the reference is
    appended to "<ligand>_RMSE.txt". ``runOtherSoft.Rhistogram`` is finally
    run on each RMSE file.

    Args:
        l_ligand: ligand names for which output files are opened.
        name_folder_final: suffix of the "final_" result folder.
        debug: when true, print the folders being visited.
    """
    pr_final = pathManage.result("final_" + name_folder_final)
    pr_align = pathManage.generatePath(pr_final + "refAlignement/")

    l_ref = []            # PDB identifiers already processed
    d_ref = {}            # ligand -> [reference atoms, reference adenine atoms]
    d_filout_pdb = {}
    d_filout_RMSE = {}
    l_file_RMSE = []

    # try/finally so the output files are closed even when a step fails
    try:
        for ligand in l_ligand:
            d_filout_pdb[ligand] = open(pr_align + ligand + "_" + "superimposed.pdb", "w")
            p_RMSE = pr_align + ligand + "_" + "RMSE.txt"
            d_filout_RMSE[ligand] = open(p_RMSE, "w")
            l_file_RMSE.append(p_RMSE)

        for pr_type_ref in listdir(pr_final):
            if debug:
                print("1 " + str(pr_type_ref))
            try:
                l_pr_sub = listdir(pr_final + pr_type_ref + "/")
            except OSError:
                # entry is a file, not a folder
                continue

            # "cycle" holds one more folder level: replace it by its content
            for pr_sub in l_pr_sub:
                print("2 " + str(pr_sub))
                if pr_sub == "cycle":
                    l_pr_sub.remove("cycle")
                    for pr_sub_cycle in listdir(pr_final + pr_type_ref + "/cycle"):
                        l_pr_sub.append("cycle/" + pr_sub_cycle)
                    break  # the list was modified: stop iterating over it

            for pr_sub in l_pr_sub:
                try:
                    l_pr_ref = listdir(pr_final + pr_type_ref + "/" + pr_sub)
                except OSError:
                    # Previously the error was ignored and the listing of
                    # the previous folder was reused (or a NameError was
                    # raised on the first one); skip the entry instead.
                    continue
                if debug:
                    print("3 " + str(pr_sub))

                for pr_ref in l_pr_ref:
                    if debug:
                        print("4 " + str(pr_ref))
                    pr_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                    try:
                        l_file = listdir(pr_LGD)
                    except OSError:
                        # no LGD folder
                        continue

                    for name_file in l_file:
                        if not (search("LGD_REF_A", name_file) and search(".pdb", name_file)):
                            continue

                        PDB_ref = name_file.split("_")[3][:4]
                        if PDB_ref in l_ref:
                            print("!!!!! IN")
                            break
                        l_ref.append(PDB_ref)

                        ligand = name_file.split("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_LGD + name_file, "HETATM", remove_H=1)
                        l_atom_adenine = substructTools.retrieveAdenine(l_atom_ligand)

                        if ligand not in d_ref:
                            # first ligand of this type: kept as reference
                            d_ref[ligand] = [l_atom_ligand, l_atom_adenine]
                            writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_ligand, "HETATM", connect_matrix = 1)
                            continue

                        rotation, translocation = superimpose.rigid_transform_3D(l_atom_adenine, d_ref[ligand][-1])
                        # "is None": an equality test is ambiguous if the
                        # results are arrays. Presumably None signals a
                        # failed superimposition -- confirm.
                        if rotation is None or translocation is None:
                            continue

                        # rotation + translation
                        l_atom_lig_rotated = superimpose.applyTranformation(rotation, translocation, l_atom_in=l_atom_ligand)

                        # RMSE needs the same number of atoms as the reference
                        if len(l_atom_lig_rotated) != len(d_ref[ligand][0]):
                            continue

                        writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_lig_rotated, "HETATM", connect_matrix = 1)
                        RMSE_ligand = superimpose.rmse(d_ref[ligand][0], l_atom_lig_rotated)
                        d_filout_RMSE[ligand].write(str(pr_ref) + pr_type_ref + "\t" + str(RMSE_ligand) + "\n")
    finally:
        for filout in list(d_filout_pdb.values()) + list(d_filout_RMSE.values()):
            filout.close()

    for file_RMSE in l_file_RMSE:
        runOtherSoft.Rhistogram(file_RMSE, "RMSE_Adenine")
def enantiomer(l_ligand, name_folder_final, debug = 1) :
    """Write oxygen-oxygen distances of the reference ligands (to do file output).

    The folder ``pathManage.result("final_" + name_folder_final)`` is
    walked (type / substructure / reference / "LGD"); every file matching
    "LGD_REF_A" and ".pdb" is processed once per PDB identifier. Three
    files per ligand are written in "enantiomer/":

    * "<ligand>_O4O5": distance O4' - O5';
    * "<ligand>_O3OP": smallest distance (below 100) between a start atom
      and the phosphate oxygens -- O3' and O1P/O2P/O3P for AMP, O4' and
      O1A/O2A/O3A otherwise;
    * "<ligand>_OPOP" (ATP and ADP only): smallest of the four
      O1A/O2A - O1B/O2B distances.

    ``runOtherSoft.Rhistogram`` is run on every file at the end.

    Atoms are now looked up per ligand file: a ligand lacking an atom
    skips the corresponding line instead of raising NameError or silently
    reusing the atom of a previously read ligand.

    Args:
        l_ligand: ligand names for which output files are opened.
        name_folder_final: suffix of the "final_" result folder.
        debug: when true, print the folders being visited.
    """
    pr_final = pathManage.result("final_" + name_folder_final)
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")

    l_ref = []       # PDB identifiers already processed
    d_filout = {}    # ligand -> {distance type: open file}

    # try/finally so the output files are closed even when a step fails
    try:
        for ligand in l_ligand:
            d_filout[ligand] = {}
            for type_dist in ("O3OP", "O4O5", "OPOP"):
                d_filout[ligand][type_dist] = open(pr_enantiomer + ligand + "_" + type_dist, "w")

        for pr_type_ref in listdir(pr_final):
            if debug:
                print("1 " + str(pr_type_ref))
            try:
                l_pr_sub = listdir(pr_final + pr_type_ref + "/")
            except OSError:
                # entry is a file, not a folder
                continue

            # "cycle" holds one more folder level: replace it by its content
            for pr_sub in l_pr_sub:
                print("2 " + str(pr_sub))
                if pr_sub == "cycle":
                    l_pr_sub.remove("cycle")
                    for pr_sub_cycle in listdir(pr_final + pr_type_ref + "/cycle"):
                        l_pr_sub.append("cycle/" + pr_sub_cycle)
                    break  # the list was modified: stop iterating over it

            for pr_sub in l_pr_sub:
                try:
                    l_pr_ref = listdir(pr_final + pr_type_ref + "/" + pr_sub)
                except OSError:
                    # previously ignored, which reused the previous listing
                    continue
                if debug:
                    print("3 " + str(pr_sub))

                for pr_ref in l_pr_ref:
                    if debug:
                        print("4 " + str(pr_ref))
                    pr_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                    try:
                        l_file = listdir(pr_LGD)
                    except OSError:
                        # no LGD folder
                        continue

                    for name_file in l_file:
                        if not (search("LGD_REF_A", name_file) and search(".pdb", name_file)):
                            continue

                        PDB_ref = name_file.split("_")[3][:4]
                        if PDB_ref in l_ref:
                            print("!!!!! IN")
                            break
                        l_ref.append(PDB_ref)

                        ligand = name_file.split("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_LGD + name_file, "HETATM")
                        name_entry = pr_ref + "_" + pr_type_ref

                        # atoms of THIS ligand by name (last occurrence wins)
                        d_atom = {}
                        for atom_ligand in l_atom_ligand:
                            d_atom[atom_ligand["name"]] = atom_ligand

                        # d O4 - O5; a missing atom or a failing distance
                        # skips the whole ligand, as before
                        try:
                            d_O4O5 = parsePDB.distanceTwoatoms(d_atom["O4'"], d_atom["O5'"])
                        except Exception:
                            continue
                        d_filout[ligand]["O4O5"].write(name_entry + "\t" + str(d_O4O5) + "\n")

                        # d O3 - OP
                        if ligand == "AMP":
                            atom_start = d_atom.get("O3'")
                            l_name_OP = ("O1P", "O2P", "O3P")
                        else:
                            # NOTE(review): O4' (not O3') is the start atom
                            # for the other ligands in the original code --
                            # kept as is; confirm it is intended.
                            atom_start = d_atom.get("O4'")
                            l_name_OP = ("O1A", "O2A", "O3A")

                        d_minO3OP = 100
                        name_minO3OP = None
                        if atom_start is not None:
                            for atom_ligand in l_atom_ligand:
                                if atom_ligand["name"] in l_name_OP:
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_start, atom_ligand)
                                    if d_tempO3OP < d_minO3OP:
                                        d_minO3OP = d_tempO3OP
                                        name_minO3OP = atom_ligand["name"]
                        if name_minO3OP is not None:
                            d_filout[ligand]["O3OP"].write(name_entry + "_" + str(name_minO3OP) + "\t" + str(d_minO3OP) + "\n")

                        # d OP - OP
                        if ligand == "ATP" or ligand == "ADP":
                            d_OP = {}
                            try:
                                d_OP["O1AO1B"] = parsePDB.distanceTwoatoms(d_atom["O1A"], d_atom["O1B"])
                                d_OP["O1AO2B"] = parsePDB.distanceTwoatoms(d_atom["O1A"], d_atom["O2B"])
                                d_OP["O2AO1B"] = parsePDB.distanceTwoatoms(d_atom["O2A"], d_atom["O1B"])
                                d_OP["O2AO2B"] = parsePDB.distanceTwoatoms(d_atom["O2A"], d_atom["O2B"])
                            except KeyError:
                                # one of the four oxygens is absent
                                d_OP = {}
                            if d_OP:
                                k_min = min(d_OP, key=d_OP.get)
                                d_filout[ligand]["OPOP"].write(name_entry + "_" + str(k_min) + "\t" + str(d_OP[k_min]) + "\n")
    finally:
        for lig in d_filout:
            for type_dist in d_filout[lig]:
                d_filout[lig][type_dist].close()

    # histograms, once every file is closed
    for lig in l_ligand:
        for type_dist in d_filout[lig].keys():
            runOtherSoft.Rhistogram(d_filout[lig][type_dist].name, type_dist, brk = 20)
def countingSubstituent (name_final, debug = 1):
    """Count ligands, substructures and proteins of a final result folder.

    The folder ``pathManage.result("final_" + name_final)`` is walked
    (type / substructure / reference); the "LSR", "LGD" and "BS" folders
    of each reference are scanned and four kinds of file are written in
    "counting/":

    * one file per query ligand: number of "LSR_" files by substructure
      folder (then ``runOtherSoft.piePlot``);
    * "count_ligand": occurrences, clusters and families by query ligand
      (then ``runOtherSoft.barplot``);
    * "CountByLigandRef": number of reference LSR files by reference
      ligand and type;
    * "count_pr": reference/query proteins and query ligands by reference
      ligand, plus a count by family.

    Args:
        name_final: suffix of the "final_" result folder.
        debug: when true, print the folders and files being visited.
    """
    pr_final_folder = pathManage.result("final_" + name_final)

    d_count = {}      # query ligand -> {substructure folder: nb LSR files}
    d_lig = {}        # query ligand -> {"count", "group", "family"}
    d_by_ref = {}     # reference ligand -> {reference type: nb}
    d_count_pr = {}   # reference ligand -> lists "pr ref"/"pr queries"/"lig queries" + {family: nb}

    l_file_final = listdir(pr_final_folder)
    if debug:
        print("1 " + str(pr_final_folder))

    for pr_type_subref in l_file_final:
        try:
            l_pr_sub = listdir(pr_final_folder + pr_type_subref + "/")
        except OSError:
            # entry is a file, not a folder
            continue
        if debug:
            print("2 " + pr_final_folder + pr_type_subref + "/")

        # "cycle" holds one more folder level
        if "cycle" in l_pr_sub:
            l_pr_sub.remove("cycle")
            for second_sub in listdir(pr_final_folder + pr_type_subref + "/cycle/"):
                l_pr_sub.append("cycle/" + second_sub)

        for pr_sub in l_pr_sub:
            try:
                l_pr_PDBref = listdir(pr_final_folder + pr_type_subref + "/" + pr_sub + "/")
            except OSError:
                continue
            if debug:
                print("3 %s %s" % (pr_final_folder + pr_type_subref, pr_sub))

            for pr_PDBref in l_pr_PDBref:
                family_ref = pr_PDBref.split("-")[0]
                group_ref = pr_PDBref.split("_")[0].split("-")[-1]
                pr_base = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref
                pr_LGD = pr_base + "/LGD/"
                pr_LSR = pr_base + "/LSR/"
                pr_BS = pr_base + "/BS/"
                if debug:
                    print("4 " + pr_LGD)
                    print("4 " + pr_BS)
                    print("4 " + pr_LSR)

                # ---- folder LSR ----
                for file_LSR in listdir(pr_LSR):
                    if search("LSR_", file_LSR) and file_LSR.split("_")[1] != "REF":
                        # count by type of substructure
                        ligand_sub = file_LSR.split("_")[1]
                        if debug:
                            print("5 " + str(file_LSR))
                        d_sub = d_count.setdefault(ligand_sub, {})
                        d_sub[pr_sub] = d_sub.get(pr_sub, 0) + 1
                    elif search("LSR", file_LSR):
                        # complete LSR, reference case
                        if search("REF_", file_LSR):
                            lig_ref = file_LSR.split("_")[2][:3]
                            type_ref = pr_type_subref.split("_")[0]
                            d_type = d_by_ref.setdefault(lig_ref, {})
                            d_type[type_ref] = d_type.get(type_ref, 0) + 1

                # ---- folder LGD ----
                for file_LGD in listdir(pr_LGD):
                    if search("LGD", file_LGD):
                        ligand = file_LGD.split("_")[1]
                        if ligand == "REF":
                            continue
                        if ligand not in d_lig:
                            d_lig[ligand] = {"count": 0, "group": [], "family": []}
                        d_lig[ligand]["count"] = d_lig[ligand]["count"] + 1
                        d_lig[ligand]["family"].append(str(family_ref))
                        d_lig[ligand]["group"].append(str(group_ref))

                # ---- folder BS: references ----
                l_file_BS = listdir(pr_BS)
                for file_BS in l_file_BS:
                    if search("BS_REF", file_BS):
                        lig_ref = file_BS.split("_")[2]
                        pr_ref = file_BS.split("_")[3].split(".")[0]
                        print("%s %s *****" % (lig_ref, pr_ref))
                        if lig_ref not in d_count_pr:
                            d_count_pr[lig_ref] = {"pr ref": [], "pr queries": [], "lig queries": []}
                        if pr_ref not in d_count_pr[lig_ref]["pr ref"]:
                            d_count_pr[lig_ref]["pr ref"].append(pr_ref)
                            # family lookup stays best effort, but only
                            # ordinary errors are ignored
                            try:
                                family = analysis.findFamily(pr_ref, pathManage.dataset(lig_ref) + "family_PDB.txt")
                                d_count_pr[lig_ref][family] = d_count_pr[lig_ref].get(family, 0) + 1
                            except Exception:
                                pass

                # ---- folder BS: queries ----
                # NOTE(review): lig_ref is the value left by the loops
                # above (original comment: "lig ref define in previous
                # step"); a folder without BS_REF file reuses an older
                # value -- kept as is.
                for file_BS in l_file_BS:
                    if not search("BS_REF", file_BS):
                        lig_querie = file_BS.split("_")[1]
                        prot_querie = file_BS.split("_")[2][0:4]
                        print("%s %s *******" % (prot_querie, lig_querie))
                        d_count_pr[lig_ref]["pr queries"].append(prot_querie)
                        d_count_pr[lig_ref]["lig queries"].append(lig_querie)

    # ---- write and plot ----
    pr_result = pathManage.generatePath(pr_final_folder + "counting/")

    for ligand_sub in d_count:
        p_filout = pr_result + ligand_sub
        with open(p_filout, "w") as filout:
            filout.write("\t".join(d_count[ligand_sub].keys()) + "\n")
            filout.write("\t".join([str(x) for x in d_count[ligand_sub].values()]) + "\n")
        runOtherSoft.piePlot(p_filout)

    with open(pr_result + "count_ligand", "w") as filout_lig:
        filout_lig.write("Ligand ID\tNumber of occurences in the dataset\tNumber of different clusters\tList of clusters\tList of protein families\n")
        for lig in d_lig:
            # The former test "d_lig[lig] > 1" compared a dictionary with
            # an integer: always true on Python 2, TypeError on Python 3.
            # Every ligand is written, which is what was effectively done.
            # NOTE(review): d_lig[lig]["count"] > 1 may have been meant.
            filout_lig.write(str(lig) + "\t" + str(d_lig[lig]["count"]) + "\t" + str(len(set(d_lig[lig]["group"]))) + "\t" + " ".join(d_lig[lig]["group"]) + "\t" + " ".join(d_lig[lig]["family"]) + "\n")

    with open(pr_result + "CountByLigandRef", "w") as filout_LSR_lig:
        for lig_ref in d_by_ref:
            filout_LSR_lig.write("====" + str(lig_ref) + "====\n")
            for sub_ref in d_by_ref[lig_ref]:
                filout_LSR_lig.write(str(sub_ref) + ": " + str(d_by_ref[lig_ref][sub_ref]) + "\n")

    with open(pr_result + "count_pr", "w") as filout_pr_count:
        for lig in d_count_pr:
            filout_pr_count.write("====" + str(lig) + "====\n")
            filout_pr_count.write("nb ref pr: " + str(len(d_count_pr[lig]["pr ref"])) + "\n")
            filout_pr_count.write("nb querie pr: " + str(len(d_count_pr[lig]["pr queries"])) + "\n")
            filout_pr_count.write("nb ligand queries: " + str(len(d_count_pr[lig]["lig queries"])) + "\n")
            for family in d_count_pr[lig]:
                if family not in ("pr ref", "pr queries", "lig queries"):
                    filout_pr_count.write("Ref " + str(family) + ": " + str(d_count_pr[lig][family]) + "\n")

    runOtherSoft.barplot(pr_result + "count_ligand")
def extractLGDfile(prclassif, prresult):
    """Extract the ligand files and LSR SMILES from a classification folder.

    Nothing is done when ``prresult`` already holds more than one entry.
    Otherwise every reference folder of ``prclassif`` (one level below each
    group folder, two levels below "cycle") is processed:

    * the files of its "LGD" folder are copied to ``prresult + <ref>/``
      as "<LSR>_<ligand>_<pdb><ext>";
    * the "LSR*pdb" files of its "LSR" folder (references excluded) are
      converted to SMILES with ``runOtherSoft.babelConvertPDBtoSMILE``.

    One "listLSRsmiles" table is written per reference folder, with the
    columns pi1, pi2 and pi3 ("-" when missing). The SMILES are gathered
    over all the classification folders of a reference before writing;
    previously the file was rewritten for each of them, so only the last
    folder survived.

    Args:
        prclassif: classification folder.
        prresult: existing result folder, used as a string prefix
            (expected to end with "/").

    Returns:
        ``prresult``.
    """
    # result folder already populated
    if len(listdir(prresult)) > 1:
        return prresult

    # reference folders: <group>/<ref> or cycle/<subtype>/<ref>
    lprref = []
    for foldergroup in listdir(prclassif):
        if foldergroup == "cycle":
            for subtype in listdir(prclassif + "/cycle/"):
                for refprot in listdir(prclassif + "/cycle/" + subtype):
                    lprref.append(prclassif + "/cycle/" + subtype + "/" + refprot)
        else:
            for refprot in listdir(prclassif + "/" + foldergroup + "/"):
                lprref.append(prclassif + "/" + foldergroup + "/" + refprot)

    ltypeLSR = ["pi1", "pi2", "pi3"]
    lout = []    # reference folders already created, in order
    dLSR = {}    # reference folder -> {"<lig>-<PDBid>": {type: SMILES}}

    for prefprot in lprref:
        lelempath = prefprot.split("/")
        refprot = lelempath[-1]
        if refprot not in lout:
            pathManage.generatePath(prresult + refprot)
            lout.append(refprot)
            dLSR[refprot] = {}

        # copy file LGD
        for fileLGD in listdir(prefprot + "/LGD/"):
            lelemLGD = fileLGD.split("_")
            ligid = lelemLGD[1]
            if ligid == "REF":
                ligid = lelemLGD[2]
                pdbid = refprot.split("_")[-1]
                LSR = "REF"
            else:
                pdbid = lelemLGD[2]
                LSR = lelempath[-2].replace("_", "")
                if lelempath[-3] == "cycle":
                    LSR = "cycle-" + str(LSR)
            nameout = str(LSR) + "_" + str(ligid) + "_" + str(pdbid) + str(fileLGD[-4:])
            copyfile(prefprot + "/LGD/" + fileLGD, prresult + refprot + "/" + nameout)

        # extract SMILES LSR
        prLSRin = prefprot + "/LSR/"
        for fileLSR in listdir(prLSRin):
            if not (search("^LSR", fileLSR) and search("pdb", fileLSR)):
                continue
            lelemname = fileLSR.split("_")
            nameLSR = lelemname[1]
            if nameLSR == "REF":
                continue
            smiles = runOtherSoft.babelConvertPDBtoSMILE (prLSRin + fileLSR, rm_smi = 1)
            kin = str(lelemname[2]) + "-" + lelemname[3]
            if kin not in dLSR[refprot]:
                dLSR[refprot][kin] = dict((typeLSR, "-") for typeLSR in ltypeLSR)
            dLSR[refprot][kin][nameLSR] = smiles

    # one table per reference folder; "with" closes the file on error too
    for refprot in lout:
        with open(prresult + refprot + "/listLSRsmiles", "w") as filoutLSR:
            filoutLSR.write("\t".join(ltypeLSR) + "\n")
            for kin in dLSR[refprot]:
                lsmiles = [dLSR[refprot][kin][i] for i in ltypeLSR]
                filoutLSR.write(kin + "\t" + "\t".join(lsmiles) + "\n")

    return prresult
def downloadPDB(pr_in):
    """Prepare the folder ``pr_in`` and format the PDB database in it.

    Calls ``pathManage.generatePath(pr_in)`` and then
    ``managePDB.formatPDBDatabase(pr_in)``; nothing is returned.
    NOTE(review): despite the name, no download is visible here --
    presumably handled by managePDB; confirm.
    """
    pathManage.generatePath(pr_in)
    managePDB.formatPDBDatabase(pr_in)
def extractLGDfile(prclassif, prresult):
    """Extract the ligand files and LSR SMILES from a classification folder.

    Every reference folder of ``prclassif`` (one level below each group
    folder, two levels below "cycle") is processed:

    * the files of its "LGD" folder are copied to ``prresult + <ref>/``
      as "<LSR>_<ligand>_<pdb><ext>";
    * the "LSR*pdb" files of its "LSR" folder (references excluded) are
      converted to SMILES with ``runOtherSoft.babelConvertPDBtoSMILE``.

    The SMILES are gathered by result folder and written at the end to
    "listLSRsmiles" (columns pi1, pi2, pi3; "-" when missing). An existing
    file is appended to without repeating the header; a new file starts
    with the header.

    Args:
        prclassif: classification folder.
        prresult: result folder, used as a string prefix (expected to end
            with "/").

    Returns:
        ``prresult``.
    """
    # reference folders: <group>/<ref> or cycle/<subtype>/<ref>
    lprref = []
    for foldergroup in listdir(prclassif):
        if foldergroup == "cycle":
            for subtype in listdir(prclassif + "/cycle/"):
                for refprot in listdir(prclassif + "/cycle/" + subtype):
                    lprref.append(prclassif + "/cycle/" + subtype + "/" + refprot)
        else:
            for refprot in listdir(prclassif + "/" + foldergroup + "/"):
                lprref.append(prclassif + "/" + foldergroup + "/" + refprot)

    ltypeLSR = ["pi1", "pi2", "pi3"]
    lout = []    # reference folders already created
    dLSR = {}    # result folder -> {"<lig>-<PDBid>": {type: SMILES}}

    for prefprot in lprref:
        lelempath = prefprot.split("/")
        refprot = lelempath[-1]
        if refprot not in lout:
            pathManage.generatePath(prresult + refprot)
            lout.append(refprot)

        # copy file LGD
        for fileLGD in listdir(prefprot + "/LGD/"):
            lelemLGD = fileLGD.split("_")
            ligid = lelemLGD[1]
            if ligid == "REF":
                ligid = lelemLGD[2]
                pdbid = refprot.split("_")[-1]
                LSR = "REF"
            else:
                pdbid = lelemLGD[2]
                LSR = lelempath[-2].replace("_", "")
                if lelempath[-3] == "cycle":
                    LSR = "cycle-" + str(LSR)
            nameout = str(LSR) + "_" + str(ligid) + "_" + str(pdbid) + str(fileLGD[-4:])
            copyfile(prefprot + "/LGD/" + fileLGD, prresult + refprot + "/" + nameout)

        # extract SMILES LSR
        folderresult = prresult + refprot + "/"
        dkin = dLSR.setdefault(folderresult, {})
        prLSRin = prefprot + "/LSR/"
        for fileLSR in listdir(prLSRin):
            if not (search("^LSR", fileLSR) and search("pdb", fileLSR)):
                continue
            lelemname = fileLSR.split("_")
            nameLSR = lelemname[1]
            if nameLSR == "REF":
                continue
            smiles = runOtherSoft.babelConvertPDBtoSMILE (prLSRin + fileLSR, rm_smi = 1)
            kin = str(lelemname[2]) + "-" + lelemname[3]
            if kin not in dkin:
                dkin[kin] = dict((typeLSR, "-") for typeLSR in ltypeLSR)
            dkin[kin][nameLSR] = smiles

    # write filout; "with" closes the file even if a write fails
    for folderresult in dLSR:
        pfileLSR = folderresult + "listLSRsmiles"
        newfile = not path.exists(pfileLSR)
        with open(pfileLSR, "w" if newfile else "a") as filoutLSR:
            if newfile:
                filoutLSR.write("\t".join(ltypeLSR) + "\n")
            for kin in dLSR[folderresult]:
                lsmiles = [dLSR[folderresult][kin][i] for i in ltypeLSR]
                # same output as the former print statement
                print("%s l.122 ligandSimilarity.py" % (lsmiles,))
                filoutLSR.write(kin + "\t" + "\t".join(lsmiles) + "\n")

    return prresult