def classifRefProtein(pr_dataset, l_lig, thresold_identity=30.0, thresold_similarity=30.0,
                      p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """Collect one fasta per reference folder, compare them and group the references.

    For every ligand of l_lig, each sub-folder of the ligand dataset directory
    (pathManage.dataset) is scanned for the first file whose name starts with
    the folder name. A fasta is imported for the PDB identifier derived from
    that file name into "<result>alignSeq/". The collected fasta paths are
    given to applyNeedleList; the returned dictionary is written as a
    "similarity" and an "identity" matrix and then used by GroupRef and
    MergeGroup for both criteria.

    Arguments:
        pr_dataset -- not used: the dataset folder is resolved for each ligand
                      with pathManage.dataset. Kept for backward compatibility.
        l_lig -- iterable of ligand names.
        thresold_identity -- threshold passed to GroupRef for "identity".
        thresold_similarity -- threshold passed to GroupRef for "similarity".
        p_fasta_global -- path of the global fasta file forwarded to
                          downloadFile.importFasta (fastaGlobal).

    Returns None; every output is written under pathManage.result("clasifRef").

    NOTE(review): a second definition of classifRefProtein appears later in
    this file and replaces this one at import time -- consider removing one.
    """

    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig:
        # local name: do not overwrite the pr_dataset argument
        pr_dataset_lig = pathManage.dataset(lig)
        for PDB_folder in listdir(pr_dataset_lig):
            try:
                l_file = listdir(pr_dataset_lig + PDB_folder)
            except OSError:
                # entry is not a readable directory -> not a reference folder
                continue

            for file_ref in l_file:
                # plain prefix test: the folder name must not be read as a regex
                if not file_ref.startswith(PDB_folder):
                    continue

                # drop the 4-character extension, lower-case the 4-letter PDB code
                PDB_ID = file_ref[0:-4]
                PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]  # PDB ID with chain associated
                p_fasta = downloadFile.importFasta(
                    PDB_ID, pr_align_seq, dir_by_PDB=0, debug=1, fastaGlobal=p_fasta_global)
                # extractReference treats a 0 returned by importFasta as a failure;
                # keep such entries out of the alignment list
                if p_fasta != 0:
                    l_p_fasta.append(p_fasta)
                # only the first matching file of a folder is considered
                break

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # Group reference
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct,
                     p_db_pdb="/home/borrel/PDB/",
                     p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """Build the reference dataset dictionary for one substructure.

    The PDB identifiers listed for substruct by tool.parseLigandPDBList are
    upper-cased, then their PDB and fasta files are imported in p_dir_dataset.
    An entry is kept only when both imports return something different from 0.
    The resolution of every kept entry is written to
    p_dir_result + "resolution_ref.txt" and given to runOtherSoft.Rhistogram.

    Arguments:
        p_list_ligand -- path given to tool.parseLigandPDBList.
        p_dir_dataset -- folder where PDB and fasta files are imported.
        p_dir_result -- prefix of the resolution file path (concatenated as is).
        substruct -- key selecting the PDB identifiers in the parsed ligand list.
        p_db_pdb -- local PDB folder forwarded to downloadFile.importPDB (dbPDB).
        p_fasta_global -- global fasta file forwarded to
                          downloadFile.importFasta (fastaGlobal).

    Returns a dictionary indexed by PDB ID; each value holds the keys "p_pdb",
    "p_fasta", "p_pdb_chain", "p_fasta_chain", "conserve" (set to 1) and "RX".

    NOTE(review): a second definition of extractReference appears later in
    this file and replaces this one at import time -- consider removing one.
    """

    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(
            PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, dbPDB=p_db_pdb)
        p_fasta = downloadFile.importFasta(
            PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, fastaGlobal=p_fasta_global)

        # 0 is the failure value tested for both imports
        if p_pdb != 0 and p_fasta != 0:
            l_p_pdb_chain = separeByChain(p_pdb)
            l_p_fasta_chain = separeChainFasta(p_fasta)
            d_dataset[PDB_ID] = {
                "p_pdb": p_pdb,
                "p_fasta": p_fasta,
                "p_pdb_chain": l_p_pdb_chain,
                "p_fasta_chain": l_p_fasta_chain,
                "conserve": 1,
            }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"
    # the context manager closes the file even if parsing a resolution fails
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID, d_ref in d_dataset.items():
            RX = parsePDB.resolution(d_ref["p_pdb"])
            try:
                d_ref["RX"] = float(RX)
            except (TypeError, ValueError):
                # resolution is not a number -> arbitrary fallback value
                d_ref["RX"] = 100.0
            # the raw value is written, not the fallback
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")

    return d_dataset
def classifRefProtein(pr_dataset, l_lig, thresold_identity=30.0, thresold_similarity=30.0,
                      p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """Collect one fasta per reference folder, compare them and group the references.

    For every ligand of l_lig, each sub-folder of the ligand dataset directory
    (pathManage.dataset) is scanned for the first file whose name starts with
    the folder name. A fasta is imported for the PDB identifier derived from
    that file name into "<result>alignSeq/". The collected fasta paths are
    given to applyNeedleList; the returned dictionary is written as a
    "similarity" and an "identity" matrix and then used by GroupRef and
    MergeGroup for both criteria.

    Arguments:
        pr_dataset -- not used: the dataset folder is resolved for each ligand
                      with pathManage.dataset. Kept for backward compatibility.
        l_lig -- iterable of ligand names.
        thresold_identity -- threshold passed to GroupRef for "identity".
        thresold_similarity -- threshold passed to GroupRef for "similarity".
        p_fasta_global -- path of the global fasta file forwarded to
                          downloadFile.importFasta (fastaGlobal).

    Returns None; every output is written under pathManage.result("clasifRef").

    NOTE(review): an earlier definition of classifRefProtein appears in this
    file; this later one is the binding in effect -- consider removing one.
    """

    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig:
        # local name: do not overwrite the pr_dataset argument
        pr_dataset_lig = pathManage.dataset(lig)
        for PDB_folder in listdir(pr_dataset_lig):
            try:
                l_file = listdir(pr_dataset_lig + PDB_folder)
            except OSError:
                # entry is not a readable directory -> not a reference folder
                continue

            for file_ref in l_file:
                # plain prefix test: the folder name must not be read as a regex
                if not file_ref.startswith(PDB_folder):
                    continue

                # drop the 4-character extension, lower-case the 4-letter PDB code
                PDB_ID = file_ref[0:-4]
                PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]  # PDB ID with chain associated
                p_fasta = downloadFile.importFasta(
                    PDB_ID, pr_align_seq, dir_by_PDB=0, debug=1, fastaGlobal=p_fasta_global)
                # extractReference treats a 0 returned by importFasta as a failure;
                # keep such entries out of the alignment list
                if p_fasta != 0:
                    l_p_fasta.append(p_fasta)
                # only the first matching file of a folder is considered
                break

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # Group reference
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct,
                     p_db_pdb="/home/borrel/PDB/",
                     p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """Build the reference dataset dictionary for one substructure.

    The PDB identifiers listed for substruct by tool.parseLigandPDBList are
    upper-cased, then their PDB and fasta files are imported in p_dir_dataset.
    An entry is kept only when both imports return something different from 0.
    The resolution of every kept entry is written to
    p_dir_result + "resolution_ref.txt" and given to runOtherSoft.Rhistogram.

    Arguments:
        p_list_ligand -- path given to tool.parseLigandPDBList.
        p_dir_dataset -- folder where PDB and fasta files are imported.
        p_dir_result -- prefix of the resolution file path (concatenated as is).
        substruct -- key selecting the PDB identifiers in the parsed ligand list.
        p_db_pdb -- local PDB folder forwarded to downloadFile.importPDB (dbPDB).
        p_fasta_global -- global fasta file forwarded to
                          downloadFile.importFasta (fastaGlobal).

    Returns a dictionary indexed by PDB ID; each value holds the keys "p_pdb",
    "p_fasta", "p_pdb_chain", "p_fasta_chain", "conserve" (set to 1) and "RX".

    NOTE(review): an earlier definition of extractReference appears in this
    file; this later one is the binding in effect -- consider removing one.
    """

    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(
            PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, dbPDB=p_db_pdb)
        p_fasta = downloadFile.importFasta(
            PDB_ID, p_dir_dataset, dir_by_PDB=1, debug=1, fastaGlobal=p_fasta_global)

        # 0 is the failure value tested for both imports
        if p_pdb != 0 and p_fasta != 0:
            l_p_pdb_chain = separeByChain(p_pdb)
            l_p_fasta_chain = separeChainFasta(p_fasta)
            d_dataset[PDB_ID] = {
                "p_pdb": p_pdb,
                "p_fasta": p_fasta,
                "p_pdb_chain": l_p_pdb_chain,
                "p_fasta_chain": l_p_fasta_chain,
                "conserve": 1,
            }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"
    # the context manager closes the file even if parsing a resolution fails
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID, d_ref in d_dataset.items():
            RX = parsePDB.resolution(d_ref["p_pdb"])
            try:
                d_ref["RX"] = float(RX)
            except (TypeError, ValueError):
                # resolution is not a number -> arbitrary fallback value
                d_ref["RX"] = 100.0
            # the raw value is written, not the fallback
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")

    return d_dataset