예제 #1
0
파일: buildData.py 프로젝트: papoku/LSRs
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct,
                     p_db_PDB="/home/borrel/PDB/",
                     p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """Build the reference dataset of one ligand and record its resolutions.

    For every PDB ID listed for ``substruct`` the PDB and fasta files are
    imported, split by chain and stored in the returned dictionary. The
    resolution of every retained entry is written to
    ``p_dir_result + "resolution_ref.txt"`` and that file is handed to
    ``runOtherSoft.Rhistogram``.

    Args:
        p_list_ligand: path given to ``tool.parseLigandPDBList``.
        p_dir_dataset: dataset directory given to the import helpers.
        p_dir_result: prefix of the resolution file; it is concatenated as
            is, so it must end with a path separator.
        substruct: key of the ligand in the parsed ligand list.
        p_db_PDB: value forwarded as ``dbPDB`` to ``downloadFile.importPDB``.
        p_fasta_global: value forwarded as ``fastaGlobal`` to
            ``downloadFile.importFasta``.

    Returns:
        dict keyed by upper-case PDB ID; each value holds the keys
        ``p_pdb``, ``p_fasta``, ``p_pdb_chain``, ``p_fasta_chain``,
        ``conserve`` (always 1 here) and ``RX``.
    """
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(PDB_ID,
                                       p_dir_dataset,
                                       dir_by_PDB=1,
                                       debug=1,
                                       dbPDB=p_db_PDB)
        p_fasta = downloadFile.importFasta(PDB_ID,
                                           p_dir_dataset,
                                           dir_by_PDB=1,
                                           debug=1,
                                           fastaGlobal=p_fasta_global)

        # an entry is kept only when neither import returned 0
        if p_pdb == 0 or p_fasta == 0:
            continue

        l_p_pdb_chain = separeByChain(p_pdb)
        l_p_fasta_chain = separeChainFasta(p_fasta)
        d_dataset[PDB_ID] = {
            "p_pdb": p_pdb,
            "p_fasta": p_fasta,
            "p_pdb_chain": l_p_pdb_chain,
            "p_fasta_chain": l_p_fasta_chain,
            "conserve": 1,
        }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"
    # the context manager closes the file even if a resolution lookup fails
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID in d_dataset:
            RX = parsePDB.resolution(d_dataset[PDB_ID]["p_pdb"])
            try:
                d_dataset[PDB_ID]["RX"] = float(RX)
            except (TypeError, ValueError):
                # resolution is not numeric: 100.0 is stored as placeholder
                d_dataset[PDB_ID]["RX"] = 100.0
            # the file keeps the raw value, not the placeholder
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")
    return d_dataset
예제 #2
0
파일: buildData.py 프로젝트: ABorrel/LSRs
def extractReference (p_list_ligand, p_dir_dataset, p_dir_result, substruct,
                      p_db_PDB = "/home/borrel/PDB/",
                      p_fasta_global = "/home/borrel/Yue_project/pdb_seqres.txt"):
    """Collect the reference structures of a ligand and log their resolution.

    Each PDB ID found under ``substruct`` in the parsed ligand list is
    imported (PDB + fasta) and split by chain. The resolution of every
    retained structure is written, one ``ID<TAB>resolution`` line each, to
    ``p_dir_result + "resolution_ref.txt"``, which is then passed to
    ``runOtherSoft.Rhistogram``.

    Args:
        p_list_ligand: path parsed by ``tool.parseLigandPDBList``.
        p_dir_dataset: directory handed to the import helpers.
        p_dir_result: prefix of the output file (concatenated without any
            separator, so a trailing separator is expected).
        substruct: ligand key looked up in the parsed list.
        p_db_PDB: forwarded to ``downloadFile.importPDB`` as ``dbPDB``.
        p_fasta_global: forwarded to ``downloadFile.importFasta`` as
            ``fastaGlobal``.

    Returns:
        dict mapping the upper-case PDB ID to a dict with the keys
        ``p_pdb``, ``p_fasta``, ``p_pdb_chain``, ``p_fasta_chain``,
        ``conserve`` and ``RX``.
    """
    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList (p_list_ligand)

    # download PDB and fasta associated
    for PDB_ID in d_ligand[substruct] :
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(PDB_ID, p_dir_dataset, dir_by_PDB = 1, debug = 1, dbPDB = p_db_PDB)
        p_fasta = downloadFile.importFasta(PDB_ID, p_dir_dataset, dir_by_PDB = 1, debug = 1, fastaGlobal = p_fasta_global)

        # both imports must have returned something other than 0
        if p_pdb != 0 and p_fasta != 0 :
            l_p_pdb_chain = separeByChain (p_pdb)
            l_p_fasta_chain = separeChainFasta(p_fasta)
            d_dataset[PDB_ID] = {"p_pdb": p_pdb,
                                 "p_fasta": p_fasta,
                                 "p_pdb_chain": l_p_pdb_chain,
                                 "p_fasta_chain": l_p_fasta_chain,
                                 "conserve": 1}

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"
    # "with" guarantees the handle is released when a lookup or write fails
    with open (p_file_RX, "w") as file_RX :
        for PDB_ID in d_dataset :
            RX = parsePDB.resolution(d_dataset[PDB_ID]["p_pdb"])
            try :
                d_dataset[PDB_ID]["RX"] = float(RX)
            except (TypeError, ValueError) :
                # non numeric resolution -> placeholder value
                d_dataset[PDB_ID]["RX"] = 100.0
            # the raw value is written, whatever was stored in the dataset
            file_RX.write (PDB_ID + "\t" + str (RX) + "\n")

    runOtherSoft.Rhistogram (p_file_RX, "RX_ref_no_filter")
    return d_dataset
예제 #3
0
def _writeNbPDBByLigand (filout, l_ligand, p_list_ligand) :
    """Write, for each ligand, the number of PDB listed before any filter."""

    filout.write ("Number PDB by ligand:\n")
    d_dataset = tool.parseLigandPDBList(p_list_ligand)
    for ligand in l_ligand :
        filout.write (str (ligand) + ": " + str (len (d_dataset[ligand])) + "\n")


def _writeNbReferences (filout, l_ligand) :
    """Write, for each ligand, the number of folders in its result directory."""

    filout.write ("\n*************\n\nNumber references by ligands:\n")
    for ligand in l_ligand :
        pr_result_ligand = pathManage.result(ligand)
        # NOTE(review): the count starts at -2, presumably to discount two
        # folders that are not references -- confirm against the result layout
        nb_ref = -2
        for f in listdir(pr_result_ligand) :
            if path.isdir (pr_result_ligand + "/" + f) :
                nb_ref = nb_ref + 1
        filout.write (ligand + ": " + str (nb_ref) + "\n")


def _writeQueriesAndFamilies (filout, l_ligand) :
    """Write query statistics by reference and the family count files.

    A reference is a 4-character folder of the ligand result directory and a
    query is a file of that folder whose name contains "CX". Family tables
    are written by ligand and for all ligands, then sent to
    ``runOtherSoft.piePlot``.
    """

    filout.write ("\n*************\n\nNumber means queries by references:\n")
    p_family_all = pathManage.result() + "reference_family_all.txt"
    d_family_all = {}
    with open (p_family_all, "w") as filout_family_all :
        for ligand in l_ligand :
            d_nb_query = {}
            d_family = {}
            p_filout_family = pathManage.result() + "reference_family_" + ligand + ".txt"
            p_filout_family_count = pathManage.result () + "count_family_" + ligand + ".txt"
            with open (p_filout_family, "w") as filout_family, \
                    open (p_filout_family_count, "w") as filout_family_count :
                pr_result_ligand = pathManage.result(ligand)
                for f in listdir(pr_result_ligand) :
                    if not (path.isdir (pr_result_ligand + "/" + f) and len (f) == 4) :
                        continue

                    # count by family, the family name is the last field
                    family_ref = analysis.findFamily(f, pathManage.findFamilyFile (ligand))
                    filout_family.write ("\t".join (family_ref) + "\n")
                    family = family_ref[-1]
                    d_family[family] = d_family.get (family, 0) + 1
                    d_family_all[family] = d_family_all.get (family, 0) + 1

                    # count queries of the reference
                    d_nb_query[f] = 0
                    for file_query in listdir(pr_result_ligand + "/" + f + "/") :
                        if search ("CX", file_query) :
                            d_nb_query[f] = d_nb_query[f] + 1

                # a real list is needed: numpy does not reduce dict views
                l_nb_query = list (d_nb_query.values ())
                filout.write (ligand + ": " + str(np.sum(l_nb_query)) + "\n")
                if l_nb_query :
                    filout.write (ligand + ": " + str(np.mean(l_nb_query)) + "+/-" + str(np.std (l_nb_query)) + "\n")
                    # first reference, in dictionary order, with the most queries
                    ref_max = max (d_nb_query, key = d_nb_query.get)
                    filout.write ("MAX " + str (ligand) + ": " + str (d_nb_query[ref_max]) + " " + str (ref_max) + "\n")
                else :
                    # no reference folder: mean and max are undefined
                    filout.write (ligand + ": NA\n")
                    filout.write ("MAX " + str (ligand) + ": NA\n")

                # family
                filout_family_count.write ("\t".join(d_family.keys ()) + "\n")
                l_values = [str(x) for x in d_family.values ()]
                filout_family_count.write ("\t".join(l_values) + "\n")

            # the count file is closed before it is plotted
            runOtherSoft.piePlot(p_filout_family_count)

        # all family
        filout_family_all.write ("\t".join(d_family_all.keys ()) + "\n")
        l_values = [str(x) for x in d_family_all.values ()]
        filout_family_all.write ("\t".join(l_values) + "\n")

    runOtherSoft.piePlot(p_family_all)


def _writeNbSubReferences (filout, l_ligand, thresold_sheap) :
    """Write, by ligand, the number of substituent files for each atom.

    The atom and the score are the two last "_" separated fields of the file
    name; files without such fields or with a non numeric score are skipped.
    """

    filout.write ("\n*************\n\nNumber of subref considered:\n")
    for ligand in l_ligand :
        d_nb_sub = {}
        d_nb_sub_sheap = {}
        pr_result_ligand = pathManage.result(ligand)
        for ref in listdir(pr_result_ligand) :
            if not (path.isdir (pr_result_ligand + "/" + ref) and len (ref) == 4) :
                continue
            for file_query in listdir(pr_result_ligand + "/" + ref + "/") :
                # NOTE(review): ".pdb" is a regex here, the dot matches any
                # character -- confirm whether an extension test was intended
                if not (search ("substituent", file_query) and search (".pdb", file_query)) :
                    continue
                l_elem = file_query.split ("_")
                if len (l_elem) < 2 :
                    continue
                atom_substituate = l_elem[-2]
                # the four last characters (presumably ".pdb") are dropped
                try :
                    value_sheap = float(l_elem[-1][:-4])
                except ValueError :
                    continue

                d_nb_sub[atom_substituate] = d_nb_sub.get (atom_substituate, 0) + 1
                if value_sheap > thresold_sheap :
                    d_nb_sub_sheap[atom_substituate] = d_nb_sub_sheap.get (atom_substituate, 0) + 1

        filout.write ("\n" + ligand + "\n")
        for atom_substituate in d_nb_sub :
            filout.write (atom_substituate + ": " + str (d_nb_sub[atom_substituate]) + "\n")
            filout.write (atom_substituate + " ShaEP: " + str (d_nb_sub_sheap.get (atom_substituate, 0)) + "\n")


def qualityExtraction (l_ligand, name_folder, p_list_ligand, thresold_sheap) :
    """Write the quality report of an extraction in "quality_extraction.txt".

    The report is created in ``pathManage.result("final_" + name_folder)``
    and contains four sections: number of PDB by ligand, number of
    references by ligand, number of queries by reference and number of
    substituent files by atom. Family tables are also written in
    ``pathManage.result()`` and plotted with ``runOtherSoft.piePlot``.

    Args:
        l_ligand: list of ligand names, used as dictionary keys and as
            result folder names.
        name_folder: suffix of the "final_" result folder.
        p_list_ligand: path given to ``tool.parseLigandPDBList``.
        thresold_sheap: a substituent file is counted in the "ShaEP" line
            only if the score read in its name is strictly above this value.

    Returns:
        None
    """

    pr_result = pathManage.result("final_" + name_folder)

    # the report is closed even if one of the sections fails
    with open(pr_result + "quality_extraction.txt", "w") as filout :
        # number PDB by ligand, without filter
        _writeNbPDBByLigand (filout, l_ligand, p_list_ligand)
        # number references
        _writeNbReferences (filout, l_ligand)
        # number of query by ref in means and max (after blast)
        _writeQueriesAndFamilies (filout, l_ligand)
        # number subref by ligand
        _writeNbSubReferences (filout, l_ligand, thresold_sheap)