Example #1
0
def GroupRef(d_matrix, k_in, p_filout, thresold_group, l_lig):
    """Greedily group PDB identifiers by pairwise score and write the groups.

    Identifiers are collected from the outer and inner keys of ``d_matrix``.
    The first identifier seeds group 1. Every following identifier joins the
    first group holding a member whose score with it is
    ``>= thresold_group``; otherwise it opens a new group.

    Args:
        d_matrix: nested mapping read as ``d_matrix[id_a][id_b][k_in]``; the
            value must be convertible with ``float``. A pair may be stored in
            either orientation.
        k_in: key of the score to read for each pair.
        p_filout: path of the tab-separated file to write.
        thresold_group: minimal score for an identifier to join a group.
        l_lig: ligand names, tried in order to find the family of each
            identifier.

    Returns:
        ``p_filout``.

    Raises:
        KeyError: if a pair is stored in neither orientation.
    """
    # Unique identifiers in first-seen order. ``list()`` makes a real list:
    # a dict view cannot be appended to on Python 3.
    l_PDB = list(d_matrix)
    s_seen = set(l_PDB)
    for PDB1 in d_matrix:
        for PDB2 in d_matrix[PDB1]:
            if PDB2 not in s_seen:
                s_seen.add(PDB2)
                l_PDB.append(PDB2)

    d_group = {1: []}
    for PDB_in in l_PDB:
        if not d_group[1]:
            # The first identifier seeds group 1 and is done. Without this
            # ``continue`` it would also be stored in a spurious second group.
            d_group[1].append(PDB_in)
            continue

        classed = False
        for group in sorted(d_group):
            for PDB_classed in d_group[group]:
                try:
                    val = float(d_matrix[PDB_classed][PDB_in][k_in])
                except (LookupError, TypeError, ValueError):
                    # the pair may be stored in the other orientation
                    val = float(d_matrix[PDB_in][PDB_classed][k_in])
                if val >= thresold_group:
                    d_group[group].append(PDB_in)
                    classed = True
                    break
            if classed:
                break

        if not classed:
            d_group[max(d_group) + 1] = [PDB_in]

    with open(p_filout, "w") as filout:
        filout.write("PDB\tGroup\tFamily\n")
        for group in sorted(d_group):
            for pdb in d_group[group]:
                for lig in l_lig:
                    # Only the last element of the findFamily result is used;
                    # the string "None" is treated as "no family found".
                    family = analysis.findFamily(pdb, pathManage.findFamilyFile(lig))
                    family = family[-1]
                    if family != "None":
                        filout.write(str(pdb) + "\t" + str(group) + "\t" + str(family) + "\n")
                        # the first ligand giving a family wins
                        break

    return p_filout
Example #2
0
def GroupRef(d_matrix, k_in, p_filout, thresold_group, l_lig):
    """Greedily group PDB identifiers by pairwise score and write the groups.

    Identifiers are collected from the outer and inner keys of ``d_matrix``.
    The first identifier seeds group 1. Every following identifier joins the
    first group holding a member whose score with it is
    ``>= thresold_group``; otherwise it opens a new group.

    Args:
        d_matrix: nested mapping read as ``d_matrix[id_a][id_b][k_in]``; the
            value must be convertible with ``float``. A pair may be stored in
            either orientation.
        k_in: key of the score to read for each pair.
        p_filout: path of the tab-separated file to write.
        thresold_group: minimal score for an identifier to join a group.
        l_lig: ligand names, tried in order to find the family of each
            identifier.

    Returns:
        ``p_filout``.

    Raises:
        KeyError: if a pair is stored in neither orientation.
    """
    # Unique identifiers in first-seen order. ``list()`` makes a real list:
    # a dict view cannot be appended to on Python 3.
    l_PDB = list(d_matrix)
    s_seen = set(l_PDB)
    for PDB1 in d_matrix:
        for PDB2 in d_matrix[PDB1]:
            if PDB2 not in s_seen:
                s_seen.add(PDB2)
                l_PDB.append(PDB2)

    d_group = {1: []}
    for PDB_in in l_PDB:
        if not d_group[1]:
            # The first identifier seeds group 1 and is done. Without this
            # ``continue`` it would also be stored in a spurious second group.
            d_group[1].append(PDB_in)
            continue

        classed = False
        for group in sorted(d_group):
            for PDB_classed in d_group[group]:
                try:
                    val = float(d_matrix[PDB_classed][PDB_in][k_in])
                except (LookupError, TypeError, ValueError):
                    # the pair may be stored in the other orientation
                    val = float(d_matrix[PDB_in][PDB_classed][k_in])
                if val >= thresold_group:
                    d_group[group].append(PDB_in)
                    classed = True
                    break
            if classed:
                break

        if not classed:
            d_group[max(d_group) + 1] = [PDB_in]

    with open(p_filout, "w") as filout:
        filout.write("PDB\tGroup\tFamily\n")
        for group in sorted(d_group):
            for pdb in d_group[group]:
                for lig in l_lig:
                    # Only the last element of the findFamily result is used;
                    # the string "None" is treated as "no family found".
                    family = analysis.findFamily(pdb, pathManage.findFamilyFile(lig))
                    family = family[-1]
                    if family != "None":
                        filout.write(str(pdb) + "\t" + str(group) + "\t" + str(family) + "\n")
                        # the first ligand giving a family wins
                        break

    return p_filout
Example #3
0
File: main.py Project: ABorrel/LSRs
def manageResult(l_ligand, name_final, l_out=None):
    """Arrange the SMILES result files of every ligand into the final folder.

    For each ligand, every path returned by ``pathManage.findListSmileFile``
    that matches both "smile" and ".txt" is handed to
    ``arrangeResult.globalArrangement`` together with the ligand's family
    file.

    Args:
        l_ligand: ligand names to process.
        name_final: suffix of the result folder ("final_" + name_final).
        l_out: list forwarded unchanged to ``globalArrangement``; a new empty
            list is used when omitted.

    Returns:
        Always 1.
    """
    # A fresh list per call: a ``[]`` default is created once and shared by
    # every call, so anything the callee stored in it would leak across calls.
    if l_out is None:
        l_out = []

    pr_result = pathManage.result("final_" + name_final)

    for name_lig in l_ligand:
        l_p_smile = pathManage.findListSmileFile(name_lig)
        p_file_famile = pathManage.findFamilyFile(name_lig)
        for p_smile in l_p_smile:
            # The former "ribose" branch made the exact same call and its
            # condition was a subset of this one, so a single test suffices.
            # NOTE(review): if `search` is re.search, the "." in ".txt"
            # matches any character -- confirm whether that is intended.
            if search("smile", p_smile) and search(".txt", p_smile):
                arrangeResult.globalArrangement(pr_result, p_smile, p_file_famile, name_lig, l_out)

    return 1
Example #4
0
def manageResult(l_ligand, name_final, l_out=None):
    """Arrange the SMILES result files of every ligand into the final folder.

    For each ligand, every path returned by ``pathManage.findListSmileFile``
    that matches both "smile" and ".txt" is handed to
    ``arrangeResult.globalArrangement`` together with the ligand's family
    file.

    Args:
        l_ligand: ligand names to process.
        name_final: suffix of the result folder ("final_" + name_final).
        l_out: list forwarded unchanged to ``globalArrangement``; a new empty
            list is used when omitted.

    Returns:
        Always 1.
    """
    # A fresh list per call: a ``[]`` default is created once and shared by
    # every call, so anything the callee stored in it would leak across calls.
    if l_out is None:
        l_out = []

    pr_result = pathManage.result("final_" + name_final)

    for name_lig in l_ligand:
        l_p_smile = pathManage.findListSmileFile(name_lig)
        p_file_famile = pathManage.findFamilyFile(name_lig)
        for p_smile in l_p_smile:
            # The former "ribose" branch made the exact same call and its
            # condition was a subset of this one, so a single test suffices.
            # NOTE(review): if `search` is re.search, the "." in ".txt"
            # matches any character -- confirm whether that is intended.
            if search("smile", p_smile) and search(".txt", p_smile):
                arrangeResult.globalArrangement(pr_result, p_smile, p_file_famile, name_lig, l_out)

    return 1
Example #5
0
def qualityExtraction(l_ligand, name_folder, p_list_ligand, thresold_sheap):
    """Write a text report summarising the extraction for each ligand.

    The report ``quality_extraction.txt`` is written in the folder returned
    by ``pathManage.result("final_" + name_folder)`` and has four sections:

    1. number of PDB entries per ligand, from
       ``tool.parseLigandPDBList(p_list_ligand)``;
    2. number of sub-directories in each ligand result folder, minus 2;
    3. per ligand, the sum, mean +/- std and maximum of the number of "CX"
       files found in each 4-character reference folder;
    4. per ligand, the number of substituent ``.pdb`` files per atom, and how
       many of them have a score in their file name above
       ``thresold_sheap``.

    While building section 3, family files (``reference_family_<ligand>.txt``,
    ``count_family_<ligand>.txt`` and ``reference_family_all.txt``) are
    written in ``pathManage.result()`` and the two count files are passed to
    ``runOtherSoft.piePlot``.

    Args:
        l_ligand: ligand names (strings).
        name_folder: suffix of the final result folder.
        p_list_ligand: path given to ``tool.parseLigandPDBList``.
        thresold_sheap: score a substituent file must exceed to be counted in
            the "ShaEP" lines.
    """
    pr_result = pathManage.result("final_" + name_folder)

    # `with` guarantees every handle is closed even if a later step raises.
    with open(pr_result + "quality_extraction.txt", "w") as filout:

        # number PDB by ligand, without filter
        filout.write("Number PDB by ligand:\n")
        d_dataset = tool.parseLigandPDBList(p_list_ligand)
        for ligand in l_ligand:
            filout.write(str(ligand) + ": " + str(len(d_dataset[ligand])) + "\n")

        # number references
        filout.write("\n*************\n\nNumber references by ligands:\n")
        for ligand in l_ligand:
            pr_result_ligand = pathManage.result(ligand)
            # NOTE(review): the count starts at -2, presumably to discount two
            # sub-directories that are not references -- confirm.
            nb_ref = -2
            for f in listdir(pr_result_ligand):
                if path.isdir(pr_result_ligand + "/" + f):
                    nb_ref = nb_ref + 1
            filout.write(ligand + ": " + str(nb_ref) + "\n")

        # number of query by ref in means and max and min (after blast)
        filout.write("\n*************\n\nNumber means queries by references:\n")
        p_family_all = pathManage.result() + "reference_family_all.txt"
        d_family_all = {}
        with open(p_family_all, "w") as filout_family_all:
            for ligand in l_ligand:
                d_nb_query = {}
                d_family = {}
                p_filout_family = pathManage.result() + "reference_family_" + ligand + ".txt"
                p_filout_family_count = pathManage.result() + "count_family_" + ligand + ".txt"
                with open(p_filout_family, "w") as filout_family, \
                        open(p_filout_family_count, "w") as filout_family_count:
                    pr_result_ligand = pathManage.result(ligand)
                    for f in listdir(pr_result_ligand):
                        # only 4-character folder names are treated as references
                        if not (path.isdir(pr_result_ligand + "/" + f) and len(f) == 4):
                            continue

                        # count by family (last element of the findFamily result)
                        family_ref = analysis.findFamily(f, pathManage.findFamilyFile(ligand))
                        filout_family.write("\t".join(family_ref) + "\n")
                        name_family = family_ref[-1]
                        d_family[name_family] = d_family.get(name_family, 0) + 1
                        d_family_all[name_family] = d_family_all.get(name_family, 0) + 1

                        # count the query files of this reference
                        d_nb_query[f] = 0
                        for file_query in listdir(pr_result_ligand + "/" + f + "/"):
                            if search("CX", file_query):
                                d_nb_query[f] = d_nb_query[f] + 1

                    # A real list: numpy and indexing do not accept the dict
                    # views returned by values()/keys() on Python 3.
                    l_nb_query = list(d_nb_query.values())
                    filout.write(ligand + ": " + str(np.sum(l_nb_query)) + "\n")
                    filout.write(ligand + ": " + str(np.mean(l_nb_query)) + "+/-" + str(np.std(l_nb_query)) + "\n")
                    if l_nb_query:
                        # first reference (in dict order) reaching the maximum
                        ref_max = max(d_nb_query, key=d_nb_query.get)
                        filout.write("MAX " + str(ligand) + ": " + str(d_nb_query[ref_max]) + " " + str(ref_max) + "\n")
                    else:
                        # no reference folder: max() of an empty sequence would raise
                        filout.write("MAX " + str(ligand) + ": NA NA\n")

                    # family
                    filout_family_count.write("\t".join(d_family.keys()) + "\n")
                    l_values = [str(x) for x in d_family.values()]
                    filout_family_count.write("\t".join(l_values) + "\n")

                # the count file is closed before it is handed to the plot step
                runOtherSoft.piePlot(p_filout_family_count)

            # all family
            filout_family_all.write("\t".join(d_family_all.keys()) + "\n")
            l_values = [str(x) for x in d_family_all.values()]
            filout_family_all.write("\t".join(l_values) + "\n")

        runOtherSoft.piePlot(p_family_all)

        # number subref by ligand
        filout.write("\n*************\n\nNumber of subref considered:\n")
        for ligand in l_ligand:
            d_nb_sub = {}
            d_nb_sub_sheap = {}
            pr_result_ligand = pathManage.result(ligand)
            for ref in listdir(pr_result_ligand):
                if not (path.isdir(pr_result_ligand + "/" + ref) and len(ref) == 4):
                    continue
                for file_query in listdir(pr_result_ligand + "/" + ref + "/"):
                    if not (search("substituent", file_query) and search(".pdb", file_query)):
                        continue
                    # The name is split on "_": the second-to-last field is the
                    # atom, the last field minus its 4 final characters is the score.
                    l_elem = file_query.split("_")
                    if len(l_elem) < 2:
                        # name without the expected fields: skip it like an
                        # unparsable score instead of raising IndexError
                        continue
                    atom_substituate = l_elem[-2]
                    try:
                        value_sheap = float(l_elem[-1][:-4])
                    except ValueError:
                        continue
                    d_nb_sub[atom_substituate] = d_nb_sub.get(atom_substituate, 0) + 1
                    if value_sheap > thresold_sheap:
                        d_nb_sub_sheap[atom_substituate] = d_nb_sub_sheap.get(atom_substituate, 0) + 1

            filout.write("\n" + ligand + "\n")
            for atom_substituate in d_nb_sub:
                filout.write(atom_substituate + ": " + str(d_nb_sub[atom_substituate]) + "\n")
                filout.write(atom_substituate + " ShaEP: " + str(d_nb_sub_sheap.get(atom_substituate, 0)) + "\n")