Example #1
0
def countingSubstituent (name_final, debug = 1):
    """
    Count substituents, ligands and proteins found in a "final" result folder
    and write the summaries in its "counting/" sub-folder.

    The folder pathManage.result("final_" + name_final) is walked as
    <type sub reference>/<sub>/<PDB reference>/{LSR,LGD,BS}/ ; an entry named
    "cycle" at the <sub> level is expanded by one more directory level
    ("cycle/<second sub>"). Identifiers are retrieved by splitting file and
    folder names on "_" and "-".

    Files written in the counting folder:
        - one file per ligand found in the non-reference "LSR_" files
          (number of files by <sub> folder), given to runOtherSoft.piePlot
        - "count_ligand": one line per non-reference ligand found in the LGD
          files, given to runOtherSoft.barplot
        - "CountByLigandRef": number of LSR reference files by ligand
          reference and by type of sub reference
        - "count_pr": by ligand reference, number of reference proteins,
          query proteins and query ligands, followed by the number of
          reference files by value returned by analysis.findFamily

    args: -> name_final: suffix of the result folder ("final_" + name_final)
          -> debug: if true, print visited paths and parsed identifiers
    return: None, results are written on disk
    """

    pr_final_folder = pathManage.result("final_" + name_final)

    d_count = {}     # ligand of LSR file -> {<sub> folder -> number of LSR files}
    d_lig = {}       # ligand of LGD file -> {"count", "group", "family"}
    d_by_ref = {}    # ligand reference -> {type sub reference -> number of LSR REF files}
    d_count_pr = {}  # ligand reference -> {"pr ref", "pr queries", "lig queries", <family>: count}

    l_file_final = listdir(pr_final_folder)
    if debug : print ("1 %s" % pr_final_folder)

    for pr_type_subref in l_file_final :
        pr_type = pr_final_folder + pr_type_subref + "/"
        # case where pr type is a file not a folder
        try : l_pr_sub = listdir(pr_type)
        except OSError : continue
        if debug : print ("2 %s" % pr_type)

        # case cycle append one directory
        if "cycle" in l_pr_sub :
            l_pr_sub.remove ("cycle")
            for second_sub in listdir (pr_type + "cycle/") :
                l_pr_sub.append ("cycle/" + second_sub)

        for pr_sub in l_pr_sub :
            pr_sub_full = pr_type + pr_sub + "/"
            # case where pr_type_substituent is not a folder
            try : l_pr_PDBref = listdir(pr_sub_full)
            except OSError : continue
            if debug : print ("3 %s %s" % (pr_final_folder + pr_type_subref, pr_sub))

            for pr_PDBref in l_pr_PDBref :
                family_ref = pr_PDBref.split ("-")[0]
                group_ref = pr_PDBref.split ("_")[0].split ("-")[-1]
                pr_LGD = pr_sub_full + pr_PDBref + "/LGD/"
                pr_LSR = pr_sub_full + pr_PDBref + "/LSR/"
                pr_BS = pr_sub_full + pr_PDBref + "/BS/"
                if debug :
                    print ("4 %s" % pr_LGD)
                    print ("4 %s" % pr_BS)
                    print ("4 %s" % pr_LSR)

                ################
                #  folder LSR  #
                ################
                for file_LSR in listdir (pr_LSR) :
                    # -> count by type sub reference
                    if search ("LSR_", file_LSR) and file_LSR.split ("_")[1] != "REF" :
                        ligand_sub = file_LSR.split ("_")[1]
                        if debug : print ("5 %s" % file_LSR)
                        d_sub = d_count.setdefault (ligand_sub, {})
                        d_sub[pr_sub] = d_sub.get (pr_sub, 0) + 1

                    # complete LSR, only the LSR of the reference is counted
                    elif search ("LSR", file_LSR) and search ("REF_", file_LSR) :
                        lig_ref_LSR = file_LSR.split ("_")[2][:3]
                        type_ref = pr_type_subref.split ("_")[0]
                        d_type = d_by_ref.setdefault (lig_ref_LSR, {})
                        d_type[type_ref] = d_type.get (type_ref, 0) + 1

                #################
                #  folder LGD   #
                #################
                for file_LGD in listdir(pr_LGD) :
                    if not search ("LGD", file_LGD) :
                        continue
                    ligand = file_LGD.split ("_")[1]
                    if ligand == "REF" :
                        continue
                    if ligand not in d_lig :
                        d_lig[ligand] = {"count": 0, "group": [], "family": []}
                    d_lig[ligand]["count"] = d_lig[ligand]["count"] + 1
                    d_lig[ligand]["family"].append (str(family_ref))
                    d_lig[ligand]["group"].append (str(group_ref))

                ###############
                #  folder BS  #
                ###############
                l_file_BS = listdir(pr_BS)

                # BS -> reference, done first because queries are attached to
                # the reference ligand found in the same folder
                lig_ref_BS = None
                for file_BS in l_file_BS :
                    if not search ("BS_REF", file_BS) :
                        continue
                    l_elem = file_BS.split ("_")
                    lig_ref_BS = l_elem[2]
                    pr_ref = l_elem[3].split (".")[0]
                    if debug : print ("%s %s *****" % (lig_ref_BS, pr_ref))

                    if lig_ref_BS not in d_count_pr :
                        d_count_pr[lig_ref_BS] = {"pr ref": [], "pr queries": [], "lig queries": []}
                    d_ref = d_count_pr[lig_ref_BS]

                    if pr_ref not in d_ref["pr ref"] :
                        d_ref["pr ref"].append (pr_ref)

                    # best effort: a reference without family information is
                    # only left out of the count by family
                    try :
                        family = analysis.findFamily (pr_ref, pathManage.dataset (lig_ref_BS) + "family_PDB.txt")
                        d_ref[family] = d_ref.get (family, 0) + 1
                    except Exception :
                        pass

                # BS -> query
                for file_BS in l_file_BS :
                    if search ("BS_REF", file_BS) :
                        continue
                    l_elem = file_BS.split ("_")
                    lig_querie = l_elem[1]
                    prot_querie = l_elem[2][0:4]
                    if debug : print ("%s %s *******" % (prot_querie, lig_querie))

                    # without BS_REF file in this folder there is no reference
                    # ligand to attach the query to; a value kept from another
                    # folder must not be reused
                    if lig_ref_BS is None :
                        continue
                    d_count_pr[lig_ref_BS]["pr queries"].append (prot_querie)
                    d_count_pr[lig_ref_BS]["lig queries"].append (lig_querie)

    # write and plot #
    ##################
    pr_result = pathManage.generatePath(pr_final_folder + "counting/")
    for ligand_sub in d_count :
        p_filout = pr_result + ligand_sub
        with open (p_filout, "w") as filout :
            filout.write ("\t".join(d_count[ligand_sub].keys ()) + "\n")
            filout.write ("\t".join([str(x) for x in d_count[ligand_sub].values ()]) + "\n")
        runOtherSoft.piePlot(p_filout)

    p_count_ligand = pr_result + "count_ligand"
    with open (p_count_ligand, "w") as filout_lig :
        filout_lig.write ("Ligand ID\tNumber of occurences in the dataset\tNumber of different clusters\tList of clusters\tList of protein families\n")
        # NOTE: every ligand is written. A former test compared the ligand
        # dictionary itself with 1, which never filtered anything.
        for lig in d_lig :
            d_info = d_lig[lig]
            l_col = [str (lig), str (d_info["count"]), str (len (set (d_info["group"]))), " ".join (d_info["group"]), " ".join (d_info["family"])]
            filout_lig.write ("\t".join (l_col) + "\n")

    with open (pr_result + "CountByLigandRef", "w") as filout_LSR_lig :
        for lig_ref in d_by_ref :
            filout_LSR_lig.write ("====" + str (lig_ref) + "====\n")
            for sub_ref in d_by_ref[lig_ref] :
                filout_LSR_lig.write (str (sub_ref) + ": " + str (d_by_ref[lig_ref][sub_ref]) + "\n")

    with open (pr_result + "count_pr", "w") as filout_pr_count :
        for lig in d_count_pr :
            d_ref = d_count_pr[lig]
            filout_pr_count.write ("====" + str (lig) + "====\n")
            filout_pr_count.write ("nb ref pr: " + str (len (d_ref["pr ref"])) + "\n")
            filout_pr_count.write ("nb querie pr: " + str (len (d_ref["pr queries"])) + "\n")
            filout_pr_count.write ("nb ligand queries: " + str (len (d_ref["lig queries"])) + "\n")

            # count by family, written under the header of its own ligand
            for family in d_ref :
                if family not in ("pr ref", "pr queries", "lig queries") :
                    filout_pr_count.write ("Ref " + str (family) + ": " + str (d_ref[family]) + "\n")

    runOtherSoft.barplot(p_count_ligand)
Example #2
0
def analyseIons (pr_dataset, name_ligand, p_filout, thresold_max_interaction = 4.0) : 
    """
    Measure distances and angles between ions and two (three for ATP) atoms
    of the reference ligand, for every reference complex of a dataset.

    Every 4-character entry of pr_dataset is considered as a reference
    folder. The atoms of the ligand are given by retrieveTwoAtomForAngle
    (presumably phosphate atoms, see the labels of the count file); complexes
    for which it returns an empty list are skipped. Ions are the HETATM
    entries of the complex whose resName is in l_ions.

    Files written:
        - p_filout: one line per ion with both distances d1 and d2 under 10,
          with the distances, the angle(s) and the serial of the ligand atoms
        - p_filout[0:-4] + "count.txt": global counts
        - p_filout[0:-4] + "byIons_" + name_ligand: number of complexes by
          ion name, given to runOtherSoft.barplot

    args: -> pr_dataset: path of the dataset folder, ended by "/"
          -> name_ligand: name of the ligand, "ATP" adds a third atom
          -> p_filout: path of the main output file; its 4 last characters
             are removed to build the other file names
          -> thresold_max_interaction: distance under which an ion is
             counted as interacting with both atoms of a pair
    return: None, results are written on disk
    """

    l_folder_ref = listdir(pr_dataset)
    is_ATP = (name_ligand == "ATP")
    # distance to both atoms under which an ion is counted in the binding site
    dist_max_BS = 10

    # dictionnary of counting
    d_count = {"CX": 0, "CX + ions": 0, "BS + ions": 0, "BS + 1-ion": 0,
               "BS + 2-ions": 0, "BS + more-ions": 0, "Interact-1": 0,
               "Interact-2": 0}

    # number of complexes by ion name
    d_ions = {}

    with open (p_filout, "w") as filout :
        if is_ATP :
            filout.write ("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else :
            filout.write ("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref :
            if len (ref_folder) != 4 :
                continue
            d_count["CX"] = d_count["CX"] + 1

            # path and complex
            pr_ref = pr_dataset + ref_folder + "/"
            p_lig_ref = pathManage.findligandRef(pr_ref, name_ligand)
            p_complex = pathManage.findPDBRef(pr_ref)

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # retrieve atoms of the ligand used for distances and angles
            l_pi = retrieveTwoAtomForAngle (lig_ref_parsed, name_ligand)
            if l_pi == [] : # case ligand without the expected atoms
                continue

            s_ion_seen = set ()  # ion names already counted for this complex
            nb_ion_BS = 0
            nb_between_1 = 0
            nb_between_2 = 0

            for het_parsed in l_het_parsed :
                name_ion = het_parsed["resName"]
                if name_ion not in l_ions :
                    continue

                d_count["CX + ions"] = d_count["CX + ions"] + 1
                # an ion name is counted only once by complex
                if name_ion not in s_ion_seen :
                    d_ions[name_ion] = d_ions.get (name_ion, 0) + 1
                    s_ion_seen.add (name_ion)

                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if is_ATP :
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                if not (d1 < dist_max_BS and d2 < dist_max_BS) :
                    continue

                d_count["BS + ions"] = d_count["BS + ions"] + 1
                nb_ion_BS = nb_ion_BS + 1
                if d1 < thresold_max_interaction and d2 < thresold_max_interaction :
                    nb_between_1 = nb_between_1 + 1

                if is_ATP :
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction :
                        nb_between_2 = nb_between_2 + 1
                    l_col = [ref_folder, name_ion, d1, d2, d3, angle, angle_bis, l_pi[0]["serial"], l_pi[1]["serial"], l_pi[2]["serial"]]
                else :
                    l_col = [ref_folder, name_ion, d1, d2, angle, l_pi[0]["serial"], l_pi[1]["serial"]]
                filout.write ("\t".join ([str (col) for col in l_col]) + "\n")

            # complexes by number of ions in the binding site
            if nb_ion_BS == 1 :
                d_count["BS + 1-ion"] = d_count["BS + 1-ion"] + 1
            elif nb_ion_BS == 2 :
                d_count["BS + 2-ions"] = d_count["BS + 2-ions"] + 1
            elif nb_ion_BS > 2 :
                d_count["BS + more-ions"] = d_count["BS + more-ions"] + 1

            d_count["Interact-1"] = d_count["Interact-1"] + nb_between_1
            d_count["Interact-2"] = d_count["Interact-2"] + nb_between_2

    p_prefix = p_filout[0:-4]

    with open (p_prefix + "count.txt", "w") as filout_count :
        filout_count.write ("CX: " + str (d_count["CX"]) + "\n")
        filout_count.write ("CX + ions: " + str (d_count["CX + ions"]) + "\n")
        filout_count.write ("BS + ions: " + str(d_count["BS + ions"]) + "\n")
        filout_count.write ("BS + 1-ion: " + str(d_count["BS + 1-ion"]) + "\n")
        filout_count.write ("BS + 2-ions: " + str(d_count["BS + 2-ions"]) + "\n")
        filout_count.write ("BS + more-ions: " + str(d_count["BS + more-ions"]) + "\n")
        filout_count.write ("Interact Pi-alpha + Pi-beta: " + str(d_count["Interact-1"]) + "\n")
        filout_count.write ("Interact Pi-beta + Pi-gama: " + str(d_count["Interact-2"]) + "\n")

    p_by_ion = p_prefix + "byIons_" + name_ligand
    with open(p_by_ion, "w") as filout_by_ion :
        for k in d_ions :
            filout_by_ion.write (str (k.capitalize()) + "\t" + str (d_ions[k]) + "\n")

    runOtherSoft.barplot (p_by_ion)