예제 #1
0
def runTMalign(path_pr1, path_pr2, path_dir_out, debug=1):
    # if exist doesnt run again
    # if not os.path.exists(path_dir_out + "align.out") and not  os.path.exists( path_dir_out + "matrix.out")  :
    # case multi run
    if not os.path.exists(path_dir_out):
        # if not os.path.exists (path_dir_out + "RMSD") or not os.path.exists (path_dir_out + "matrix.out"):
        pathManage.generatePath(path_dir_out)
        p_pr1 = tool.removeChain(path_pr1, path_dir_out)
        p_pr2 = tool.removeChain(path_pr2, path_dir_out)
        cmd_run = (
            TMalign
            + " "
            + str(p_pr1)
            + " "
            + str(p_pr2)
            + " -o "
            + path_dir_out
            + "align.out -m "
            + path_dir_out
            + "matrix.out"
            + " > "
            + path_dir_out
            + "RMSD"
        )
        if debug:
            print cmd_run
        os.system(cmd_run)

    return [
        path_dir_out + "align.out",
        path_dir_out + "align.out_all",
        path_dir_out + "align.out_atm",
        path_dir_out + "align.out_all_atm",
        path_dir_out + "RMSD",
    ]
예제 #2
0
def applyTMAlignList(l_pr_ref, pr_out):

    pathManage.generatePath(pr_out)
    nb_pr_ref = len(l_pr_ref)
    d_out = {}
    i = 0
    while i < nb_pr_ref:
        j = i + 1
        PDB1 = l_pr_ref[i].split("/")[-1][0:4]
        # print PDB1

        while j < nb_pr_ref:
            PDB2 = l_pr_ref[j].split("/")[-1][0:4]
            # folder TM align
            pr_alignement = pr_out + PDB1 + "__" + PDB2 + "/"
            #print pr_alignement
            #print PDB1,i, PDB2,j
            #print l_pr_ref[i]
            # RUN
            out_file = runOtherSoft.runTMalign(l_pr_ref[i], l_pr_ref[j],
                                               pr_alignement)
            # clean folders -> pb with several run -> clean too fast -> try / except
            try:
                CleanResultTMalign(pr_alignement)
            except:
                pass
            # parse result
            if not PDB1 in d_out.keys():
                if not PDB2 in d_out.keys():
                    d_out[PDB1] = {}
                    d_out[PDB1][PDB2] = parseTMalign.parseOutputTMalign(
                        out_file[-1])
                else:
                    d_out[PDB2][PDB1] = {}
                    d_out[PDB2][PDB1] = parseTMalign.parseOutputTMalign(
                        out_file[-1])
            else:
                d_out[PDB1][PDB2] = {}
                d_out[PDB1][PDB2] = parseTMalign.parseOutputTMalign(
                    out_file[-1])
            j = j + 1
        i = i + 1

    return d_out
예제 #3
0
def classifRefProtein(pr_dataset,
                      l_lig,
                      thresold_identity=30.0,
                      thresold_similarity=30.0):

    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig:
        pr_dataset = pathManage.dataset(lig)
        l_file_by_lig = listdir(pr_dataset)
        l_pr_ref_by_lig = [pr_dataset + x for x in l_file_by_lig]
        for pr_ref_by_lig in l_pr_ref_by_lig:
            PDB_folder = pr_ref_by_lig.split("/")[-1]

            try:
                l_file = listdir(pr_ref_by_lig)
            except:
                continue
            for file_ref in l_file:
                if search("^" + PDB_folder, file_ref):
                    PDB_ID = file_ref[0:-4]
                    PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]
                    # PDB ID with chain associated
                    p_fasta = downloadFile.importFasta(
                        PDB_ID,
                        pr_align_seq,
                        dir_by_PDB=0,
                        debug=1,
                        fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")
                    l_p_fasta.append(p_fasta)
                    break

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    #Group reference -> l 209
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
예제 #4
0
파일: runOtherSoft.py 프로젝트: papoku/LSRs
def runTMalign(path_pr1, path_pr2, path_dir_out, debug=1):
    # if exist doesnt run again
    #if not os.path.exists(path_dir_out + "align.out") and not  os.path.exists( path_dir_out + "matrix.out")  :
    # case multi run
    if not os.path.exists(path_dir_out):
        # if not os.path.exists (path_dir_out + "RMSD") or not os.path.exists (path_dir_out + "matrix.out"):
        pathManage.generatePath(path_dir_out)
        p_pr1 = tool.removeChain(path_pr1, path_dir_out)
        p_pr2 = tool.removeChain(path_pr2, path_dir_out)
        cmd_run = TMalign + " " + str(p_pr1) + " " + str(
            p_pr2
        ) + " -o " + path_dir_out + "align.out -m " + path_dir_out + "matrix.out" + " > " + path_dir_out + "RMSD"
        if debug:
            print cmd_run
        os.system(cmd_run)

    return [
        path_dir_out + "align.out", path_dir_out + "align.out_all",
        path_dir_out + "align.out_atm", path_dir_out + "align.out_all_atm",
        path_dir_out + "RMSD"
    ]
예제 #5
0
def applyTMAlignList (l_pr_ref, pr_out):
    
    
    pathManage.generatePath(pr_out)
    nb_pr_ref = len (l_pr_ref)
    d_out = {}
    i = 0
    while i < nb_pr_ref : 
        j = i + 1
        PDB1 = l_pr_ref[i].split ("/")[-1][0:4]
        # print PDB1
    
        while j < nb_pr_ref :
            PDB2 =  l_pr_ref[j].split ("/")[-1][0:4]
            # folder TM align
            pr_alignement = pr_out + PDB1 + "__" + PDB2 + "/"
            #print pr_alignement
            #print PDB1,i, PDB2,j
            #print l_pr_ref[i]
            # RUN
            out_file = runOtherSoft.runTMalign(l_pr_ref[i], l_pr_ref[j], pr_alignement)
            # clean folders -> pb with several run -> clean too fast -> try / except
            try : CleanResultTMalign (pr_alignement)
            except : pass
            # parse result
            if not PDB1 in d_out.keys () : 
                if not PDB2 in d_out.keys () : 
                    d_out[PDB1] = {}
                    d_out[PDB1][PDB2] = parseTMalign.parseOutputTMalign(out_file[-1])
                else : 
                    d_out[PDB2][PDB1] = {}
                    d_out[PDB2][PDB1] = parseTMalign.parseOutputTMalign(out_file[-1])
            else : 
                d_out[PDB1][PDB2] = {}
                d_out[PDB1][PDB2] = parseTMalign.parseOutputTMalign(out_file[-1])
            j = j + 1
        i = i + 1
    
    return d_out
예제 #6
0
def classifRefProtein (pr_dataset, l_lig, thresold_identity = 30.0, thresold_similarity = 30.0):
    
    pr_out = pathManage.result("clasifRef")
    
    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig : 
        pr_dataset = pathManage.dataset(lig)
        l_file_by_lig = listdir(pr_dataset)
        l_pr_ref_by_lig =[pr_dataset + x for x in l_file_by_lig]
        for pr_ref_by_lig in l_pr_ref_by_lig : 
            PDB_folder = pr_ref_by_lig.split ("/")[-1]
            
            try : l_file = listdir(pr_ref_by_lig)
            except : continue
            for file_ref in l_file : 
                if search("^" + PDB_folder, file_ref) :
                    PDB_ID = file_ref[0:-4]
                    PDB_ID = PDB_ID[0:4].lower () + PDB_ID[4:]
                    # PDB ID with chain associated
                    p_fasta = downloadFile.importFasta(PDB_ID, pr_align_seq, dir_by_PDB = 0, debug = 1, fastaGlobal = "/home/borrel/Yue_project/pdb_seqres.txt")
                    l_p_fasta.append (p_fasta)
                    break
            
                
    d_outNeedle = applyNeedleList (l_p_fasta, pr_align_seq)
    
    # writeMatrix
    writeMatrixFromDico (d_outNeedle, pr_out + "matrixSimilarSeq", "similarity" )
    writeMatrixFromDico (d_outNeedle, pr_out + "matrixIDSeq", "identity" )
    
    #Group reference -> l 209
    p_group_id = GroupRef (d_outNeedle, "identity", pr_out + "groupIdentity" +"_" + str (thresold_identity) + ".txt", thresold_identity, l_lig)
    p_group_sim = GroupRef (d_outNeedle, "similarity", pr_out + "groupSimilarity" +"_" + str (thresold_similarity) + ".txt", thresold_similarity, l_lig)
    
    # merge not alone prot
    MergeGroup (p_group_id)
    MergeGroup (p_group_sim)
예제 #7
0
파일: main.py 프로젝트: ABorrel/LSRs
def downloadPDB (pr_in):

    pathManage.generatePath(pr_in)
    managePDB.formatPDBDatabase(pr_in)
예제 #8
0
def superpositionAllRef (l_ligand, name_folder_final, debug = 1):   
    
    pr_final = pathManage.result("final_" + name_folder_final)
    pr_align = pathManage.generatePath(pr_final + "refAlignement/")
    
    l_ref = []
    d_filout_pdb = {}
    d_filout_RMSE = {}
    d_ref = {}
    l_file_RMSE = []
    for ligand in l_ligand : 
        d_filout_pdb[ligand] = open (pr_align + ligand + "_" + "superimposed.pdb" , "w")
        d_filout_RMSE[ligand] = open (pr_align + ligand + "_" + "RMSE.txt" , "w")
        l_file_RMSE.append (pr_align + ligand + "_" + "RMSE.txt") 
    
    l_pr_type_ref = listdir(pr_final) 
    for pr_type_ref in l_pr_type_ref : 
        if debug : print "1", pr_type_ref
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except : continue

        for pr_sub in l_pr_sub : 
            print "2", pr_sub

            # case cycle -> append in list respertory with new folder
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break
        
        for pr_sub in l_pr_sub : 
            try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except : pass
            if debug : print "3", pr_sub
            
            for pr_ref in l_pr_ref : 
                if debug : print "4", pr_ref
                # case no folder
                try : l_file = listdir(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/")
                except : continue
                for name_file in l_file : 
                    if search("LGD_REF_A",name_file) and search(".pdb",name_file): 
                        #print "2222", l_ref
                        if name_file.split("_")[3][:4] in l_ref : 
                            print "!!!!!", "IN"
                            break
                        else : l_ref.append (name_file.split ("_")[3][:4])                       


                        ligand = name_file.split ("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_final + pr_type_ref + "/" + pr_sub  + "/" + pr_ref + "/LGD/" + name_file, "HETATM", remove_H=1)
                        l_atom_adenine = substructTools.retrieveAdenine(l_atom_ligand)
                        if not ligand in d_ref.keys () : 
                            # stock in tempory dictionary for the reference
                            d_ref[ligand] = []
                            d_ref[ligand].append (l_atom_ligand)
                            d_ref[ligand].append (l_atom_adenine)
                            writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_ligand, "HETATM", connect_matrix = 1)
                            continue
                        else : 
                            rotation, translocation =  superimpose.rigid_transform_3D(l_atom_adenine, d_ref[ligand][-1])
                            if rotation == None or translocation == None : 
                                continue
                            # rotation + translation
                            l_atom_lig_rotated = superimpose.applyTranformation(rotation, translocation, l_atom_in=l_atom_ligand)
                            # write PDB file and RMSE
#                             print "============"
#                             print ligand, pr_ref
#                             print len (l_atom_lig_rotated)
#                             print len (d_ref[ligand][0])
#                             print "============"
                            if len (l_atom_lig_rotated) != len (d_ref[ligand][0]) : 
                                continue
                        
                            writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_lig_rotated, "HETATM", connect_matrix = 1)
                            RMSE_ligand = superimpose.rmse(d_ref[ligand][0], l_atom_lig_rotated)
                            d_filout_RMSE[ligand].write (str (pr_ref) + pr_type_ref  + "\t" + str(RMSE_ligand) + "\n")
            
    # close files
    for lig in d_filout_pdb.keys () : 
        d_filout_pdb[lig].close ()
        d_filout_RMSE[lig].close ()

    for file_RMSE in l_file_RMSE : 
        runOtherSoft.Rhistogram(file_RMSE, "RMSE_Adenine")                                 
예제 #9
0
def enantiomer(l_ligand, name_folder_final, debug = 1) : 
    "to do file output"
    
    pr_final = pathManage.result("final_" + name_folder_final)
    
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")
    
    l_ref = []

    d_filout = {}
    for ligand in l_ligand : 
        d_filout[ligand] = {}
        d_filout[ligand]["O3OP"]= open (pr_enantiomer + ligand + "_" + "O3OP" , "w")
        d_filout[ligand]["O4O5"]= open (pr_enantiomer + ligand + "_" + "O4O5" , "w")
        d_filout[ligand]["OPOP"]= open (pr_enantiomer + ligand + "_" + "OPOP" , "w")
        
    l_pr_type_ref = listdir(pr_final) 
    for pr_type_ref in l_pr_type_ref : 
        if debug : print "1", pr_type_ref
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except : continue

        for pr_sub in l_pr_sub : 
            print "2", pr_sub

            # case cycle -> append in list respertory with new folder
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break
        
        for pr_sub in l_pr_sub : 
            try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except : pass
            if debug : print "3", pr_sub
            
            for pr_ref in l_pr_ref : 
                if debug : print "4", pr_ref
                # case no folder
                try : l_file = listdir(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/")
                except : continue
                for name_file in l_file : 
                    if search("LGD_REF_A",name_file) and search(".pdb",name_file): 
                        #print "2222", l_ref
                        if name_file.split("_")[3][:4] in l_ref : 
                            print "!!!!!", "IN"
                            break
                        else : l_ref.append (name_file.split ("_")[3][:4])                       
 
                        ligand = name_file.split ("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/" + name_file, "HETATM")
                        d_minO3OP = 100
                        for atom_ligand in l_atom_ligand : 
                            if atom_ligand["name"] == "O4'" :
                                atom_O4 = atom_ligand
                            elif atom_ligand["name"] == "O5'" :
                                atom_O5 = atom_ligand
                            elif  atom_ligand["name"] == "O3'" :
                                atom_O3 = atom_ligand
                            elif  atom_ligand["name"] == "O1A" :
                                atom_O1A = atom_ligand
                            elif  atom_ligand["name"] == "O2A" :
                                atom_O2A = atom_ligand
                            elif  atom_ligand["name"] == "O1B" :
                                atom_O1B = atom_ligand
                            elif  atom_ligand["name"] == "O2B" :
                                atom_O2B = atom_ligand
                            #elif  atom_ligand["name"] == "O3B" :
                            #    atom_O3B = atom_ligand
                    
                        # d O4 - O5        
                        try : d_O4O5 = parsePDB.distanceTwoatoms(atom_O4, atom_O5)
                        except : continue
                        d_filout[ligand]["O4O5"].write (pr_ref + "_" + pr_type_ref  + "\t" + str (d_O4O5) + "\n")

                        # d O3 - OP
                        for atom_ligand in l_atom_ligand : 
                            if ligand == "AMP" : 
                                if atom_ligand["name"] == "O1P" or atom_ligand["name"] == "O2P" or atom_ligand["name"] == "O3P" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O3, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                            else : 
                                if atom_ligand["name"] == "O1A" or atom_ligand["name"] == "O2A" or atom_ligand["name"] == "O3A" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O4, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                        d_filout[ligand]["O3OP"].write (pr_ref + "_" + pr_type_ref  +"_" + str(atom_tempO3OP["name"]) + "\t" + str (d_minO3OP) + "\n")
    
                        # d OP OP
                        d_OP = {}
                        if ligand == "ATP" or ligand == "ADP" : 
                            d_OP ["O1AO1B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O1B)
                            d_OP ["O1AO2B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O2B)
                            #d_OP ["O1AO3B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O3B)
                            d_OP ["O2AO1B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O1B)
                            d_OP ["O2AO2B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O2B)
                            #d_OP ["O2AO3B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O3B)
                        
                            d_minOPOP = min (d_OP.values())
                            #print d_minOPOP
                            k_min = [name for name, age in d_OP.items() if age == min (d_OP.values())][0]
                            #print k_min
                            d_filout[ligand]["OPOP"].write (pr_ref + "_" + pr_type_ref  + "_" + str(k_min) + "\t" + str (d_minOPOP) + "\n")
                    
                        try :
                            del d_OP 
                            del atom_O1A
                            del atom_O1B
                            del atom_O2A
                            del atom_O2B
                        except : 
                            pass
                        try : 
                            del atom_O3
                            del atom_O4
                            del atom_O5
                        except :
                            pass
            
    # close files
    for lig in l_ligand : 
        for type_dist in d_filout[lig].keys () : 
            p_file = d_filout[lig][type_dist].name
            d_filout[lig][type_dist].close ()
            runOtherSoft.Rhistogram(p_file, type_dist, brk = 20)
예제 #10
0
def countingSubstituent (name_final, debug = 1):
    
    pr_final_folder = pathManage.result("final_" + name_final)
    
    d_count = {}
    d_lig = {}
    d_by_ref = {}
    d_count_pr = {}
    l_file_final = listdir(pr_final_folder)
    if debug : print "1", pr_final_folder
    for pr_type_subref in l_file_final :
        # case where pr type is a file not a folder
        try : l_pr_sub = listdir(pr_final_folder + pr_type_subref + "/")
        except : continue
        if debug: print "2",pr_final_folder +  pr_type_subref + "/"
        
        # case cycle append one directory
        if "cycle" in l_pr_sub : 
            l_pr_sub.remove ("cycle")
            l_second_sub = listdir (pr_final_folder + pr_type_subref + "/cycle/")
        
            for second_sub in l_second_sub : 
                l_pr_sub.append ("cycle/" + second_sub)


        for pr_sub in l_pr_sub : 
            # case where pr_type_substituent is a folder
            try : l_pr_PDBref = listdir(pr_final_folder + pr_type_subref + "/" + pr_sub + "/")
            except : continue
            if debug : print "3", pr_final_folder + pr_type_subref, pr_sub             

            for pr_PDBref in l_pr_PDBref :
                PDB_ref = pr_PDBref.split ("_")[-1]
                family_ref = pr_PDBref.split ("-")[0]
                group_ref = pr_PDBref.split ("_")[0].split ("-")[-1]
                pr_LGD = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref + "/LGD/"
                pr_LSR = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref + "/LSR/"
                pr_BS = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref + "/BS/"
                if debug : 
                    print "4",pr_LGD
                    print "4", pr_BS
                    print "4", pr_LSR




                ################
                #  folder LSR  #
                ################
                l_file_LSR = listdir (pr_LSR)

                for file_LSR in l_file_LSR :
                    # -> count by type sub reference
                    if search ("LSR_", file_LSR) and file_LSR.split ("_")[1] != "REF" :
                        ligand_sub = file_LSR.split ("_")[1]
                        if debug : print "5", file_LSR
                        if not ligand_sub in d_count.keys () : 
                            d_count[ligand_sub] = {}
                    
                        if not pr_sub in d_count[ligand_sub].keys () : 
                            d_count[ligand_sub][pr_sub] = 0
                        d_count[ligand_sub][pr_sub] = d_count[ligand_sub][pr_sub] + 1
                    
                    ################
                    # complet LSR  #
                    ################
                    elif search ("LSR", file_LSR):
                        # case LSR reference #
                        ######################
                        if search ("REF_", file_LSR) :
                            lig_ref = file_LSR.split ("_")[2][:3]
                            if not lig_ref in d_by_ref.keys () : 
                                d_by_ref[lig_ref] = {}

                            type_ref = pr_type_subref.split ("_")[0]

                            if not type_ref in d_by_ref[lig_ref].keys () : 
                                    d_by_ref[lig_ref][type_ref] = 0
                            
                            d_by_ref[lig_ref][type_ref] = d_by_ref[lig_ref][type_ref] + 1
            
            
                #################    
                #  folder LGD   #
                #################
                l_file_LGD = listdir(pr_LGD)
                for file_LGD in l_file_LGD : 
                    # print file_ref
                    if search ("LGD", file_LGD):
                        ligand = file_LGD.split ("_")[1]
                        if ligand == "REF" : 
                            continue
                        if not ligand in d_lig.keys () : 
                            d_lig[ligand] = {}
                            d_lig[ligand]["count"] = 0
                            d_lig[ligand]["group"] = []
                            d_lig[ligand]["family"] = []
                        d_lig[ligand]["count"] = d_lig[ligand]["count"] + 1
                        d_lig[ligand]["family"].append (str(family_ref))
                        d_lig[ligand]["group"].append (str(group_ref))

            
                ###############
                #  folder BS  #
                ###############
                l_file_BS = listdir(pr_BS)
                for file_BS in l_file_BS : 
                    if search ("BS_REF", file_BS):
                        lig_ref = file_BS.split ("_")[2]
                        pr_ref = file_BS.split ("_")[3].split (".")[0]
                        print lig_ref, pr_ref, "*****"
                        if not lig_ref in d_count_pr.keys () : 
                            d_count_pr[lig_ref] = {}
                            d_count_pr[lig_ref]["pr ref"] = []
                            d_count_pr[lig_ref]["pr queries"] = []
                            d_count_pr[lig_ref]["lig queries"] = []
                                   
                        if not pr_ref in d_count_pr[lig_ref]["pr ref"] : 
                            d_count_pr[lig_ref]["pr ref"].append (pr_ref)
                                
                                
                        try:
                            family = analysis.findFamily (pr_ref, pathManage.dataset (lig_ref) + "family_PDB.txt")
                            if not family in d_count_pr[lig_ref].keys () : 
                                d_count_pr[lig_ref][family] = 0
                            d_count_pr[lig_ref][family] = d_count_pr[lig_ref][family] + 1
                        except: pass
                

                # BS -> query
                for file_BS in l_file_BS : 
                    # for not reference BS
                    if not search ("BS_REF", file_BS) : 
                        lig_querie = file_BS.split ("_")[1]
                        prot_querie = file_BS.split ("_")[2][0:4]
                        print prot_querie, lig_querie, "*******"
                        # find ligand reference
                        # lig ref define in previous step
                        d_count_pr[lig_ref]["pr queries"].append (prot_querie)
                        d_count_pr[lig_ref]["lig queries"].append (lig_querie)


    # write and plot #
    ##################
    pr_result = pathManage.generatePath(pr_final_folder + "counting/")
    for ligand_sub in d_count.keys () : 
        p_filout = pr_result + ligand_sub
        filout = open (p_filout, "w")
        filout.write ("\t".join(d_count[ligand_sub].keys ()) + "\n")
        l_value = [str(x) for x in d_count[ligand_sub].values ()]
        filout.write ("\t".join(l_value) + "\n")
        filout.close ()
        runOtherSoft.piePlot(p_filout)
    
    filout_lig = open (pr_result + "count_ligand", "w")
    filout_lig.write ("Ligand ID\tNumber of occurences in the dataset\tNumber of different clusters\tList of clusters\tList of protein families\n")
    for lig in d_lig.keys () : 
        if d_lig[lig] > 1 : 
            filout_lig.write (str (lig) + "\t" + str (d_lig[lig]["count"]) + "\t" + str(len (list (set(d_lig[lig]["group"]))))  + "\t" + " ".join (d_lig[lig]["group"]) + "\t" + " ".join (d_lig[lig]["family"]) + "\n")
    filout_lig.close ()
    
    filout_LSR_lig = open (pr_result + "CountByLigandRef", "w")
    for lig_ref in d_by_ref.keys () : 
        filout_LSR_lig.write ("====" + str (lig_ref) + "====\n")
        for sub_ref in d_by_ref[lig_ref].keys () : 
            filout_LSR_lig.write (str (sub_ref) + ": " + str (d_by_ref[lig_ref][sub_ref]) + "\n")
    filout_LSR_lig.close ()

    filout_pr_count = open (pr_result + "count_pr", "w")
    for lig in d_count_pr.keys () : 
        filout_pr_count.write ("====" + str (lig) + "====\n")
        filout_pr_count.write ("nb ref pr: " + str (len (d_count_pr[lig]["pr ref"])) + "\n")
        filout_pr_count.write ("nb querie pr: " + str (len (d_count_pr[lig]["pr queries"])) + "\n")
        filout_pr_count.write ("nb ligand queries: " + str (len (d_count_pr[lig]["lig queries"])) + "\n")

    for family in d_count_pr[lig].keys () : 
        if family != "pr ref" and family != "pr queries" and family != "lig queries" :
            filout_pr_count.write ("Ref " + str (family) + ": " + str (d_count_pr[lig][family]) + "\n")


    filout_pr_count.close ()

    runOtherSoft.barplot(pr_result + "count_ligand")
예제 #11
0
def extractLGDfile(prclassif, prresult):
    """Extract from folder classification """

        # test if file in folder result
    if len(listdir(prresult)) > 1:
        return prresult


    lprref = []
    lfoldergroups = listdir(prclassif)
    for foldergroup in lfoldergroups:
        if foldergroup == "cycle":
            lsubtypes = listdir(prclassif + "/cycle/")
            for subtype in lsubtypes:
                lrefprot = listdir(prclassif + "/cycle/" + subtype)
                for refprot in lrefprot:
                    lprref.append(prclassif + "/cycle/" + subtype + "/" + refprot)
        else:
            lrefprot = listdir(prclassif + "/" +  foldergroup + "/")
            for refprot in lrefprot:
                lprref.append(prclassif + "/" + foldergroup + "/" + refprot)


    lout = []
    for prefprot in lprref:#########################################to reduce
        refprot = prefprot.split("/")[-1]
        if not refprot in lout:
            pathManage.generatePath(prresult + refprot)
            lout.append(refprot)
        # copy file LGD
        lfileLGD = listdir(prefprot + "/LGD/")
        for fileLGD in lfileLGD:
            ligid = fileLGD.split("_")[1]
            if ligid == "REF":
                ligid = fileLGD.split("_")[2]
                pdbid = refprot.split("_")[-1]
                LSR = "REF"
            else:
                pdbid = fileLGD.split("_")[2]
                LSR = prefprot.split("/")[-2].replace("_", "")
                if prefprot.split("/")[-3] == "cycle":
                    LSR = "cycle-" + str(LSR)
            nameout = str(LSR) + "_" + str(ligid) + "_" + str(pdbid) + str(fileLGD[-4:])
            copyfile(prefprot + "/LGD/" + fileLGD, prresult + refprot + "/" + nameout)

        # extract SMILES LSR
        dLSR = {}
        pfileLSR = prresult + refprot + "/listLSRsmiles"
        filoutLSR = open(pfileLSR, "w")
        # header
        ltypeLSR = ["pi1", "pi2", "pi3"]
        filoutLSR.write("\t".join(ltypeLSR) + "\n")

        prLSRin = prefprot + "/LSR/"
        lfileLSR = listdir(prLSRin)
        for fileLSR in lfileLSR:
            if search("^LSR", fileLSR) and search("pdb", fileLSR):
                lelemname = fileLSR.split("_")
                nameLSR = lelemname[1]
                if nameLSR == "REF":
                    continue
                else:
                    lig = lelemname[2]
                    PDBid = lelemname[3]
                    smiles =  runOtherSoft.babelConvertPDBtoSMILE (prLSRin + fileLSR, rm_smi = 1)
                    #print(smiles, "l101 - ligandSimilarity")
                    kin = str(lig) + "-" + PDBid
                    if not kin in dLSR.keys():
                        dLSR[kin] = {}
                        for typeLSR in ltypeLSR:
                            dLSR[kin][typeLSR] = "-"
                    dLSR[kin][nameLSR] = smiles

        # write filout
        for kin in dLSR.keys():
            lsmiles = [dLSR[kin][i] for i in ltypeLSR]
            filoutLSR.write(kin + "\t" + "\t".join(lsmiles) + "\n")
        filoutLSR.close()
    return prresult
예제 #12
0
def downloadPDB(pr_in):

    pathManage.generatePath(pr_in)
    managePDB.formatPDBDatabase(pr_in)
예제 #13
0
def extractLGDfile(prclassif, prresult):
    """Extract from folder classification """

    # test if file in folder result
    #if len(listdir(prresult)) > 1:
    #    return prresult


    lprref = []
    lfoldergroups = listdir(prclassif)
    for foldergroup in lfoldergroups:
        if foldergroup == "cycle":
            lsubtypes = listdir(prclassif + "/cycle/")
            for subtype in lsubtypes:
                lrefprot = listdir(prclassif + "/cycle/" + subtype)
                for refprot in lrefprot:
                    lprref.append(prclassif + "/cycle/" + subtype + "/" + refprot)
        else:
            lrefprot = listdir(prclassif + "/" +  foldergroup + "/")
            for refprot in lrefprot:
                lprref.append(prclassif + "/" + foldergroup + "/" + refprot)


    lout = []
    dLSR = {}
    ltypeLSR = ["pi1", "pi2", "pi3"]
    for prefprot in lprref:#########################################to reduce
        refprot = prefprot.split("/")[-1]
        if not refprot in lout:
            pathManage.generatePath(prresult + refprot)
            lout.append(refprot)
        # copy file LGD
        lfileLGD = listdir(prefprot + "/LGD/")
        for fileLGD in lfileLGD:
            ligid = fileLGD.split("_")[1]
            if ligid == "REF":
                ligid = fileLGD.split("_")[2]
                pdbid = refprot.split("_")[-1]
                LSR = "REF"
            else:
                pdbid = fileLGD.split("_")[2]
                LSR = prefprot.split("/")[-2].replace("_", "")
                if prefprot.split("/")[-3] == "cycle":
                    LSR = "cycle-" + str(LSR)
            nameout = str(LSR) + "_" + str(ligid) + "_" + str(pdbid) + str(fileLGD[-4:])
            copyfile(prefprot + "/LGD/" + fileLGD, prresult + refprot + "/" + nameout)

        # extract SMILES LSR
        folderresult = prresult + refprot + "/"
        if not folderresult in dLSR.keys():
            dLSR[folderresult] = {}

        prLSRin = prefprot + "/LSR/"
        lfileLSR = listdir(prLSRin)
        for fileLSR in lfileLSR:
            #print prefprot + "/LSR/" + fileLSR,"l93===ligandSimilarity"
            if search("^LSR", fileLSR) and search("pdb", fileLSR):
                lelemname = fileLSR.split("_")
                nameLSR = lelemname[1]
                if nameLSR == "REF":
                    continue
                else:
                    lig = lelemname[2]
                    PDBid = lelemname[3]
                    smiles =  runOtherSoft.babelConvertPDBtoSMILE (prLSRin + fileLSR, rm_smi = 1)
                    #print(smiles, "l101 - ligandSimilarity")
                    kin = str(lig) + "-" + PDBid
                    if not kin in dLSR[folderresult].keys():
                        dLSR[folderresult][kin] = {}
                        for typeLSR in ltypeLSR:
                            dLSR[folderresult][kin][typeLSR] = "-"
                    dLSR[folderresult][kin][nameLSR] = smiles
    #print dLSR
    # write filout
    for folderresult in dLSR.keys():
        pfileLSR = folderresult + "listLSRsmiles"

        if path.exists(pfileLSR):
            filoutLSR = open(pfileLSR, "a")
        else:
            filoutLSR = open(pfileLSR, "w")
            filoutLSR.write("\t".join(ltypeLSR) + "\n")

        for kin in dLSR[folderresult].keys():
            lsmiles = [dLSR[folderresult][kin][i] for i in ltypeLSR]
            print lsmiles, "l.122 ligandSimilarity.py"
            filoutLSR.write(kin + "\t" + "\t".join(lsmiles) + "\n")
        filoutLSR.close()
    return prresult