Ejemplo n.º 1
0
def classifRefProtein(pr_dataset,
                      l_lig,
                      thresold_identity=30.0,
                      thresold_similarity=30.0):
    """Align the reference sequences of every ligand and group them.

    For each ligand of l_lig, the folder returned by
    pathManage.dataset(lig) is listed. In every entry that can itself be
    listed, the first file matching the pattern "^" + <entry name> gives a
    PDB ID (file name without its last four characters, first four
    characters lowercased) whose fasta file is imported into the
    "alignSeq/" folder of the "clasifRef" result directory. The collected
    fasta paths are handed to applyNeedleList; its result is used to write
    the "matrixSimilarSeq" and "matrixIDSeq" files and to build the
    identity and similarity group files, which are then passed to
    MergeGroup.

    Args:
        pr_dataset: not read by this function (the dataset folder is
            looked up per ligand); kept so existing callers still work.
        l_lig: iterable of ligand names accepted by pathManage.dataset.
        thresold_identity: threshold given to GroupRef for "identity";
            also embedded in the group file name.
        thresold_similarity: threshold given to GroupRef for
            "similarity"; also embedded in the group file name.

    Returns:
        None. All results are written through the helper functions.
    """
    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig:
        # Use a local name so the pr_dataset argument is not clobbered.
        pr_dataset_lig = pathManage.dataset(lig)
        for name_ref in listdir(pr_dataset_lig):
            pr_ref_by_lig = pr_dataset_lig + name_ref
            PDB_folder = pr_ref_by_lig.split("/")[-1]

            try:
                l_file = listdir(pr_ref_by_lig)
            except OSError:
                # Entry cannot be listed (e.g. a plain file): skip it.
                continue

            for file_ref in l_file:
                if not search("^" + PDB_folder, file_ref):
                    continue
                # PDB ID with chain associated
                PDB_ID = file_ref[0:-4]
                PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]
                p_fasta = downloadFile.importFasta(
                    PDB_ID,
                    pr_align_seq,
                    dir_by_PDB=0,
                    debug=1,
                    fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")
                # extractReference treats a 0 result from importFasta as a
                # failure; do not feed that marker to the alignment step.
                if p_fasta != 0:
                    l_p_fasta.append(p_fasta)
                # Only the first matching file of the folder is used.
                break

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # Group reference
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
Ejemplo n.º 2
0
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct):
    """Build the reference dataset for one substructure.

    The PDB IDs listed under d_ligand[substruct] (as parsed by
    tool.parseLigandPDBList) are upper-cased, and the PDB and fasta files
    of each one are imported into p_dir_dataset. Entries for which either
    import returns 0 are left out. The resolution of every kept entry is
    then written to p_dir_result + "resolution_ref.txt" (one
    "<PDB_ID>\t<resolution>" line per entry) and that file is given to
    runOtherSoft.Rhistogram.

    Args:
        p_list_ligand: path given to tool.parseLigandPDBList.
        p_dir_dataset: folder where PDB and fasta files are imported.
        p_dir_result: folder prefix of the resolution file; the file name
            is appended by plain concatenation, so it must end with a
            path separator.
        substruct: key selecting the PDB ID list in the parsed ligand
            dictionary.

    Returns:
        dict keyed by upper-case PDB ID. Each value is a dict with the
        keys "p_pdb", "p_fasta", "p_pdb_chain", "p_fasta_chain",
        "conserve" (always 1) and "RX" (resolution as float, or 100.0
        when the parsed value cannot be converted).
    """
    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(PDB_ID,
                                       p_dir_dataset,
                                       dir_by_PDB=1,
                                       debug=1,
                                       dbPDB="/home/borrel/PDB/")
        p_fasta = downloadFile.importFasta(
            PDB_ID,
            p_dir_dataset,
            dir_by_PDB=1,
            debug=1,
            fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")

        # A 0 result marks a failed import: such entries are dropped.
        if p_pdb != 0 and p_fasta != 0:
            l_p_pdb_chain = separeByChain(p_pdb)
            l_p_fasta_chain = separeChainFasta(p_fasta)
            d_dataset[PDB_ID] = {
                "p_pdb": p_pdb,
                "p_fasta": p_fasta,
                "p_pdb_chain": l_p_pdb_chain,
                "p_fasta_chain": l_p_fasta_chain,
                "conserve": 1,
            }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"
    # The context manager closes the file even if parsing or writing fails.
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID in d_dataset:
            RX = parsePDB.resolution(d_dataset[PDB_ID]["p_pdb"])
            try:
                d_dataset[PDB_ID]["RX"] = float(RX)
            except (TypeError, ValueError):
                # Resolution missing or not numeric: use the 100.0 fallback.
                d_dataset[PDB_ID]["RX"] = 100.0
            # The raw parsed value (not the fallback) goes to the file.
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")
    return d_dataset
Ejemplo n.º 3
0
def classifRefProtein(pr_dataset, l_lig, thresold_identity=30.0, thresold_similarity=30.0):
    """Align the reference sequences of every ligand and group them.

    For each ligand of l_lig, the folder returned by
    pathManage.dataset(lig) is listed. In every entry that can itself be
    listed, the first file matching the pattern "^" + <entry name> gives a
    PDB ID (file name without its last four characters, first four
    characters lowercased) whose fasta file is imported into the
    "alignSeq/" folder of the "clasifRef" result directory. The collected
    fasta paths are handed to applyNeedleList; its result is used to write
    the "matrixSimilarSeq" and "matrixIDSeq" files and to build the
    identity and similarity group files, which are then passed to
    MergeGroup.

    Args:
        pr_dataset: not read by this function (the dataset folder is
            looked up per ligand); kept so existing callers still work.
        l_lig: iterable of ligand names accepted by pathManage.dataset.
        thresold_identity: threshold given to GroupRef for "identity";
            also embedded in the group file name.
        thresold_similarity: threshold given to GroupRef for
            "similarity"; also embedded in the group file name.

    Returns:
        None. All results are written through the helper functions.
    """
    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig:
        # Use a local name so the pr_dataset argument is not clobbered.
        pr_dataset_lig = pathManage.dataset(lig)
        for name_ref in listdir(pr_dataset_lig):
            pr_ref_by_lig = pr_dataset_lig + name_ref
            PDB_folder = pr_ref_by_lig.split("/")[-1]

            try:
                l_file = listdir(pr_ref_by_lig)
            except OSError:
                # Entry cannot be listed (e.g. a plain file): skip it.
                continue

            for file_ref in l_file:
                if not search("^" + PDB_folder, file_ref):
                    continue
                # PDB ID with chain associated
                PDB_ID = file_ref[0:-4]
                PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]
                p_fasta = downloadFile.importFasta(
                    PDB_ID,
                    pr_align_seq,
                    dir_by_PDB=0,
                    debug=1,
                    fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")
                # extractReference treats a 0 result from importFasta as a
                # failure; do not feed that marker to the alignment step.
                if p_fasta != 0:
                    l_p_fasta.append(p_fasta)
                # Only the first matching file of the folder is used.
                break

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # Group reference
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
Ejemplo n.º 4
0
def extractReference(p_list_ligand, p_dir_dataset, p_dir_result, substruct):
    """Build the reference dataset for one substructure.

    The PDB IDs listed under d_ligand[substruct] (as parsed by
    tool.parseLigandPDBList) are upper-cased, and the PDB and fasta files
    of each one are imported into p_dir_dataset. Entries for which either
    import returns 0 are left out. The resolution of every kept entry is
    then written to p_dir_result + "resolution_ref.txt" (one
    "<PDB_ID>\t<resolution>" line per entry) and that file is given to
    runOtherSoft.Rhistogram.

    Args:
        p_list_ligand: path given to tool.parseLigandPDBList.
        p_dir_dataset: folder where PDB and fasta files are imported.
        p_dir_result: folder prefix of the resolution file; the file name
            is appended by plain concatenation, so it must end with a
            path separator.
        substruct: key selecting the PDB ID list in the parsed ligand
            dictionary.

    Returns:
        dict keyed by upper-case PDB ID. Each value is a dict with the
        keys "p_pdb", "p_fasta", "p_pdb_chain", "p_fasta_chain",
        "conserve" (always 1) and "RX" (resolution as float, or 100.0
        when the parsed value cannot be converted).
    """
    # struct reference
    d_dataset = {}

    # retrieve list of ligand in PDB
    d_ligand = tool.parseLigandPDBList(p_list_ligand)

    # download PDB and fasta associated
    for PDB_ID in d_ligand[substruct]:
        PDB_ID = PDB_ID.upper()
        p_pdb = downloadFile.importPDB(
            PDB_ID,
            p_dir_dataset,
            dir_by_PDB=1,
            debug=1,
            dbPDB="/home/borrel/PDB/")
        p_fasta = downloadFile.importFasta(
            PDB_ID,
            p_dir_dataset,
            dir_by_PDB=1,
            debug=1,
            fastaGlobal="/home/borrel/Yue_project/pdb_seqres.txt")

        # A 0 result marks a failed import: such entries are dropped.
        if p_pdb != 0 and p_fasta != 0:
            l_p_pdb_chain = separeByChain(p_pdb)
            l_p_fasta_chain = separeChainFasta(p_fasta)
            d_dataset[PDB_ID] = {
                "p_pdb": p_pdb,
                "p_fasta": p_fasta,
                "p_pdb_chain": l_p_pdb_chain,
                "p_fasta_chain": l_p_fasta_chain,
                "conserve": 1,
            }

    # plot resolution
    p_file_RX = p_dir_result + "resolution_ref.txt"
    # The context manager closes the file even if parsing or writing fails.
    with open(p_file_RX, "w") as file_RX:
        for PDB_ID in d_dataset:
            RX = parsePDB.resolution(d_dataset[PDB_ID]["p_pdb"])
            try:
                d_dataset[PDB_ID]["RX"] = float(RX)
            except (TypeError, ValueError):
                # Resolution missing or not numeric: use the 100.0 fallback.
                d_dataset[PDB_ID]["RX"] = 100.0
            # The raw parsed value (not the fallback) goes to the file.
            file_RX.write(PDB_ID + "\t" + str(RX) + "\n")

    runOtherSoft.Rhistogram(p_file_RX, "RX_ref_no_filter")
    return d_dataset