Esempio n. 1
0
def selectBestFilePDBFasta (d_PDB, result_align, substruct) : 
    """
    Fill d_PDB["best"] with the PDB / fasta pair selected for this entry.

    - result_align == [] : the whole files d_PDB["p_pdb"] / d_PDB["p_fasta"]
      are selected.
    - at least one value of the two-level result_align mapping differs from
      "100.0%" : the whole files are selected and the fasta file is handed
      to tool.fusionchainfasta.
    - every value equals "100.0%" : the first chain file whose ligand list
      contains substruct is selected together with the fasta file found at
      the same index in d_PDB["p_fasta_chain"].  If no chain matches,
      d_PDB["best"] stays an empty dictionary.

    args: -> d_PDB, dictionary with keys "p_pdb", "p_fasta", "p_pdb_chain"
             and "p_fasta_chain"
          -> result_align, [] or dictionary of dictionaries of identity strings
          -> substruct, ligand name searched in each chain file
    return: None (d_PDB is modified in place)
    """

    best = {}
    d_PDB["best"] = best

    # case with only one PDB
    if result_align == [] :
        best["PDB"] = d_PDB["p_pdb"]
        best["fasta"] = d_PDB["p_fasta"]
        return

    # stop at the first pair of entries that is not 100% identical
    not_identical = any (
        result_align[key_1][key_2] != "100.0%"
        for key_1 in result_align.keys ()
        for key_2 in result_align[key_1].keys ()
    )
    if not_identical :
        best["PDB"] = d_PDB["p_pdb"]
        best["fasta"] = d_PDB["p_fasta"]
        # fusion fasta for blast remove header
        tool.fusionchainfasta(d_PDB["p_fasta"])
        return

    # identity 100% everywhere -> first chain that carries the substructure
    for rank, p_chain in enumerate (d_PDB["p_pdb_chain"]) :
        if substruct in parsePDB.retrieveListLigand(p_chain) :
            best["PDB"] = p_chain
            best["fasta"] = d_PDB["p_fasta_chain"][rank]
            return
Esempio n. 2
0
def selectBestFilePDBFasta(d_PDB, result_align, substruct):
    """Select the PDB and fasta files stored under d_PDB["best"].

    The whole files (d_PDB["p_pdb"], d_PDB["p_fasta"]) are selected when
    result_align is an empty list, or when any value of the nested
    result_align mapping is different from "100.0%"; in the second case the
    fasta file is also passed to tool.fusionchainfasta.

    When every value is "100.0%", the chain files of d_PDB["p_pdb_chain"]
    are scanned in order and the first one whose ligand list contains
    substruct is selected with the fasta file of the same index. Nothing is
    selected (d_PDB["best"] remains {}) if no chain contains it.

    Returns None; d_PDB is updated in place.
    """
    d_PDB["best"] = {}
    selection = d_PDB["best"]

    if result_align != []:
        # look for one alignment score that is not a perfect identity
        divergent = False
        for outer in result_align.keys():
            for inner in result_align[outer].keys():
                if result_align[outer][inner] != "100.0%":
                    divergent = True
                    break
            if divergent:
                break

        if not divergent:
            # control substructure presented // case identity 100%
            for rank in range(len(d_PDB["p_pdb_chain"])):
                p_chain = d_PDB["p_pdb_chain"][rank]
                if substruct in parsePDB.retrieveListLigand(p_chain):
                    selection["PDB"] = p_chain
                    selection["fasta"] = d_PDB["p_fasta_chain"][rank]
                    break
            return

    # whole-file selection: single PDB, or chains that are not identical
    selection["PDB"] = d_PDB["p_pdb"]
    selection["fasta"] = d_PDB["p_fasta"]
    if result_align != []:
        # fusion fasta for blast remove header
        tool.fusionchainfasta(d_PDB["p_fasta"])
Esempio n. 3
0
File: main.py Progetto: ABorrel/LSRs
def datasetPreparation (ligand_ID, clean = 1):
    """
    Prepare every reference folder of the dataset associated with ligand_ID.

    For each entry of the dataset directory whose name is 4 characters long
    (presumably a PDB code - confirm against pathManage.dataset):
      1. if clean == 1, remove every file that does not contain ".pdb",
         that contains "subref", or whose name part before the first "_"
         is 3 characters long (looks like previously extracted ligand
         files - confirm)
      2. for every remaining file, write the first parsed occurrence of each
         ligand found in it to "<ligand>_<file name>"
      3. locate the reference ligand file with pathManage.findligandRef,
         split it with substructTools.retrieveSubstruct and write each
         substructure to "subref_<substructure>_<folder>.pdb"

    Folders without reference ligand, or without any substructure, are
    skipped at step 3.

    args: -> ligand_ID, name of the ligand defining the dataset
          -> clean, 1 to clean the folders before the extraction
    return: 1
    """

    p_dir_dataset = pathManage.dataset(ligand_ID)
    nb_folder = 0

    for ref_folder in listdir(p_dir_dataset) :
        # file include in dataset folder
        if len (ref_folder) != 4:
            continue

        # folder path built once; p_dir_dataset is concatenated as-is, as before
        p_dir_ref = p_dir_dataset + ref_folder + "/"
        nb_folder = nb_folder + 1
        # single pre-formatted string: same output with print statement or function
        print ("%s %s" % (ref_folder, nb_folder))

        # clean repertory -> only PDB ref and PDB
        if clean == 1 :
            for pdbfile in listdir(p_dir_ref) :
                # dot escaped so that only a literal ".pdb" is recognised
                if not search (r"\.pdb", pdbfile) or search ("subref", pdbfile) or len (pdbfile.split("_")[0]) == 3:
                    remove (p_dir_ref + pdbfile)

        # folder listed again after the cleaning; the list is a snapshot, so
        # the ligand files written below are not visited by this loop
        for pdbfile in listdir(p_dir_ref) :
            p_file_pdb = p_dir_ref + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if l_ligand == []:
                continue

            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            for name_ligand in l_ligand :
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed, name_ligand)
                if l_lig_parsed == [] :
                    continue
                # only the first retrieved occurrence of the ligand is written
                p_filout_ligand = p_dir_ref + name_ligand + "_" + pdbfile
                writePDBfile.coordinateSection(p_filout_ligand, l_lig_parsed[0], "HETATM", header=0, connect_matrix=1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(p_dir_ref, ligand_ID)
        if p_lig_ref == 0:
            continue

        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(lig_ref_parsed, ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue

        # write one file per substructure
        for subs in d_l_atom_substruct.keys ():
            p_filout_substruct = p_dir_ref + "subref_" + subs + "_" + ref_folder + ".pdb"
            writePDBfile.coordinateSection(p_filout_substruct, d_l_atom_substruct[subs], "HETATM", header=0, connect_matrix=1)

    return 1
Esempio n. 4
0
def filterBlastResult (d_dataset, p_dir_dataset, substruct, thresold_RX = 2.5, thresold_blast = 1e-4, debug = 1, dbPDB = "/home/borrel/PDB/") : 
    
    """
    Filter resolution PDB
    Filter evalue

    For every reference PDB of d_dataset with "conserve" != 0, each BLAST hit
    of d_dataset[pdb_ref]["align"] is kept when:
      - its value is <= thresold_blast and its first 4 characters
        (upper-cased) differ from the reference identifier
      - the PDB file can be imported (downloadFile.importPDB does not return 0)
      - its ligand list is not empty and does not contain substruct
      - its resolution can be converted to float and is <= thresold_RX
    Kept hits are appended, upper-cased, to d_dataset[pdb_ref]["blast"], with
    at most one entry per identifier part found before the first "_".

    args: -> d_dataset, dictionary by reference PDB with keys "conserve"
             and "align" ("blast" is created when a hit is kept)
          -> p_dir_dataset, dataset directory, concatenated as-is with
             the reference identifier
          -> substruct, ligand name that rejects a hit when present
          -> thresold_RX, maximal resolution accepted
          -> thresold_blast, maximal e-value accepted
          -> debug, 1 to print the control messages
          -> dbPDB, directory forwarded to downloadFile.importPDB
             (previously hard-coded to the same default)
    return: None (d_dataset is modified in place)
    """
    
    for pdb_ref in d_dataset.keys () : 
        d_ref = d_dataset[pdb_ref]
        if d_ref["conserve"] == 0 : 
            continue

        for pdb_blast_chain in d_ref["align"].keys () : 
            e_value = d_ref["align"][pdb_blast_chain]
            # filter e.value
            if debug == 1 : 
                print ("%s %s %s" % (e_value, pdb_ref, thresold_blast))

            # remove thresold and reference cleanner, remove if alignement on same protein
            if not (e_value <= thresold_blast and pdb_blast_chain[0:4].upper() != pdb_ref) : 
                continue
            if debug == 1 : 
                print ("CONTROL %s %s" % (pdb_blast_chain, e_value))

            # dowload PDB files
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(pdb_blast, p_dir_dataset + pdb_ref + "/", dir_by_PDB=0, dbPDB=dbPDB)
            if p_pdb_blast == 0 : 
                continue

            l_queries_by_chain = separeByChain(p_pdb_blast) # divise chain in reference folder
            # resolution that cannot be read -> 100.0, rejected by the threshold below;
            # Exception instead of a bare except so Ctrl-C / SystemExit are not swallowed
            try : 
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception : 
                RX = 100.0
            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1 : 
                print ("%s %s %s control blast\n%s\n" % (p_pdb_blast, l_ligand, RX, " ".join(l_queries_by_chain)))

            # remove apo forms and hits that contain substruct, with their chain files
            if l_ligand == [] or substruct in l_ligand : 
                remove (p_pdb_blast)
                for p_query_chain in l_queries_by_chain : 
                    remove (p_query_chain)
                continue

            # case RMN structure: resolution is not a number
            # NOTE(review): files of hits rejected from here on stay on disk
            try : 
                RX = float(RX)
            except (TypeError, ValueError) : 
                continue

            if debug == 1 : 
                print ("---- %s %s ----" % (RX, thresold_RX))
            if not RX <= thresold_RX : 
                continue

            # control if other chain in list: one entry by PDB identifier
            chain_kept = pdb_blast_chain.upper ()
            l_blast = d_ref.setdefault ("blast", [])
            if not any (PDB_in.split ("_")[0] == chain_kept.split ("_")[0] for PDB_in in l_blast) : 
                l_blast.append (chain_kept)

            if debug == 1 : 
                print ("%s %s" % (l_blast, pdb_ref))
Esempio n. 5
0
def searchLigands(prPDB, prresult):
    '''search ligands in PDB database

    The result file is prresult + "resultLigandInPDB". If it already exists
    and is not empty it is returned directly, without parsing anything.
    Otherwise one line "<PDB id>\\t<ligands separated by spaces>" is written
    for every PDB of retriveListPDB(prPDB) that has at least one ligand.

    args: -> prPDB, path prefix of the PDB files, read as
             prPDB + <PDB id in lowercase> + ".pdb"
          -> prresult, path prefix of the result file
    out : list of ligands with PDB files associated (path of the file)'''

    # parenthesised single string: same output with print statement or function
    print("Start Search Ligand In PDB file")
    pfilout = prresult + "resultLigandInPDB"
    # control file exist
    # NOTE(review): a partial file left by an interrupted run is also reused
    if path.exists(pfilout) and path.getsize(pfilout) != 0:
        return pfilout

    l_PDB = retriveListPDB(prPDB)

    # with-block: the file is closed even if the parsing of one PDB fails
    with open(pfilout, "w") as filout:
        for PDBid in l_PDB:
            llig = parsePDB.retrieveListLigand(prPDB + PDBid.lower() + ".pdb")
            if llig != []:
                filout.write(PDBid + "\t" + " ".join(llig) + "\n")

    return pfilout
Esempio n. 6
0
def searchLigands(prPDB, prresult):
    '''search ligands in PDB database

    Writes, for every PDB identifier returned by retriveListPDB(prPDB) that
    has a non-empty ligand list, the line
    "<PDB id>\\t<ligands separated by spaces>" in the file
    prresult + "resultLigandInPDB". An existing non-empty result file is
    reused as-is and nothing is parsed.

    args: -> prPDB, path prefix of the PDB files, read as
             prPDB + <PDB id in lowercase> + ".pdb"
          -> prresult, path prefix of the result file
    out : list of ligands with PDB files associated (path of the file)'''

    # parenthesised single string: same output with print statement or function
    print("Start Search Ligand In PDB file")
    pfilout = prresult + "resultLigandInPDB"
    # control file exist
    # NOTE(review): a partial file left by an interrupted run is also reused
    if path.exists(pfilout) and path.getsize(pfilout) != 0:
        return pfilout

    l_PDB = retriveListPDB(prPDB)

    # context manager so the handle is released even when a PDB cannot be parsed
    with open(pfilout, "w") as filout:
        for PDBid in l_PDB:
            llig = parsePDB.retrieveListLigand(prPDB + PDBid.lower() + ".pdb")
            if llig != []:
                filout.write(PDBid + "\t" + " ".join(llig) + "\n")

    return pfilout
Esempio n. 7
0
def datasetPreparation(ligand_ID, clean=1):
    """Prepare every reference folder of the dataset built for ligand_ID.

    For each entry of the dataset directory whose name is 4 characters long
    (presumably a PDB code - confirm against pathManage.dataset):

    1. if clean == 1, remove every file that does not contain ".pdb", that
       contains "subref", or whose name part before the first "_" is
       3 characters long (looks like previously extracted ligand files -
       confirm);
    2. for every remaining file, write the first parsed occurrence of each
       ligand found in it to "<ligand>_<file name>";
    3. locate the reference ligand file with pathManage.findligandRef, split
       it with substructTools.retrieveSubstruct and write each substructure
       to "subref_<substructure>_<folder>.pdb".

    Folders without reference ligand, or without any substructure, are
    skipped at step 3.

    Args:
        ligand_ID: name of the ligand defining the dataset.
        clean: 1 to clean the folders before the extraction.

    Returns:
        1
    """
    p_dir_dataset = pathManage.dataset(ligand_ID)
    nb_folder = 0

    for ref_folder in listdir(p_dir_dataset):
        # file include in dataset folder
        if len(ref_folder) != 4:
            continue

        # folder path built once; p_dir_dataset is concatenated as-is, as before
        p_dir_ref = p_dir_dataset + ref_folder + "/"
        nb_folder = nb_folder + 1
        # single pre-formatted string: same output with print statement or function
        print("%s %s" % (ref_folder, nb_folder))

        # clean repertory -> only PDB ref and PDB
        if clean == 1:
            for pdbfile in listdir(p_dir_ref):
                # dot escaped so that only a literal ".pdb" is recognised
                if (not search(r"\.pdb", pdbfile)
                        or search("subref", pdbfile)
                        or len(pdbfile.split("_")[0]) == 3):
                    remove(p_dir_ref + pdbfile)

        # folder listed again after the cleaning; the list is a snapshot, so
        # the ligand files written below are not visited by this loop
        for pdbfile in listdir(p_dir_ref):
            p_file_pdb = p_dir_ref + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if l_ligand == []:
                continue

            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            for name_ligand in l_ligand:
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed,
                                                       name_ligand)
                if l_lig_parsed == []:
                    continue
                # only the first retrieved occurrence of the ligand is written
                p_filout_ligand = p_dir_ref + name_ligand + "_" + pdbfile
                writePDBfile.coordinateSection(p_filout_ligand,
                                               l_lig_parsed[0],
                                               "HETATM",
                                               header=0,
                                               connect_matrix=1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(p_dir_ref, ligand_ID)
        if p_lig_ref == 0:
            continue

        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(
            lig_ref_parsed, ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue

        # write one file per substructure
        for subs in d_l_atom_substruct.keys():
            p_filout_substruct = (p_dir_ref + "subref_" + subs + "_" +
                                  ref_folder + ".pdb")
            writePDBfile.coordinateSection(p_filout_substruct,
                                           d_l_atom_substruct[subs],
                                           "HETATM",
                                           header=0,
                                           connect_matrix=1)

    return 1
Esempio n. 8
0
def filterBlastResult(d_dataset,
                      p_dir_dataset,
                      substruct,
                      thresold_RX=2.5,
                      thresold_blast=1e-4,
                      debug=1,
                      dbPDB="/home/borrel/PDB/"):
    """
    Filter resolution PDB
    Filter evalue

    For every reference PDB of d_dataset with "conserve" != 0, each BLAST hit
    of d_dataset[pdb_ref]["align"] is kept when:
      - its value is <= thresold_blast and its first 4 characters
        (upper-cased) differ from the reference identifier
      - the PDB file can be imported (downloadFile.importPDB does not return 0)
      - its ligand list is not empty and does not contain substruct
      - its resolution can be converted to float and is <= thresold_RX
    Kept hits are appended, upper-cased, to d_dataset[pdb_ref]["blast"], with
    at most one entry per identifier part found before the first "_".

    Args:
        d_dataset: dictionary by reference PDB with keys "conserve" and
            "align" ("blast" is created when a hit is kept).
        p_dir_dataset: dataset directory, concatenated as-is with the
            reference identifier.
        substruct: ligand name that rejects a hit when present.
        thresold_RX: maximal resolution accepted.
        thresold_blast: maximal e-value accepted.
        debug: 1 to print the control messages.
        dbPDB: directory forwarded to downloadFile.importPDB (previously
            hard-coded to the same default).

    Returns:
        None; d_dataset is modified in place.
    """

    for pdb_ref in d_dataset.keys():
        d_ref = d_dataset[pdb_ref]
        if d_ref["conserve"] == 0:
            continue

        for pdb_blast_chain in d_ref["align"].keys():
            e_value = d_ref["align"][pdb_blast_chain]
            # filter e.value
            if debug == 1:
                print("%s %s %s" % (e_value, pdb_ref, thresold_blast))

            # remove thresold and reference cleanner, remove if alignement on same protein
            if not (e_value <= thresold_blast
                    and pdb_blast_chain[0:4].upper() != pdb_ref):
                continue
            if debug == 1:
                print("CONTROL %s %s" % (pdb_blast_chain, e_value))

            # dowload PDB files
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(pdb_blast,
                                                 p_dir_dataset + pdb_ref + "/",
                                                 dir_by_PDB=0,
                                                 dbPDB=dbPDB)
            if p_pdb_blast == 0:
                continue

            # divise chain in reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)
            # resolution that cannot be read -> 100.0, rejected by the threshold
            # below; Exception instead of a bare except so Ctrl-C / SystemExit
            # are not swallowed
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0
            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                print("%s %s %s control blast\n%s\n" %
                      (p_pdb_blast, l_ligand, RX,
                       " ".join(l_queries_by_chain)))

            # remove apo forms and hits that contain substruct, with their chain files
            if l_ligand == [] or substruct in l_ligand:
                remove(p_pdb_blast)
                for p_query_chain in l_queries_by_chain:
                    remove(p_query_chain)
                continue

            # case RMN structure: resolution is not a number
            # NOTE(review): files of hits rejected from here on stay on disk
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                print("---- %s %s ----" % (RX, thresold_RX))
            if not RX <= thresold_RX:
                continue

            # control if other chain in list: one entry by PDB identifier
            chain_kept = pdb_blast_chain.upper()
            l_blast = d_ref.setdefault("blast", [])
            if not any(
                    PDB_in.split("_")[0] == chain_kept.split("_")[0]
                    for PDB_in in l_blast):
                l_blast.append(chain_kept)

            if debug == 1:
                print("%s %s" % (l_blast, pdb_ref))