def computeRMSFresBS(self, pr_MDout):
        """
        Rewrite the RMSF by residue table with a column flagging the binding site (BS)

        Every file of <pr_MDout>BSs/ is loaded with PDB.PDB and the residues given by
        get_byres() are collected (second "_"-separated field of each key).
        The table <pr_MDout>RMSDs/residues/resRMSD is then rewritten in resRMSD_BS
        with a last column BS equal to 1 if the residue was collected, 0 otherwise.

        args: -> pr_MDout: folder of the MD results, with the final "/"
        return: -> path of the table written
        """

        # load BS (frame 0 according to the original comment -- TODO confirm)
        pr_BS = pr_MDout + "BSs/"
        s_resBS = set()  # set: one membership test by row of the RMSF table
        for pBS in listdir(pr_BS):
            cBS = PDB.PDB(pr_BS + pBS)
            for res in cBS.get_byres().keys():
                s_resBS.add(res.split("_")[1])

        # RMSF by residue to rewrite with the binding site
        ldresRMSF = toolbox.matrixToList(pr_MDout + "RMSDs/residues/resRMSD")

        # rewriting, the file is closed even if a row is malformed
        pfilout = pr_MDout + "RMSDs/residues/resRMSD_BS"
        with open(pfilout, "w") as filout:
            filout.write("NameRes\tall\tCa\tDmax\tBS\n")
            for dresRMSF in ldresRMSF:
                BS = 1 if dresRMSF["NameRes"] in s_resBS else 0
                filout.write("%s\t%s\t%s\t%s\t%s\n" %
                             (dresRMSF["NameRes"], dresRMSF["all"],
                              dresRMSF["Ca"], dresRMSF["Dmax"], BS))

        return pfilout
    def buildRMDSSheets(self):
        """
        Function use to build a figure by chemical with RMSD prot, RMSD ligand and
        RMSF by residues of the protein (binding site flagged)
        Need 3 files, RMSD for prot, RMSD for ligand and RMSF; a missing file is
        reported on stdout and recomputed before the figure is built.
        A MD folder belongs to a chemical when the part of its name before the
        first "_" is the ChEMBL ID. Figures are written in <pr_out><STANDARD_TYPE>/
        """
        l_fMDout = listdir(self.pr_MDout)

        # load ChEMBL table
        ldchem = toolbox.matrixToList(self.p_dataset)
        for dchem in ldchem:
            ChEMBL_id = dchem["CMPD_CHEMBLID"]
            typeAff = dchem["STANDARD_TYPE"]

            # extract RMSD files
            for fMDout in l_fMDout:
                if fMDout.split("_")[0] != ChEMBL_id:
                    continue
                pr_MD = self.pr_MDout + fMDout + "/"

                # protein
                p_prot = ""
                p_RMSD_prot = pr_MD + "RMSDs/protein/protRMSD"
                if not path.exists(p_RMSD_prot):
                    print("Error in " + fMDout + ": prot RMSD missing")
                    self.computeRMSDProt(pr_MD)
                # the return of computeRMSDProt is not known here, so the
                # expected file is checked again instead of trusting it
                if path.exists(p_RMSD_prot):
                    p_prot = p_RMSD_prot

                # ligand
                p_RMSD_lig = pr_MD + "RMSDs/ligand/ligRMSD"
                if not path.exists(p_RMSD_lig):
                    print("Error in " + fMDout + ": lig RMSD missing")
                    p_RMSD_lig = self.computeRMSDLig(pr_MD)

                # RMSF with the binding site represented
                p_RMSF_res = pr_MD + "RMSDs/residues/resRMSD_BS"
                if not path.exists(p_RMSF_res):
                    print("Error in " + fMDout + ": lig RMSF BS missing")
                    # was stored as the ligand RMSD, overwriting it
                    p_RMSF_res = self.computeRMSFresBS(pr_MD)

                # build figure 3 panels
                runExternalSoft.RMSD3panels(
                    p_prot, p_RMSD_lig, p_RMSF_res, ChEMBL_id,
                    pathFolder.createFolder(self.pr_out + typeAff + "/"))
Example #3
0
    def loadRMSDs(self, lload=("ligand", "protein", "residues")):
        """
        Load on the instance the RMSD tables stored in self.prin

        args: -> lload: types of table to load, each one is read in <prin><type>/
                 "ligand"   -> self.lig, dictionary with keys "RMSF" (file ligRMSF)
                               and "ShaEP" (file ligShaEP)
                 "protein"  -> self.prot (file protRMSD)
                 "residues" -> self.res (file resRMSD)
                 any other value is ignored
        return: -> None, results are set on the instance
        """
        # default is a tuple, not a list, to not share a mutable object between calls

        for typeRMSD in lload:
            if typeRMSD == "ligand":
                pr_lig = self.prin + typeRMSD + "/"
                dlig = {}
                dlig["RMSF"] = toolbox.loadMatrixToDict(pr_lig + "ligRMSF")
                dlig["ShaEP"] = toolbox.matrixToList(pr_lig + "ligShaEP")
                self.lig = dlig

            elif typeRMSD == "protein":
                self.prot = toolbox.matrixToList(self.prin + typeRMSD + "/protRMSD")

            elif typeRMSD == "residues":
                self.res = toolbox.loadMatrixToDict(self.prin + typeRMSD + "/resRMSD")
Example #4
0
    def CleanCHEMBLFileProtAff(self):
        """
        Build the filtered ChEMBL table, or reload it if it was already written

        The table is stored in <pr_out>tab_filtered_<types of affinity joined by "-">.csv
        If this file exists it is loaded, set on self.table and returned.
        Else the ChEMBL file is parsed and the filtering methods are applied one
        after the other, the size of the table is printed after each of them, and
        the final table is written in the file.

        return: -> the table (list)
        """

        def _report(label):
            # size of the current table followed by the name of the step
            print("%s %s" % (len(self.table), label))

        pfilout = "".join([self.pr_out, "tab_filtered_",
                           "-".join(self.ltypeAff), ".csv"])

        if not path.exists(pfilout):
            self.parseCHEMBLFile()
            _report("Init cleaning")

            self.selectConfidencecore(cutoff=9)
            _report("prot confidence")

            self.getOnlyExactConstant()
            _report("strict value")

            self.getByTypeOfAff(self.ltypeAff)
            _report(self.ltypeAff)

            self.MergeIdenticCHEMBLIDforACtivity()
            _report("Repetition")

            self.selectAssayType("B")
            _report("Type assay")

            # remove some bioassays
            self.removeBA(self.outlier)
            _report("remove bioassay")

            self.checkIdenticSMI()
            _report("Identic SMI")

            self.writeTable(pfilout)

            # return the table in list
            return self.table

        # filout already exists, reload it
        l_loaded = toolbox.matrixToList(pfilout)
        print("%s %s" % (len(l_loaded), "Nb selected compounds"))
        print(l_loaded[0].keys())
        self.table = l_loaded
        return l_loaded
    def plot_dockScoreVSActivity(self, ptableCHEMBL):
        """
        Write the table of docking scores versus affinity and plot it

        One row by chemical of the ChEMBL table having an entry in self.dscores:
        ChEMBL ID, docking score, emodel score, pchembl value, type of affinity
        and number of poses. Chemicals without score are skipped.
        If <pr_out>ScoreVSAff.txt already exists nothing is computed or plotted.

        args: -> ptableCHEMBL: path of the ChEMBL table
        return: -> path of the table written
        """

        pfilout = self.pr_out + "ScoreVSAff.txt"
        if path.exists(pfilout):
            return pfilout

        # load dock score if not already done
        if not "dscores" in self.__dict__:
            self.get_bestdockingscore()

        # load ChEMBL dataset
        ldchem = toolbox.matrixToList(ptableCHEMBL)

        with open(pfilout, "w") as filout:
            filout.write("IDCHEMBL\tDock_score\temodel_score\tAff\ttypeAff\tNB poses\n")

            for dchem in ldchem:
                chemID = dchem["CMPD_CHEMBLID"]
                if chemID not in self.dscores:
                    # chemical of the dataset without docking result
                    continue
                dscore = self.dscores[chemID]
                filout.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (
                    chemID, dscore["r_i_docking_score"],
                    dscore["r_i_glide_emodel"], dchem["PCHEMBL_VALUE"],
                    dchem["STANDARD_TYPE"], dscore["count"]))

        # NOTE(review): the original passed an undefined name `prout` as output
        # folder, self.pr_out is assumed -- confirm against runExternalSoft.corPlot
        runExternalSoft.corPlot(pfilout, ptableCHEMBL, self.pr_out)
        return pfilout
    def get_TopRanking(self, ptableCHEMBL, nrank = 5):
        """
        Draw, for each type of affinity of the dataset, the nrank chemicals with
        the lowest docking score

        Only poses of chemicals belonging to the type of affinity and with a
        numeric docking score are considered; a chemical is drawn once, with its
        lowest scored pose. If less than nrank chemicals are available, all of
        them are drawn.
        For each chemical a .smi and a .png named <rank>_<ChEMBL ID>-<typeAff> are
        written in <pr_out>top-<nrank>/, the png is annotated with the ChEMBL ID,
        the rank, the pchembl value, the docking score and the emodel score.

        args: -> ptableCHEMBL: path of the ChEMBL table
              -> nrank: number of chemicals drawn by type of affinity
        """

        pr_topChem = pathFolder.createFolder(self.pr_out + "top-" + str(nrank) + "/")

        # load dock score
        if not "dscores" in self.__dict__:
            self.get_bestdockingscore()

        # load dataset, organized by type of affinity and by chemical
        dbytypeAct = {}
        for dchem in toolbox.matrixToList(ptableCHEMBL):
            dbytypeAct.setdefault(dchem["STANDARD_TYPE"], {})[dchem["CMPD_CHEMBLID"]] = dchem

        # extract top nrank dock score
        for typeAff in dbytypeAct.keys():
            print(typeAff)
            dchem_aff = dbytypeAct[typeAff]

            # poses of this type of affinity with a usable score
            l_scored = []
            for pose in self.cPoses.lc:
                pose_chem = pose["s_m_entry_name"].split(".")[0]
                if pose_chem not in dchem_aff:
                    continue
                try:
                    score_dock = float(pose["r_i_docking_score"])
                except (KeyError, TypeError, ValueError):
                    continue
                l_scored.append((score_dock, pose_chem, pose))

            # order on the score only, the sort is stable so poses with the same
            # score stay in the order of self.cPoses.lc
            l_scored.sort(key=lambda scored: scored[0])

            irank = 1
            s_chem_image = set()
            for score_dock, pose_chem, pose in l_scored:
                if irank > nrank:
                    break
                if pose_chem in s_chem_image:
                    # chemical already drawn with a better pose
                    continue
                s_chem_image.add(pose_chem)

                # path png and smi
                p_image = "%s%s_%s-%s.png"%(pr_topChem, irank, pose_chem, typeAff)
                p_smiout = "%s%s_%s-%s.smi"%(pr_topChem, irank, pose_chem, typeAff)

                # to write on figure
                lw = ["ChEMBL ID: " + str(pose_chem)]
                lw.append("Rank: " + str(irank))
                lw.append("p" + typeAff + ": " + str(dchem_aff[pose_chem]["PCHEMBL_VALUE"]))
                lw.append("Docking score: {:0.2f}".format(score_dock))
                lw.append("Emodel score: {:0.2f}".format(float(pose["r_i_glide_emodel"])))

                # smi used to generate the png
                with open(p_smiout, "w") as f_smiout:
                    f_smiout.write(dchem_aff[pose_chem]["CANONICAL_SMILES"])

                # png
                runExternalSoft.molconvert(p_smiout, p_image)

                # paste the structure on a larger canvas and write the legend
                # under it, one line every 25 pixels from y = 600
                # NOTE(review): `font` is not defined in this method, presumably
                # a module-level font object -- confirm
                img = Image.open(p_image)
                imgnew = Image.new("RGBA", (580, 775), (250, 250, 250))
                imgnew.paste(img, (0,0))
                draw = ImageDraw.Draw(imgnew)
                for iline, text in enumerate(lw):
                    draw.text((10, 600 + 25 * iline), text, (0, 0, 0), font=font)
                imgnew.save(p_image)
                irank = irank + 1