def computeCoords(self, map_name, table_descriptor, corVal,
                      distributionVal):
        """Prepare the descriptor tables and projection files of a map.

        Folders are created under
        ``<self.pr_out>update<MAP_NAME>/coords/`` and the projection folder
        ``proj_<corVal>-<distributionVal>/`` inside it. Descriptor CSV files
        are extracted only when missing, and the coordinates are computed
        only when one of the two coordinate files is missing.

        Args:
            map_name: name of the map; stored in ``self.map_name`` and
                upper-cased to build the update folder name.
            table_descriptor: not used by this method.
            corVal: value forwarded to ``RComputeMapFiles``; also part of
                the projection folder name.
            distributionVal: value forwarded to ``RComputeMapFiles``; also
                part of the projection folder name.

        Side effects:
            Sets ``self.map_name`` and ``self.pr_coords`` (the projection
            folder).
        """
        self.map_name = map_name

        # the return value of createFolder is used as the folder path
        dir_update = pathFolder.createFolder(
            self.pr_out + "update" + map_name.upper() + "/")
        dir_coords = pathFolder.createFolder(dir_update + "coords/")
        dir_proj = pathFolder.createFolder(
            dir_coords + "proj_" + str(corVal) + "-" + str(distributionVal) +
            "/")

        # descriptor tables extracted from the DB: (file, name column, value column)
        p_desc_1d2d = dir_coords + "1D2D.csv"
        p_desc_3d = dir_coords + "3D.csv"
        l_extract = (
            (p_desc_1d2d, "chem_descriptor_1d2d_name", "desc_1d2d"),
            (p_desc_3d, "chem_descriptor_3d_name", "desc_3d"),
        )
        for p_desc, col_name, col_value in l_extract:
            if not path.exists(p_desc):
                self.extractDesc("chemical_description", col_name, col_value,
                                 p_desc)

        # coordinates are recomputed as soon as one of the two files is absent
        coords_ready = (path.exists(dir_proj + "coord1D2D.csv")
                        and path.exists(dir_proj + "coord3D.csv"))
        if not coords_ready:
            runExternalSoft.RComputeMapFiles(p_desc_1d2d, p_desc_3d, dir_proj,
                                             corVal, distributionVal)
        self.pr_coords = dir_proj
Esempio n. 2
0
def ChemByCurve(cassay, ppng, prout):
    """Sort chemical pictures by condition and curve class, with a caption.

    For every CAS / condition pair in ``cassay.dresponse`` the picture
    ``<ppng><CAS>.png`` is pasted on a 580x775 canvas, three text lines
    (CAS, AC50, curve class) are drawn below it, and the result is saved as
    ``<prout><condition>/<curveclass>/<CAS>.png``.

    Args:
        cassay: assay object; ``responseCurves(drawn=0)`` is called when it
            has no ``dresponse`` attribute.
        ppng: folder prefix holding the source ``<CAS>.png`` files.
        prout: folder prefix under which the sorted pictures are written.

    Notes:
        Chemicals without a picture, or whose picture cannot be opened, are
        skipped. ``font`` is not defined in this function and is expected to
        be available at module level.
    """
    if "dresponse" not in dir(cassay):
        cassay.responseCurves(drawn=0)

    for CASID, dcondition in cassay.dresponse.items():
        for condition, dcurve in dcondition.items():
            curveclass = dcurve['CURVE_CLASS2']
            AC50 = dcurve['AC50']

            # the class folder is created even when no picture is found
            prcurve = prout + condition + "/" + curveclass + "/"
            if not path.exists(prcurve):
                pathFolder.createFolder(prcurve)

            pcaspng = ppng + CASID + ".png"
            if not path.exists(pcaspng):
                continue

            # best effort: an unreadable picture is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed
            try:
                img = Image.open(pcaspng)
            except Exception:
                continue

            imgnew = Image.new("RGBA", (580, 775), (250, 250, 250))
            imgnew.paste(img, (0, 0))
            draw = ImageDraw.Draw(imgnew)

            # caption lines are drawn 25 px apart, starting at y=600
            lcaption = [
                "CAS: " + str(CASID),
                "AC50: " + str(AC50),
                "Curve: " + str(curveclass),
            ]
            for irow, caption in enumerate(lcaption):
                draw.text((10, 600 + 25 * irow), caption, (0, 0, 0),
                          font=font)
            imgnew.save(prcurve + CASID + ".png")
Esempio n. 3
0
    def __init__(self, sdata, pfilecluster, proutcluster, lprdockingpose):
        """Load a cluster table and prepare the matching output folder.

        Args:
            sdata: iterable of compound records with the keys
                "CMPD_CHEMBLID" and "PCHEMBL_VALUE"; ``{}`` means no
                affinity data, in which case ``self.Aff`` is not set.
            pfilecluster: path of the cluster file. The first line is
                skipped; on each other line the cluster is the last
                comma-separated field and the compound ID is the text
                between the first pair of double quotes.
            proutcluster: prefix of the output folder.
            lprdockingpose: list of docking pose folders, stored as is.
        """
        self.pcluster = pfilecluster
        self.prout = proutcluster
        self.lprdockingpose = lprdockingpose
        self.sdata = sdata

        # affinity by compound ID
        if sdata != {}:
            self.Aff = {
                compound["CMPD_CHEMBLID"]: compound["PCHEMBL_VALUE"]
                for compound in self.sdata
            }

        # folder named after the cluster file: last 4 characters dropped,
        # first "_" separated part dropped, rest joined with "-".
        # Every "." of the whole path is removed.
        lnamepart = pfilecluster[0:-4].split("_")[1:]
        prcluster = (self.prout + "-".join(lnamepart) + "/").replace(".", "")
        pathFolder.createFolder(prcluster)
        self.prout = prcluster

        with open(pfilecluster, "r") as filecluster:
            llinesCluster = filecluster.readlines()

        dcluster = {}
        for lineCluster in llinesCluster[1:]:
            linestrip = lineCluster.strip()
            cluster = linestrip.split(",")[-1].replace("\"", "")
            compoundID = linestrip.split("\"")[1]
            dcluster.setdefault(cluster, []).append(compoundID)

        self.clusters = dcluster
Esempio n. 4
0
    def extractFrame(self):
        """Extract frame and wrap water.

        For every job of ``self.lMD`` having both a "pcmsout" and a "prtrj"
        entry, the frames folder ``<pranalysis><jobname>/framesMD/`` is
        created and, when it holds fewer files than the expected number of
        frames, cleaned and filled again. The folder is then stored under
        the "prframe" key of the job.
        """
        # for MD launch
        lprframe = []
        for i, jobname in enumerate(self.lMD.keys(), 1):
            print("Extract - Frame => %s %s" % (jobname, i))
            dMD = self.lMD[jobname]
            if not ("pcmsout" in dMD.keys() and "prtrj" in dMD.keys()):
                continue

            prframes = self.pranalysis + str(jobname) + "/framesMD/"
            lprframe.append(prframes)
            pathFolder.createFolder(prframes)

            # expected number of frames for the whole trajectory
            nbframeth = float(self.MDtime) / (int(self.stepFrame)) / float(
                self.interval) + 1
            nbframe = len(listdir(prframes))
            print("%s %s" % (nbframeth, nbframe))

            if nbframe < int(nbframeth):  # control if frame exist
                # clean folder
                pathFolder.cleanFolder(prframes)
                runExternalSoft.extractFrame(dMD["pcmsout"],
                                             dMD["prtrj"],
                                             prframes,
                                             noHOH=self.water,
                                             step=self.stepFrame,
                                             MDtime=self.MDtime)
                # NOTE(review): lprframe is rebound to the return value of
                # parallelLaunch and later jobs append to that object --
                # confirm it returns a list
                lprframe = toolbox.parallelLaunch(
                    lprframe, self.nbCPU, str(int(float(self.MDtime) / 10)))

            self.lMD[jobname]["prframe"] = prframes
Esempio n. 5
0
    def AC50Distribution(self):
        """Plot the AC50 histograms in ``<self.proutSP>histAC50/``.

        The third argument given to ``plotAC50`` is the second "-"
        separated part of ``self.name``.
        """
        prhist = self.proutSP + "histAC50/"
        pathFolder.createFolder(prhist)

        # run hist plot
        namepart = self.name.split("-")[1]
        runExternalSoft.plotAC50(self.pAC50, prhist, namepart)
Esempio n. 6
0
    def corAC50(self):
        """Write the curve class table and run the AC50 correlation.

        A tab-separated file ``<self.proutSP>corAC50/curve`` is written with
        one row per CAS and one column per ``SAMPLE_DATA_TYPE`` found in
        ``self.lchem``. Each cell holds the ``CURVE_CLASS2`` of the chemical
        for that data type, or "NA" when no record exists.
        ``runExternalSoft.corAC50`` is then called on that file.

        Returns:
            None
        """
        pcor = self.proutSP + "corAC50/"
        pathFolder.createFolder(pcor)

        # define different type of fluo, in order of first appearance
        ltypefluo = []
        for chem in self.lchem:
            typefluo = chem["SAMPLE_DATA_TYPE"]
            if typefluo not in ltypefluo:
                ltypefluo.append(typefluo)

        dtypecurve = {}
        for chem in self.lchem:
            casID = chem["CAS"]
            if casID not in dtypecurve:
                dtypecurve[casID] = {typefluo: "NA" for typefluo in ltypefluo}
            # the column is the data type of this record, not a value left
            # over from a previous loop
            dtypecurve[casID][chem["SAMPLE_DATA_TYPE"]] = chem["CURVE_CLASS2"]

        pcurve = pcor + "curve"
        with open(pcurve, "w") as fcurve:
            fcurve.write("CAS" + "\t" + "\t".join(ltypefluo) + "\n")
            for casID in dtypecurve.keys():
                lcurve = [str(dtypecurve[casID][k]) for k in ltypefluo]
                fcurve.write(casID + "\t" + "\t".join(lcurve) + "\n")

        runExternalSoft.corAC50(self.pAC50, pcurve, pcor)

        return
Esempio n. 7
0
def runQSARClass(cDesc, cAssay, pAC50All, corval, maxQuantile, splitratio,
                 nbCV, ratioAct, nbrepeat, nbNA, nameCell, lchannels, typeData,
                 prout):
    """Build ``nbrepeat`` classification QSAR models and merge their results.

    Each repetition ``i`` (1..nbrepeat) is built in ``<prout><i>/`` from a
    ``Model`` created in "class" mode. The data preparation depends on
    ``typeData``: "color" and "crosscolor" have a dedicated method, any
    other value is forwarded to ``prepData``. The probabilities (RF), the
    results and the descriptor involvement (LDA then RF, with 10 as second
    numeric argument) are then merged in ``Prob/``, ``Average/`` and
    ``descImportance/`` under ``prout``.
    """
    # data preparation methods having no argument, by type of data
    dprepname = {"color": "prepDataColor", "crosscolor": "prepDataCrossColor"}

    for irepeat in range(1, nbrepeat + 1):
        prQSAR = prout + str(irepeat) + "/"
        pathFolder.createFolder(prQSAR)

        cModel = Model(cDesc.pdesc1D2D, cAssay.pAC50, pAC50All, "class",
                       corval, maxQuantile, splitratio, nbCV, ratioAct, nbNA,
                       nameCell, lchannels, prQSAR)
        if typeData in dprepname:
            getattr(cModel, dprepname[typeData])()
        else:
            cModel.prepData(typeData)
        cModel.buildQSARClass()

    prQSARAV = pathFolder.createFolder(prout + "Average/")
    prQSARProb = pathFolder.createFolder(prout + "Prob/")
    mergeProba(prout, "RF", prQSARProb)
    mergeResults(prout, prQSARAV)

    prDescAV = pathFolder.createFolder(prout + "descImportance/")
    for typeML in ("LDA", "RF"):
        mergeDescInvolve(prout, typeML, 10, prDescAV)
Esempio n. 8
0
    def enrichmentIndex(self, pAC50All, FP=1):
        """Compute the enrichment index, unless results are already there.

        Args:
            pAC50All: path of the AC50 table; stored in ``self.pAC50All``.
            FP: 1 names the folder after the fingerprint file; any other
                value names it after the clustering and distance methods.
                0 runs on the cleaned descriptors (built when
                ``self.pdesclean`` is not set and the file is missing); any
                other value runs on ``self.cdesc.pFP``.

        Returns:
            Always 0. Nothing is run when the folder held more than two
            files when it was listed.
        """
        self.pAC50All = pAC50All

        if FP == 1:
            lnamepart = [self.cdesc.pFP.split("/")[-1], str(self.aggType)]
        else:
            lnamepart = [str(self.clusterMeth), str(self.distmeth),
                         str(self.aggType)]
        prenrich = self.prout + "_".join(lnamepart) + "_enrich-index/"
        pathFolder.createFolder(prenrich)
        self.prenrich = prenrich

        # listed before any computation: files written below do not count
        alreadyrun = len(listdir(prenrich)) > 2

        if FP == 0:
            if "pdesclean" not in self.__dict__:
                self.pdesclean = prenrich + "descClean.csv"
                if not path.exists(self.pdesclean):
                    runExternalSoft.dataManager(self.cdesc.pdesc1D2D, 0,
                                                self.corval, self.maxquantile,
                                                prenrich)
            if not alreadyrun:
                runExternalSoft.enrichmentIndex(self.pdesclean, prenrich,
                                                pAC50All, self.clusterMeth,
                                                self.distmeth, self.aggType)
        elif not alreadyrun:
            runExternalSoft.enrichmentIndex(self.cdesc.pFP, prenrich,
                                            pAC50All, self.clusterMeth,
                                            self.distmeth, self.aggType)
        return 0
Esempio n. 9
0
    def visualizeOptimalClustering(self, prresult, FP = 0):
        """Copy the optimal cluster files and run the final clustering.

        Every file of ``self.proptimal`` whose name matches "cluster.csv" is
        copied to
        ``<prresult>FinalClustering/<colorChannel>/<cell>/<desctype>/`` and
        given to ``runExternalSoft.finalClustering``. The cell is the first
        "_" separated part of the file name and the color channel is made of
        the middle parts.

        Args:
            prresult: prefix of the result folder.
            FP: 1 to use the fingerprint file ``self.cdesc.pFP``, otherwise
                the cleaned descriptors ``self.pdesclean``.

        Returns:
            Always 0. An error line is printed when ``self.proptimal`` is
            not set.
        """
        prFinalClustering = pathFolder.createFolder(prresult + "FinalClustering/")
        print(prFinalClustering)

        if "proptimal" not in self.__dict__:
            print("Error clustering no load")
            return 0

        for fileopt in listdir(self.proptimal):
            if not search("cluster.csv", fileopt):
                continue

            lelem = fileopt.split("_")
            cell = lelem[0]
            colorChannel = "_".join(lelem[1:-1])

            if FP == 1:
                # third folder from the end of the path, last 13 characters removed
                desctype = self.proptimal.split("/")[-3][0:-13]
                pdesc = self.cdesc.pFP
            else:
                desctype = "-".join(["Desc", str(self.distmeth), str(self.aggType)])
                pdesc = self.pdesclean

            prtemp = pathFolder.createFolder(
                prFinalClustering + colorChannel + "/" + cell + "/" + desctype + "/")

            pcluster = prtemp + fileopt
            copyfile(self.proptimal + fileopt, pcluster)

            runExternalSoft.finalClustering(pdesc, self.pAC50All, pcluster,
                                            cell + "_" + colorChannel,
                                            self.distmeth, self.aggType, prtemp)
        return 0
Esempio n. 10
0
    def computeFP(self, FPtype):
        """Compute the fingerprint of every chemical found in ``self.prSMI``.

        The CAS is the file name up to its first ".". The SMILES is the
        first line of ``<self.prSMI><cas>.smi``. Chemicals are prepared in
        ``<self.prDesc>SMIclean/`` and the fingerprints are stored by CAS in
        ``self.dFP``. Chemicals for which ``chem.computeFP`` returns 1 are
        reported and skipped.

        Args:
            FPtype: type of fingerprint, forwarded to ``chem.computeFP``.
        """
        # set SMI after cleaning
        prSMIclean = self.prDesc + "SMIclean/"
        pathFolder.createFolder(prSMIclean)
        self.prSMIclean = prSMIclean

        # the folder is listed once; the count is only used for progress
        lpSMI = listdir(self.prSMI)
        nbSMI = len(lpSMI)

        dFP = {}
        for i, pSMI in enumerate(lpSMI, 1):
            cas = pSMI.split("/")[-1].split(".")[0]
            print("%s %s %s" % (cas, i, nbSMI))

            psmiles = self.prSMI + cas + ".smi"
            if not path.exists(psmiles):
                continue

            with open(psmiles, "r") as fsmiles:
                smiles = fsmiles.readlines()[0].strip()

            # chemical
            chem = chemical.chemical(cas, smiles)
            chem.prepareChem(prSMIclean)
            error = chem.computeFP(FPtype)

            if error == 1:
                print("ERROR FP")
                continue
            dFP[cas] = chem.FP

        self.dFP = dFP
    def computeMissingPNG(self):
        """Compute the PNG of the chemicals of the DB that have none yet.

        Chemicals with an inchikey and a cleaned SMILES are read from the
        ``chemicals`` table and processed in random order. A picture is
        expected at ``PNGorganized/<inchikey[:2]>/<inchikey[2:4]>/<inchikey>.png``
        under ``self.pr_out``. When missing, it is computed in
        ``temp_PNG/`` and copied there if the computed file is not empty.
        Progress (index, number of pictures computed) is printed every 100
        chemicals.
        """
        pr_organisedPNG = pathFolder.createFolder(self.pr_out +
                                                  "PNGorganized/")
        pr_temp = pathFolder.createFolder(self.pr_out + "temp_PNG/")

        # load from table: rows are (smiles_clean, inchikey)
        l_chem_chemicalsDB = self.cDB.execCMD(
            "SELECT DISTINCT smiles_clean, inchikey FROM chemicals WHERE inchikey is not null AND smiles_clean is not NULL"
        )
        shuffle(l_chem_chemicalsDB)

        nb_computed = 0
        for i_chem, row_chem in enumerate(l_chem_chemicalsDB):
            if i_chem % 100 == 0:
                print(i_chem, nb_computed)

            smiles_clean = row_chem[0]
            inchikey = row_chem[1]
            pr_png = pr_organisedPNG + inchikey[:2] + "/" + inchikey[
                2:4] + "/"
            p_png = pr_png + inchikey + ".png"
            if path.exists(p_png):
                continue

            cChem = CompDesc.CompDesc(smiles_clean, pr_temp)
            cChem.inchikey = inchikey
            cChem.smi = smiles_clean
            cChem.computePNG(bg="none")

            # only a non empty picture is moved to the organized tree
            p_png_temp = pr_temp + "PNG/" + inchikey + ".png"
            if path.exists(p_png_temp) and path.getsize(p_png_temp) > 0:
                pathFolder.createFolder(pr_png)
                copyfile(p_png_temp, p_png)
                nb_computed = nb_computed + 1
    def organizePNG(self, cleaning=0):
        """Copy the computed PNG in a two level folder tree.

        Every file of ``<self.pr_desc>PNG/`` is processed in random order.
        Empty files are removed. Other files are copied to
        ``<self.pr_out>PNGorganized/<name[:2]>/<name[2:4]>/<name>`` when not
        already there. Progress is printed every 100 files.

        Args:
            cleaning: forwarded as ``clean`` to ``pathFolder.createFolder``
                for the ``PNGorganized/`` folder.
        """
        pr_organisedPNG = pathFolder.createFolder(self.pr_out +
                                                  "PNGorganized/",
                                                  clean=cleaning)
        pr_desc_png = self.pr_desc + "PNG/"

        l_png = listdir(pr_desc_png)
        shuffle(l_png)

        for i, p_png in enumerate(l_png):
            if i % 100 == 0:
                print(i)
            p_png_in = pr_desc_png + p_png

            # case of file remove before: getsize fails with OSError on a
            # file that is gone; other errors are no longer hidden
            try:
                fsize = path.getsize(p_png_in)
            except OSError:
                continue

            if fsize == 0:
                remove(p_png_in)
                continue

            pr_1 = pr_organisedPNG + p_png[:2] + "/"
            pr_2 = pr_1 + p_png[2:4] + "/"
            p_png_out = pr_2 + p_png
            if not path.exists(p_png_out):
                pathFolder.createFolder(pr_1)
                pathFolder.createFolder(pr_2)
                copyfile(p_png_in, p_png_out)
Esempio n. 13
0
    def generatePNG(self):
        """Convert every file of ``self.prSMIclean`` to a PNG in ``self.prPNG``.

        The name of the picture is the name of the input file with its last
        three characters replaced by "png".
        """
        pathFolder.createFolder(self.prPNG)

        for nSMI in listdir(self.prSMIclean):
            pSMIin = self.prSMIclean + nSMI
            pPNGout = self.prPNG + nSMI[:-3] + "png"
            runExternalSoft.molconvert(pSMIin, pPNGout)
Esempio n. 14
0
    def __init__(self, ltypeDesc, typeAff, cutoff, prout):
        """Store the settings and create the QSAR folder.

        Args:
            ltypeDesc: list of descriptor type names; joined with "-" in
                the folder name.
            typeAff: type of affinity; appended to the folder name after "_".
            cutoff: stored as ``self.cutoff``.
            prout: prefix of the output folder;
                ``<prout><types>_<typeAff>/`` is created and stored as
                ``self.prout``.
        """
        self.ltypedesc = ltypeDesc
        self.cutoff = cutoff
        self.typeAff = typeAff

        # define folder with QSAR
        nameQSAR = "-".join(self.ltypedesc) + "_" + str(typeAff)
        prQSAR = prout + nameQSAR + "/"
        pathFolder.createFolder(prQSAR)
        self.prout = prQSAR
Esempio n. 15
0
    def summarize(self, opaff=1):
        """Write a summary of the docking scores by cluster.

        For every docking pose folder of ``self.lprdockingpose`` a
        tab-separated file ``Summary/summary`` is written under
        ``<self.prout><name of the pose folder>/``. It holds one row per
        cluster of ``self.clusters``: mean and standard deviation of the
        docking score and of the emodel, optionally of the affinity, and the
        number of compounds for which a pose was found.

        Args:
            opaff: 0 to leave the affinity columns out; any other value adds
                them, read from ``self.Aff``.

        Notes:
            The pose of a compound is the first file of the folder whose
            name matches ``<compound>.1.sdf`` used as a regular expression.
            Compounds without a pose are skipped.
        """
        withaff = opaff != 0
        if withaff:
            header = "ID\tMDockingScore\tSDDockingScore\tMEmodel\tSDEmodel\tMAff\tSDAff\tNb\n"
        else:
            header = "ID\tMDockingScore\tSDDockingScore\tMEmodel\tSDEmodel\tNB\n"

        for prdockingpose in self.lprdockingpose:
            prout = self.prout + prdockingpose.split("/")[-2] + "/"
            prsum = prout + "Summary/"
            print(prsum)
            pathFolder.createFolder(prsum)
            lposesAll = listdir(prdockingpose)

            with open(prsum + "summary", "w") as filout:
                filout.write(header)

                for cluster in self.clusters.keys():
                    ldockingScore = []
                    lemodel = []
                    laff = []
                    for compound in self.clusters[cluster]:
                        # reset for each compound, so that a missing pose is
                        # never mistaken for the pose of a previous compound
                        pposetemp = ""
                        for poseAll in lposesAll:
                            if search(compound + ".1.sdf", poseAll):
                                pposetemp = prdockingpose + poseAll
                                break
                        if pposetemp == "":
                            continue

                        csdf = parseSDF.sdf(pposetemp)
                        ddock = csdf.get_dockingscore()
                        ldockingScore.append(
                            ddock[compound]["r_i_docking_score"])
                        lemodel.append(ddock[compound]["r_i_glide_emodel"])
                        if withaff:
                            laff.append(float(self.Aff[compound]))

                    lrow = [
                        cluster,
                        mean(ldockingScore),
                        std(ldockingScore),
                        mean(lemodel),
                        std(lemodel),
                    ]
                    if withaff:
                        lrow.extend([mean(laff), std(laff)])
                    lrow.append(len(lemodel))
                    filout.write("\t".join([str(val) for val in lrow]) + "\n")
Esempio n. 16
0
    def createSecondaryClustering(self, pClusters):
        """Cluster again the chemicals of each cluster of a first clustering.

        Args:
            pClusters: path of a comma-separated cluster file. The first
                line is skipped and double quotes are removed; the first
                field is the chemical ID and the second one its cluster.

        For each cluster the work is done in
        ``<self.prCluster>Clust<cluster>/`` and skipped when a
        ``cluster.csv`` file is already there. Without distance method
        (``self.distmeth`` is None) a reduced fingerprint matrix is
        clustered; otherwise the rows of ``self.pdesclean`` of the chemicals
        of the cluster are written to ``descClean.csv`` and clustered.
        """
        with open(pClusters, "r") as fcluster:
            lchemicals = fcluster.readlines()

        dclust = {}
        for linechem in lchemicals[1:]:
            lelem = linechem.strip().replace("\"", "").split(",")
            ID = lelem[0]
            cluster = lelem[1]
            dclust.setdefault(cluster, []).append(ID)

        # do different for FP and descriptor
        useFP = self.distmeth is None
        if not useFP:
            with open(self.pdesclean, "r") as fdesc:
                lchemdesc = fdesc.readlines()

            # descriptor line by chemical ID (first field, quotes removed)
            ddesc = {}
            for chemdesc in lchemdesc[1:]:
                ddesc[chemdesc.split(",")[0].replace("\"", "")] = chemdesc

        # write cluster and chemical
        for cluster in dclust.keys():
            prcluster = self.prCluster + "Clust" + str(cluster) + "/"
            if path.exists(prcluster + "cluster.csv"):
                continue
            pathFolder.createFolder(prcluster)

            if useFP:
                pmatrix = prcluster + "FPmatrix"
                self.cdesc.reduceMatrixFP(dclust[cluster], pmatrix)
                runExternalSoft.clusteringSimilarityMatrix(
                    pmatrix, prcluster, self.aggType, self.clusterMeth,
                    self.optimalNBclustMeth)
            else:
                pdesc = prcluster + "descClean.csv"
                with open(pdesc, "w") as fdescout:
                    fdescout.write(lchemdesc[0])
                    for chemID in dclust[cluster]:
                        fdescout.write(ddesc[chemID])

                runExternalSoft.clustering(pdesc, "0", prcluster,
                                           self.distmeth, self.aggType,
                                           self.clusterMeth,
                                           self.optimalNBclustMeth)
Esempio n. 17
0
    def corelAllAssays(self, cluc, chepg2, chek293):
        """Cross the AC50 of three assays for each cluster of chemicals.

        For every cluster read from ``self.pclusters`` a folder
        ``<self.prout>interfer/<cluster>/`` is filled with the available PNG
        of its chemicals and a ``desc.csv`` file holding their lines of
        ``self.pdesc``. ``runExternalSoft.crossA50s`` is then called with
        the ``pAC50`` of the three assays.

        Args:
            cluc: assay object; only its ``pAC50`` attribute is read.
            chepg2: assay object; only its ``pAC50`` attribute is read.
            chek293: assay object; only its ``pAC50`` attribute is read.

        Returns:
            None
        """
        prInterfer = self.prout + "interfer/"
        pathFolder.createFolder(prInterfer)

        # cluster main: chemical ID, then two fields naming the cluster
        # NOTE(review): the first line is not skipped here -- confirm that
        # the cluster file has no header
        with open(self.pclusters, "r") as fcluster:
            lclusters = fcluster.readlines()

        dclust = {}
        for linecluster in lclusters:
            lelem = linecluster.strip().split(",")
            chemID = lelem[0]
            clustname = str(lelem[1]) + "_" + str(lelem[2])
            dclust.setdefault(clustname, []).append(chemID)

        # open descriptors: line by chemical ID (first tab-separated field)
        with open(self.pdesc, "r") as fdesc:
            lchemdesc = fdesc.readlines()

        ddesc = {}
        for chemdesc in lchemdesc[1:]:
            ddesc[chemdesc.split("\t")[0]] = chemdesc

        for cluster in dclust.keys():
            prclustsub = pathFolder.createFolder(prInterfer + str(cluster) + "/")

            # file descriptors: closed before the external call so that the
            # content is fully written when it is read
            pdesc = prclustsub + "desc.csv"
            with open(pdesc, "w") as filedesc:
                filedesc.write(lchemdesc[0])

                for chemical in dclust[cluster]:
                    # png
                    ppng = self.prPNG + chemical + ".png"
                    if path.exists(ppng):
                        copyfile(ppng, prclustsub + chemical + ".png")
                    # desc
                    if chemical in ddesc:
                        filedesc.write(ddesc[chemical])

            runExternalSoft.crossA50s(pdesc, cluc.pAC50, chepg2.pAC50,
                                      chek293.pAC50, prclustsub)
        return
Esempio n. 18
0
    def extractChemical(self, pSDFTox21):
        """Write the SDF and SMILES files of the chemicals of the assay.

        Every chemical of ``self.lchem`` with a non empty "CAS" is searched
        by "CASRN" in the compounds parsed from ``pSDFTox21``. For each
        matching compound the SDF is written to ``SDF/<cas>.sdf`` when
        missing; the search stops at the first match with a non empty
        "SMILES", which is written to ``SMI/<cas>.smi`` when missing. CAS
        without such a match are listed in
        ``<self.prlog><self.name>-extract.log``.

        Args:
            pSDFTox21: path of the SDF database.

        Side effects:
            Sets ``self.prSMI`` and ``self.prSDF``.
        """
        prSMI = self.prout + "SMI/"
        pathFolder.createFolder(prSMI)

        prSDF = self.prout + "SDF/"
        pathFolder.createFolder(prSDF)

        # load DB
        db = loadDB.sdfDB(pSDFTox21, "CASRN", self.prout)
        db.parseAll()

        # extract chemical
        lnotfind = []
        for chem in self.lchem:
            cas = chem["CAS"]
            if cas == "":
                continue

            found = False
            for cpdDB in db.lc:
                if cpdDB["CASRN"] != cas:
                    continue

                # sdf
                pfilSDF = prSDF + str(cas) + ".sdf"
                if not path.exists(pfilSDF):
                    with open(pfilSDF, "w") as filsdf:
                        filsdf.write(cpdDB["sdf"])

                # Smile: a match without SMILES does not end the search
                if cpdDB["SMILES"] == "":
                    continue
                pfilSMI = prSMI + str(cas) + ".smi"
                if not path.exists(pfilSMI):
                    with open(pfilSMI, "w") as filSMI:
                        filSMI.write(cpdDB["SMILES"])
                found = True
                break

            if not found and cas not in lnotfind:
                lnotfind.append(cas)

        with open(self.prlog + self.name + "-extract.log", "w") as logfile:
            logfile.write("\n".join(lnotfind))

        self.prSMI = prSMI
        self.prSDF = prSDF
Esempio n. 19
0
    def drawMolecules(self, prpng):
        """Convert every SDF of ``self.prsdf`` to a PNG in ``prpng``.

        ``splitSDF`` is called first when ``prsdf`` is not set. Nothing is
        converted when both folders hold the same number of files.

        Args:
            prpng: folder of the pictures; created and stored as
                ``self.prpng``.
        """
        if "prsdf" not in dir(self):
            self.splitSDF()

        pathFolder.createFolder(prpng)
        self.prpng = prpng

        lfsdf = listdir(self.prsdf)
        if len(lfsdf) == len(listdir(self.prpng)):
            return

        for sdf in lfsdf:
            # last three characters of the name replaced by "png"
            ppng = self.prpng + sdf[:-3] + "png"
            runExternalSoft.molconvert(self.prsdf + sdf, ppng)
Esempio n. 20
0
    def clusterizeTopActive(self, top):
        """Cluster the ``top`` chemicals having the highest affinity.

        The affinity table of the top chemicals is written to
        ``<self.prout>top<top>/Aff<top>.csv`` unless it already exists. When
        ``self.paff`` has no more than ``top`` chemicals it is copied as is.
        Otherwise every chemical with an affinity greater than or equal to
        the ``top``-th highest one is kept, so ties can give more than
        ``top`` rows.

        Args:
            top: number of chemicals to keep.
        """
        prtop = pathFolder.createFolder(self.prout + "top" + str(top) + "/")
        pafftop = prtop + "Aff" + str(top) + ".csv"

        if not path.exists(pafftop):
            # create top descriptor
            daff = toolbox.loadMatrixToDict(self.paff)
            if len(daff.keys()) <= top:
                copyfile(self.paff, pafftop)
            else:
                laff = [float(daff[chemID]["Aff"]) for chemID in daff.keys()]
                minAff = sorted(laff, reverse=True)[top - 1]
                lchem = [
                    chemID for chemID in daff.keys()
                    if float(daff[chemID]["Aff"]) >= minAff
                ]

                with open(pafftop, "w") as filout:
                    filout.write("CHEMBLID\tAff\tType\n")
                    for chem in lchem:
                        dchem = daff[chem]
                        filout.write(
                            "%s\t%s\t%s\n" %
                            (dchem["CHEMBLID"], dchem["Aff"], dchem["Type"]))

        runExternalSoft.clusterize(self.pdesc, pafftop, self.typeAff,
                                   self.cutoff, prtop)
Esempio n. 21
0
    def __init__(self, p_sdf, p_LD50, pr_out):
        """Store the input paths, create ``TOX_DB/`` and open the DB access.

        Args:
            p_sdf: stored as ``self.p_sdf``.
            p_LD50: stored as ``self.p_LD50``.
            pr_out: prefix of the output folder; ``self.pr_out`` is the
                value returned by ``createFolder`` for ``<pr_out>TOX_DB/``.
        """
        self.p_sdf, self.p_LD50 = p_sdf, p_LD50

        pr_tox_db = pr_out + "TOX_DB/"
        self.pr_out = pathFolder.createFolder(pr_tox_db)

        self.cDB = DBrequest.DBrequest()
    def extractOnlyNewChem(self, name_table, field_comparison):
        """List the chemicals of the name table that are not in the DB yet.

        The list is cached in ``<self.pr_out>updateDSSTOX/chem_list.txt``
        (one chemical per line) and read back from there when the file
        exists. Otherwise the keys of the table loaded from
        ``self.p_chem_name`` are taken, minus every non-null value of
        ``field_comparison`` found in ``name_table``.

        Args:
            name_table: name of the DB table to compare with.
            field_comparison: name of the column holding the identifiers.
                Both names are inserted as is in the SQL command and must
                come from trusted code.

        Side effects:
            Sets ``self.l_chem_toadd``.
        """
        pr_out = pathFolder.createFolder(self.pr_out + "updateDSSTOX/")
        p_filout = pr_out + "chem_list.txt"

        if path.exists(p_filout):
            with open(p_filout, "r") as filin:
                # the file ends with a newline: drop the empty last entry so
                # that both paths give the same list
                self.l_chem_toadd = [
                    chem for chem in filin.read().split("\n") if chem != ""
                ]
            return

        d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)

        # extract list of chemicals in the DB
        l_chem_DB = self.cDB.execCMD("SELECT %s FROM %s" %
                                     (field_comparison, name_table))
        for chem_DB in l_chem_DB:
            chem = chem_DB[0]
            if chem is None:
                continue
            d_dsstox_name.pop(chem, None)

        l_chem_toadd = list(d_dsstox_name.keys())

        # written last: a failure above does not leave an empty cache file
        with open(p_filout, "w") as filout:
            for chem in l_chem_toadd:
                filout.write(chem + "\n")

        self.l_chem_toadd = l_chem_toadd
    def updateSMILES(self, name_table="chemicals"):
        """Function use to update the chemical table => check if smiles origin change

        For every chemical of ``self.p_chem_SMILES`` already in
        ``name_table`` (matched on the DSSTox ID), the original SMILES is
        compared with the one of the DB. When it differs, the chemical is
        prepared again. When the new inchikey also differs from the one of
        the DB, the row is updated with the new original SMILES, cleaned
        SMILES and inchikey.

        Args:
            name_table: name of the table to update; inserted as is in the
                SQL commands.

        Side effects:
            Sets ``self.pr_desc`` and prints the running count of updates.
        """
        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
        self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")

        # load chemicals of the table: [id, smiles_origin, inchikey,
        # smiles_clean] by dsstox_id
        cmd_SQL = "SELECT id, dsstox_id, smiles_origin, inchikey, smiles_clean FROM %s " % (
            name_table)
        l_chem_DB = self.cDB.execCMD(cmd_SQL)

        d_chem_DB = {}
        for chem_DB in l_chem_DB:
            d_chem_DB[chem_DB[1]] = [
                chem_DB[0], chem_DB[2], chem_DB[3], chem_DB[4]
            ]

        i = 0
        for chem in d_dsstox_SMILES.keys():
            dsstox_id = d_dsstox_SMILES[chem]["dsstox_substance_id"]
            smiles = d_dsstox_SMILES[chem]["Original_SMILES"]

            # case of chemical is not in the DB
            if dsstox_id not in d_chem_DB:
                continue
            id_db, smiles_db, inchikey_db = d_chem_DB[dsstox_id][:3]
            if smiles == smiles_db:
                continue

            # recompute cleaned SMILES
            c_chem = CompDesc.CompDesc(smiles, self.pr_desc)
            c_chem.prepChem()
            if c_chem.err == 0:
                c_chem.generateInchiKey()
            else:
                c_chem.smi = None
            # err is read again after the inchikey generation
            inchikey = c_chem.inchikey if c_chem.err == 0 else None

            if inchikey_db == inchikey:
                continue

            # NOTE(review): when the preparation fails, the text 'None' is
            # written instead of SQL NULL -- confirm this is intended
            cmd_sql = "UPDATE %s SET smiles_origin = '%s', smiles_clean = '%s', inchikey='%s' WHERE id='%s';" % (
                name_table, smiles, c_chem.smi, inchikey, id_db)

            print(i)
            i = i + 1
            self.cDB.updateTable(cmd_sql)

        return
Esempio n. 24
0
def rankCompounds(ptableCluster, prcluster, pMIC_molar, prrank):
    """Copy the pictures of the compounds, named after their MIC rank.

    For every organism (each column of the MIC table except
    "CMPD_CHEMBLID") the MIC values of all compounds are sorted in
    ascending order. The picture of each compound present in the cluster
    table is copied from ``<prcluster>cluster<cluster>/<chem>.jpeg`` to
    ``<prrank><orga>/<rank>_<chem>_<cluster>.jpeg``. The rank is the
    1-based position of the first occurrence of the MIC of the compound in
    the sorted list, so compounds with the same MIC share a rank.

    Args:
        ptableCluster: path of the comma-separated cluster table; each row
            must have a "cluster" entry.
        prcluster: prefix of the folder holding the cluster pictures.
        pMIC_molar: path of the MIC table.
        prrank: prefix of the output folder.
    """
    dMIC = toolbox.loadMatrix(pMIC_molar)
    dcluster = toolbox.loadMatrix(ptableCluster, ",")

    # list() keeps this working where keys() is a view (Python 3)
    lorga = list(dMIC[list(dMIC.keys())[0]].keys())
    lorga.remove("CMPD_CHEMBLID")

    drank = {}
    for orga in lorga:
        drank[orga] = sorted(float(dMIC[chem][orga]) for chem in dMIC.keys())

    for orga in drank.keys():
        prdata = pathFolder.createFolder(prrank + orga + "/")
        schem = set()  # compounds already copied for this organism
        r = 1
        for MIC in drank[orga]:
            for chem in dMIC.keys():
                if chem not in dcluster.keys():
                    continue
                if float(dMIC[chem][orga]) == float(MIC) and chem not in schem:
                    cluster = str(dcluster[chem]["cluster"])
                    pjpeg = prcluster + "cluster" + cluster + "/" + chem + ".jpeg"
                    print(dcluster[chem])
                    print(pjpeg)
                    copyfile(pjpeg, prdata + str(r) + "_" + chem + "_" + cluster + ".jpeg")
                    schem.add(chem)
            r = r + 1
Esempio n. 25
0
    def pushNeighbors(self):
        """Add the neighbors of each chemical to the ``<nameMap>_neighbors`` table.

        Nothing is done unless both ``Table_DIM1D2D-2_1.csv`` and
        ``Table_DIM1D2D-170_207.csv`` exist in ``<self.prout>Neighbors/``.
        The "Neighbors" entry of each chemical is split on spaces and
        written as ``{"a","b",...}`` in the columns ``neighbors_dim3`` and
        ``neighbors_dimn``. One row is added for every chemical of the first
        table, without checking whether it is already in the DB.
        """

        def loadNeighbors(ptable):
            # chemical -> list of neighbors
            dtable = toolbox.loadMatrixToDict(ptable)
            for chem in dtable.keys():
                dtable[chem] = dtable[chem]["Neighbors"].split(" ")
            return dtable

        def formatNeighbors(lneighbors):
            # quoted neighbors, comma-separated, inside curly brackets
            return "{" + ",".join(
                ["\"%s\"" % (neighbor) for neighbor in lneighbors]) + "}"

        prneighbor = pathFolder.createFolder(self.prout + "Neighbors/")
        ptable3Dim = prneighbor + "Table_DIM1D2D-2_1.csv"
        ptableNDim = prneighbor + "Table_DIM1D2D-170_207.csv"
        if not (path.exists(ptable3Dim) and path.exists(ptableNDim)):
            return

        ddist3D = loadNeighbors(ptable3Dim)
        ddistND = loadNeighbors(ptableNDim)

        cDB = DBrequest.DBrequest()
        cDB.verbose = 0
        for chem in ddist3D.keys():
            w3D = formatNeighbors(ddist3D[chem])
            wND = formatNeighbors(ddistND[chem])
            cDB.addElement(
                "%s_neighbors" % (self.nameMap),
                ["inchikey", "neighbors_dim3", "neighbors_dimn"],
                [chem, w3D, wND])
Esempio n. 26
0
    def validationPredictor(self, typeCellChannel, pAC50All):
        """Predict every chemical of the AC50 table and write the predictions.

        Every chemical of ``pAC50All`` having a cleaned SMILES file in
        ``self.cDB.prSMIclean`` is predicted with ``self.predictSMI``. The
        values predicted for ``typeCellChannel`` are written to the
        tab-separated file
        ``<self.prout>validation/<typeCellChannel>/<typeCellChannel>``, with
        one row per chemical and a header naming the columns.

        Args:
            typeCellChannel: cell and channel, "<cell>_<channel>"; column of
                the AC50 table and key of the predictions.
            pAC50All: path of the AC50 table.

        Returns:
            Always 0. No file is written when no chemical could be
            predicted.
        """
        dAC50All = toolbox.loadMatrix(pAC50All)

        dCASact = {typeCellChannel: []}
        dpredict = {}
        for CASID in dAC50All.keys():  # have to change
            if dAC50All[CASID][typeCellChannel] != "NA":
                dCASact[typeCellChannel].append(CASID)
            if CASID in dpredict:
                continue
            psmi = self.cDB.prSMIclean + CASID + ".smi"
            if not path.exists(psmi):
                continue
            smiles = toolbox.loadSMILES(psmi)
            dpredict[CASID] = self.predictSMI(CASID, smiles, plot=1)

        prval = pathFolder.createFolder(self.prout + "validation/" +
                                        typeCellChannel + "/")

        # nothing predicted: no column can be defined
        if not dpredict:
            return 0

        for typeAssay in dCASact.keys():
            channel = "_".join(typeAssay.split("_")[1:])
            cell = typeAssay.split("_")[0]
            kpred = str(cell) + "_" + str(channel)

            # columns are taken from the first prediction
            ldesc = list(dpredict[next(iter(dpredict))][kpred])

            with open(prval + typeCellChannel, "w") as filout:
                # header matching the columns written below
                filout.write(
                    "\t".join(["CASID"] + [str(desc) for desc in ldesc]) +
                    "\n")
                for CASID in dpredict.keys():
                    filout.write(CASID)
                    for desc in ldesc:
                        filout.write("\t" + str(dpredict[CASID][kpred][desc]))
                    filout.write("\n")
        return 0
Esempio n. 27
0
    def rankingAC50(self):
        """Write one ranking file per column of the cleaned AC50 table.

        The comma-separated table ``self.pAC50clean`` is read. Every column
        after the first is replaced by the output of ``toolbox.rankList``.
        For each of these columns a file
        ``<self.prAnalysis>ranking/<column name>_rank.txt`` is written with
        the original header line and all the rows, ordered by the value of
        that column after ranking. Double quotes are removed from the column
        name used in the file name.
        """
        prRank = self.prAnalysis + "ranking/"
        pathFolder.createFolder(prRank)

        with open(self.pAC50clean, "r") as fchemAC50:
            lchemac50 = fchemAC50.readlines()

        lheader = lchemac50[0].strip().split(",")
        nbcol = len(lheader)

        # values by column index; the names come from the first column
        dstock = {"name": []}
        for icol in range(1, nbcol):
            dstock[icol] = []

        for chemAC50 in lchemac50[1:]:
            lelem = chemAC50.strip().split(",")
            dstock["name"].append(lelem[0])
            for icol, elem in enumerate(lelem[1:], 1):
                dstock[icol].append(elem)

        for icol in range(1, nbcol):
            dstock[icol] = toolbox.rankList(dstock[icol])

        # generate one file by column, each ordered differently
        for icol in range(1, nbcol):
            prank = prRank + str(lheader[icol].replace("\"", "")) + "_rank.txt"
            with open(prank, "w") as frank:
                frank.write(lchemac50[0])

                lrank = dstock[icol]
                lisorted = sorted(range(len(lrank)), key=lambda k: lrank[k])

                for isorted in lisorted:
                    lrow = [str(dstock["name"][isorted])]
                    lrow.extend(
                        str(dstock[j][isorted]) for j in range(1, nbcol))
                    frank.write(",".join(lrow) + "\n")
Esempio n. 28
0
    def splitSDF(self):
        """Write one SDF file per compound in ``<self.prout>cpdsdf/``.

        ``parseAll`` is called first when ``lc`` is not set. Each file is
        named after the ``self.name`` entry of the compound and holds its
        "sdf" entry. Nothing is written when the folder already holds as
        many files as there are compounds.

        Side effects:
            Sets ``self.prsdf``.
        """
        if "lc" not in dir(self):
            self.parseAll()

        prSDF = self.prout + "cpdsdf/"
        pathFolder.createFolder(prSDF)
        self.prsdf = prSDF

        if len(self.lc) == len(listdir(prSDF)):
            return

        for compound in self.lc:
            pfilout = self.prsdf + compound[self.name] + ".sdf"
            with open(pfilout, "w") as filout:
                filout.write(compound["sdf"])
    def __init__(self, name_update, pr_OPERA_preproc, pr_out):
        """Store the settings, load the OPERA files and open the DB access.

        Args:
            name_update: stored as ``self.name_update``.
            pr_OPERA_preproc: stored as ``self.pr_OPERA_preproc``.
            pr_out: output folder prefix, stored as ``self.pr_out``;
                ``self.pr_desc`` is the value returned by ``createFolder``
                for ``<self.pr_out>DESC/``.
        """
        self.name_update, self.pr_OPERA_preproc, self.pr_out = (
            name_update, pr_OPERA_preproc, pr_out)

        self.loadOPERAFileInClass()
        self.cDB = DBrequest.DBrequest(verbose=0)

        # self.pr_out is read again here, after the OPERA files are loaded
        pr_desc = self.pr_out + "DESC/"
        self.pr_desc = pathFolder.createFolder(pr_desc)
Esempio n. 30
0
    def __init__(self, pfilin, curvecutoff, effcutoff, curvePositive,
                 curveNegative, prcytox, prout, prlog):
        """Load an assay file and create its output folder.

        Args:
            pfilin: path of the assay file. The name of the assay is the
                file name up to its first ".".
            curvecutoff: stored as ``self.curveCutoff``.
            effcutoff: stored as ``self.effcutoff``.
            curvePositive: stored as ``self.curvePositive``.
            curveNegative: stored as ``self.curveNegative``.
            prcytox: stored as ``self.prcytox``.
            prout: prefix of the output folder; ``<prout><name>/`` is
                created and stored as ``self.proutSP``.
            prlog: stored as ``self.prlog``.
        """
        nameassay = pfilin.split("/")[-1].split(".")[0]

        self.pfilin = pfilin
        self.prcytox = prcytox
        self.name = nameassay
        self.loadAssay()

        # folder specific to this assay
        proutSP = prout + nameassay + "/"
        pathFolder.createFolder(proutSP)

        self.proutSP = proutSP
        self.prout = prout
        self.prlog = prlog

        # cutoffs and curve classes
        self.curveCutoff = curvecutoff
        self.curvePositive = curvePositive
        self.curveNegative = curveNegative

        self.effcutoff = effcutoff