Beispiel #1
0
    def Ttest(self, pAC50All, presult):
        """Run the external descriptor t-test on the cleaned 1D/2D table.

        Both input tables are loaded only to print a preview of their first
        keys; the test itself is delegated to ``runExternalSoft.TtestDesc``,
        which receives the file paths.

        pAC50All: path of the tab-separated AC50 table.
        presult: passed through to ``runExternalSoft.TtestDesc``.
        """
        nbpreview = 20

        activities = toolbox.loadMatrix(pAC50All, sep="\t")
        descriptors = toolbox.loadMatrix(self.pdesc1D2Dclean, sep=",")

        # preview of the loaded identifiers, descriptors first
        for table in (descriptors, activities):
            print(list(table.keys())[:nbpreview])

        runExternalSoft.TtestDesc(self.pdesc1D2Dclean, pAC50All, presult)
Beispiel #2
0
    def computeOpera(self, update):
        """Compute, or reload, the OPERA descriptors of this chemical.

        update: when equal to 0 an existing descriptor file
            (``self.prDesc + self.name + ".txt"``, larger than 10 bytes) is
            reloaded instead of recomputing.

        Returns 1 when ``self.opera`` is already set, 0 when the descriptor
        file was reloaded into ``self.allDesc``, and None after a fresh
        computation (``self.opera`` is set and a copy is merged into
        ``self.allDesc``).

        Any failure while loading an OPERA output table is re-raised after
        the offending path has been printed.
        """
        if "opera" in self.__dict__:
            return 1

        # check if descriptors already computed
        pdes = self.prDesc + self.name + ".txt"
        if path.exists(pdes) and path.getsize(pdes) > 10 and update == 0:
            with open(pdes, "r") as filin:
                llines = filin.readlines()
            # first column of both lines is skipped (row identifier)
            ldesc = llines[0].strip().split("\t")[1:]
            lval = llines[1].strip().split("\t")[1:]
            ddes = {}
            for i, desc in enumerate(ldesc):
                ddes[desc] = lval[i]
            self.allDesc = ddes
            self.log = self.log + "Desc already computed -> " + pdes + "\n"
            return 0

        dopera = {}

        prOPERA = pathFolder.createFolder(self.prDesc + "OPERA/" +
                                          self.name + "/")
        molH = Chem.AddHs(self.mol)

        psdf = prOPERA + str(self.name) + ".sdf"
        with open(psdf, "w") as filsdf:
            filsdf.write(Chem.MolToMolBlock(molH))

        pdesc2D = runExternalSoft.runPadel(prOPERA)

        ddesc2D = toolbox.loadMatrix(pdesc2D, sep=",")
        transformOPERAList(ddesc2D)
        # only the first entry of the table is read
        dfirst2D = ddesc2D[list(ddesc2D.keys())[0]]
        for desc2D in dfirst2D:
            if desc2D in LOPERA:
                dopera[desc2D] = dfirst2D[desc2D]

        lpdesc = runExternalSoft.runOPERA(psdf, pdesc2D, prOPERA)

        for pdesc in lpdesc:
            try:
                ddesc = toolbox.loadMatrix(pdesc, ",")
            except Exception:
                # report which OPERA output could not be read, then fail
                print(pdesc)
                raise
            dfirst = ddesc[list(ddesc.keys())[0]]
            for desc in dfirst:
                if desc in LOPERA:
                    dopera[desc] = dfirst[desc]

        self.opera = dopera
        self.allDesc.update(deepcopy(self.opera))
Beispiel #3
0
def parsepdf(prcytox, prresult):
    """Extract cytotoxicity values per CAS number from the supplementary PDF.

    prcytox: folder holding "toxsci-15-0719-File012.pdf" and
        "toxsci-15-0719-File009.csv" (the CSV supplies the CASRN list).
    prresult: folder where the cache file "cytox.csv" is read or written.

    Returns the table loaded from the cache when it exists, otherwise a
    dictionary ``{CAS: {"CytoxMin": ..., "CytoxMedian": ...}}`` built from
    the PDF text (values are the matched number strings).
    """
    ppdf = prcytox + "toxsci-15-0719-File012.pdf"
    ptable = prcytox + "toxsci-15-0719-File009.csv"

    pfilout = prresult + "cytox.csv"
    if path.exists(pfilout):
        return toolbox.loadMatrix(pfilout, sep="\t")

    dtable = toolbox.loadMatrix(ptable, sep=",")
    lCASID = [dtable[chem]["CASRN"] for chem in dtable]

    dout = {}
    # CAS number the following "cytotox" lines belong to; None until a CAS
    # has been seen so stray lines at the top of the PDF are ignored
    CAStemp = None
    with open(ppdf, "rb") as fpdf:
        pdfReader = PdfFileReader(fpdf)
        nbpage = pdfReader.getNumPages()

        for i in range(nbpage):
            pageText = pdfReader.getPage(i).extractText()

            for line in pageText.split("\n"):
                for CASID in lCASID:
                    if search(CASID, line):
                        dout[CASID] = {}
                        CAStemp = CASID
                        break

                if CAStemp is None:
                    continue

                if search("cytotox min=", line):
                    cytoxmin = findall(r"[-+]?\d*\.\d+|\d+", line)
                    dout[CAStemp]["CytoxMin"] = cytoxmin[0]

                if search("cytotox median=", line):
                    cytoxMed = findall(r"[-+]?\d*\.\d+|\d+", line)
                    # the median is taken from the fourth number on the line
                    dout[CAStemp]["CytoxMedian"] = cytoxMed[3]

    with open(pfilout, "w") as filout:
        filout.write("CAS\tCytoxMin\tCytoxMedian\n")
        for CASID in dout:
            # "NA" for a CAS found without its values, so that a partial
            # cache file is never left behind by a KeyError
            filout.write(
                str(CASID) + "\t" + str(dout[CASID].get("CytoxMin", "NA")) +
                "\t" + str(dout[CASID].get("CytoxMedian", "NA")) + "\n")

    return dout
Beispiel #4
0
def mergeDescInvolve(prin, ML, nbdesc, prout):
    """Merge the descriptor importance of every run into one table per cell.

    prin: folder with one sub-folder per run, each holding one sub-folder
        per cell (paths are concatenated, so prin is expected to end with
        "/"); "Average", "descImportance" and "Prob" are skipped.
    ML: model name; importance is read from "<ML>class/ImportanceDesc" and
        generated from "<ML>class/model.RData" when missing.
    nbdesc: passed through to ``runExternalSoft.runImportanceDesc``.
    prout: prefix of the output files "Importance<ML>_<cell>".

    Always returns 0.
    """
    dimportance = {}

    for prrun in listdir(prin):
        if prrun in ("Average", "descImportance", "Prob"):
            continue

        for prcell in listdir(prin + "/" + prrun + "/"):
            druns = dimportance.setdefault(prcell, {})
            # stays empty when neither importance table nor model exists
            druns[prrun] = {}

            prclass = prin + prrun + "/" + prcell + "/" + str(ML) + "class/"
            pimportance = prclass + "ImportanceDesc"

            if not path.exists(pimportance):
                pmodel = prclass + "model.RData"
                ptrain = prin + prrun + "/" + prcell + "/trainSet.csv"
                if not path.exists(pmodel):
                    continue
                runExternalSoft.createImportanceTable(pmodel, ML, ptrain,
                                                      prclass)

            druns[prrun] = toolbox.loadMatrix(pimportance, sep="\t")

    # write global table
    for typeAssay in dimportance:
        pdesc = prout + "Importance" + str(ML) + "_" + typeAssay
        druns = dimportance[typeAssay]
        lrun = list(druns.keys())

        # the descriptor list is taken from the first run only
        ldesc = list(druns[lrun[0]].keys())
        with open(pdesc, "w") as fdesc:
            fdesc.write("Desc\tRun\tval\n")
            for desc in ldesc:
                for run in lrun:
                    try:
                        val = str(druns[run][desc]["x"])
                    except (KeyError, TypeError):
                        # descriptor not available for this run
                        val = "0.0"
                    fdesc.write(desc + "\t" + str(run) + "\t" + val + "\n")

        runExternalSoft.runImportanceDesc(pdesc, nbdesc)

    return 0
Beispiel #5
0
    def parseCuratedDataset(self, pr_in):
        """Load the curated MIC dataset and the per-organism tables.

        pr_in: folder containing "MIC-curated_mol.csv" and one
            "<organism>.csv" per organism column of that file.

        Returns 1 when the curated file is missing. Otherwise stores its
        path in ``self.pMIC`` and the per-organism tables, completed with
        the "pMIC" and "SMILES" of each compound, in ``self.tableorgafull``
        (nothing is returned in that case).
        """
        p_dataset_MIC = pr_in + "MIC-curated_mol.csv"
        # nothing can be parsed without the curated dataset
        if not path.exists(p_dataset_MIC):
            return 1
        self.pMIC = p_dataset_MIC

        # every column that is not an identifier is an organism
        rows = toolbox.tableTolist(p_dataset_MIC)
        organisms = list(rows[0].keys())
        for column in ("CMPD_CHEMBLID", "SMILES"):
            organisms.remove(column)

        by_orga = {}
        for orga in organisms:
            # load chem by orga
            table = toolbox.loadMatrix(pr_in + orga + ".csv")
            by_orga[orga] = table

            for row in rows:
                chemID = row["CMPD_CHEMBLID"]
                pmic = row[orga]
                entry = table[chemID]
                entry["pMIC"] = pmic
                entry["SMILES"] = row["SMILES"]

        self.tableorgafull = by_orga
Beispiel #6
0
def formatPubChemTable(pfilin, PRPUBCHEM, prout, update=0):
    """Write the table "ID / SMILES / Active" used downstream.

    pfilin: comma-separated PubChem assay table.
    PRPUBCHEM: passed to ``Chem`` together with each chemical ID.
    prout: folder of the output file "tableSmi.csv".
    update: when 0, an existing output file is reused as is.

    Returns the path of the output file.
    """
    pfilout = prout + "tableSmi.csv"
    if path.exists(pfilout) and update == 0:
        return pfilout

    # load the input first so that a failure here cannot leave behind a
    # header-only output file that later calls would take for a cache
    dchem = toolbox.loadMatrix(pfilin, sep=",")

    with open(pfilout, "w") as filout:
        filout.write("ID\tSMILES\tActive\n")

        for chemID in dchem:
            # metadata rows are discarded before any SMILES lookup
            if chemID in ("RESULT_IS_ACTIVE_CONCENTRATION", "RESULT_UNIT"):
                continue

            cpubmed = Chem(chemID, PRPUBCHEM)
            SMILES = cpubmed.getSMILE()
            if search("Error", SMILES):
                continue

            # add filter: weak dye equivalency is forced to inactive
            if "Log of MaxDyeEquivalency" in dchem[chemID]:
                if float(dchem[chemID]["Log of MaxDyeEquivalency"]) < -7:
                    dchem[chemID]["PUBCHEM_ACTIVITY_OUTCOME"] = "Inactive"

            filout.write(
                "%s\t%s\t%s\n" %
                (chemID, SMILES, dchem[chemID]["PUBCHEM_ACTIVITY_OUTCOME"]))

    return pfilout
Beispiel #7
0
    def validationPredictor(self, typeCellChannel, pAC50All):
        """Predict every chemical of the AC50 table and write the results.

        typeCellChannel: column of the AC50 table, "<cell>_<channel>"; also
            the name of the output folder and file under
            ``self.prout + "validation/"``.
        pAC50All: path of the AC50 table.

        Chemicals without a ".smi" file in ``self.cDB.prSMIclean`` are
        ignored. Always returns 0.
        """
        dAC50All = toolbox.loadMatrix(pAC50All)

        dCASact = {typeCellChannel: []}
        dpredict = {}
        for CASID in dAC50All:  # have to change
            if dAC50All[CASID][typeCellChannel] != "NA":
                dCASact[typeCellChannel].append(CASID)
            if CASID in dpredict:
                continue
            psmi = self.cDB.prSMIclean + CASID + ".smi"
            if not path.exists(psmi):
                continue
            smiles = toolbox.loadSMILES(psmi)
            dpredict[CASID] = self.predictSMI(CASID, smiles, plot=1)

        prval = pathFolder.createFolder(self.prout + "validation/" +
                                        typeCellChannel + "/")

        for typeAssay in dCASact:
            channel = "_".join(typeAssay.split("_")[1:])
            cell = typeAssay.split("_")[0]
            kpred = str(cell) + "_" + str(channel)
            # column names come from the first predicted chemical
            ldesc = dpredict[list(dpredict.keys())[0]][kpred]
            with open(prval + typeCellChannel, "w") as filout:
                # one header field per written column
                filout.write(
                    "\t".join(["CASID"] + [str(desc) for desc in ldesc]) +
                    "\n")
                for CASID in dpredict:
                    filout.write(CASID)
                    for desc in ldesc:
                        filout.write("\t" + str(dpredict[CASID][kpred][desc]))
                    filout.write("\n")
        return 0
Beispiel #8
0
def loadAllOperaDesc(pOperaDesc):
    """Return the OPERA descriptors of a table re-indexed by CAS number.

    pOperaDesc: comma-separated table; each row must provide "CASRN" and
        every descriptor listed in ``chemical.LOPERA``.

    "NaN" values are replaced by "NA" (also in the loaded row) and the
    result maps each CASRN to ``{descriptor: value}``.
    """
    by_dtx = toolbox.loadMatrix(pOperaDesc, ',')
    by_cas = {}
    for DTXID in by_dtx:
        row = by_dtx[DTXID]
        values = {}
        by_cas[row["CASRN"]] = values
        for desc in chemical.LOPERA:
            # normalise the missing-value marker
            if row[desc] == "NaN":
                row[desc] = "NA"
            values[desc] = row[desc]
    return by_cas
def get_PNGAndSMI(p_desc, pr_results):
    """Write one ".smi" file and one ".png" picture per chemical.

    p_desc: descriptor table whose rows provide a "SMILES" entry.
    pr_results: folder in which the "SMI/" and "PNG/" sub-folders are
        created.

    The picture is produced by ``runExternalSoft.molconvert`` from the
    SMILES file. Nothing is returned.
    """
    # load the ddesc to have the cleaned SMILES
    table = toolbox.loadMatrix(p_desc)
    smi_dir = pathFolder.createFolder(pr_results + "SMI/")
    png_dir = pathFolder.createFolder(pr_results + "PNG/")

    for chemID in table:
        print(chemID)
        smiles = table[chemID]["SMILES"]

        smi_path = smi_dir + chemID + ".smi"
        with open(smi_path, "w") as handle:
            handle.write(smiles)

        runExternalSoft.molconvert(smi_path, png_dir + chemID + ".png")

    return
 def loadData(self):
     """Load the tables at ``self.p_MIC`` and ``self.p_cluster``.

     The results are stored in ``self.d_MIC`` and ``self.d_cluster``; the
     cluster file is read as comma-separated.
     """
     mic_table = toolbox.loadMatrix(self.p_MIC)
     self.d_MIC = mic_table
     cluster_table = toolbox.loadMatrix(self.p_cluster, sep=",")
     self.d_cluster = cluster_table
Beispiel #11
0
def mergeResults(prin, prout):
    """Average the model performances over all runs, one file per cell.

    prin: folder with one sub-folder per run, each holding one sub-folder
        per cell with "perfCV.csv", "perfTrain.csv" and "perfTest.csv";
        "Average" and "descImportance" are skipped, as is any cell folder
        whose three tables cannot all be loaded.
    prout: prefix of the output files "<cell>.csv".

    For every cell, set (CV, train, test) and model, the mean and standard
    deviation (rounded to 3 decimals) of Acc, Sp, Se and MCC are written.
    Always returns 0.
    """
    lperfcriteria = ["Acc", "Sp", "Se", "MCC"]
    lset = ["CV", "train", "test"]
    dfilperf = {"CV": "perfCV.csv",
                "train": "perfTrain.csv",
                "test": "perfTest.csv"}

    # dresult[cell][set][ML][criteria] -> list of values, one per run
    dresult = {}
    for prrun in listdir(prin):
        if prrun == "Average" or prrun == "descImportance":
            continue
        for prcell in listdir(prin + "/" + prrun + "/"):
            prperf = prin + "/" + prrun + "/" + prcell + "/"

            try:
                dM = dict((setname,
                           toolbox.loadMatrix(prperf + dfilperf[setname],
                                              sep=","))
                          for setname in lset)
            except Exception:
                # best effort: incomplete runs are ignored
                continue

            dcell = dresult.setdefault(
                prcell, dict((setname, {}) for setname in lset))
            for ML in dM["CV"]:
                for setname in lset:
                    # created on demand so that a model missing from the
                    # first run does not break the later ones
                    dML = dcell[setname].setdefault(
                        ML, dict((c, []) for c in lperfcriteria))
                    for criteria in lperfcriteria:
                        dML[criteria].append(
                            float(dM[setname][ML][criteria]))

    dout = {}
    for celltype in dresult:
        dout[celltype] = {}
        for setname in dresult[celltype]:
            dout[celltype][setname] = {}
            for ML in dresult[celltype][setname]:
                dout[celltype][setname][ML] = {}
                for criteria in dresult[celltype][setname][ML]:
                    lval = dresult[celltype][setname][ML][criteria]
                    AV = round(mean(lval), 3)
                    SD = round(std(lval), 3)
                    dout[celltype][setname][ML][criteria] = [AV, SD]

    # write result
    for celltype in dout:
        pfilout = prout + celltype + ".csv"
        with open(pfilout, "w") as filout:
            # fixed section order keeps the output reproducible
            for setname in lset:
                filout.write(str(setname) + "\n")
                filout.write("\t" + "\t".join(
                    ["M-" + str(c) + "\t" + "SD-" + str(c)
                     for c in lperfcriteria]) + "\n")
                for ML in dout[celltype][setname]:
                    filout.write(ML)
                    for criteria in lperfcriteria:
                        AV, SD = dout[celltype][setname][ML][criteria]
                        filout.write("\t" + str(AV) + "\t" + str(SD))
                    filout.write("\n")

    return 0
Beispiel #12
0
    def computeMatrixMCS(self, kID="CMPD_CHEMBLID", kSMILES="CANONICAL_SMILES"):
        """Build (or reload) the pairwise MCS matrices and plot them.

        kID: key of the compound identifier in each entry of ``self.sdata``.
        kSMILES: key of the SMILES in each entry of ``self.sdata``.

        When both "tanimoto" and "maxAtom" already exist under
        ``self.prout`` they are loaded into ``self.MCSTanimoto`` and
        ``self.MCSMax``. Otherwise ``get_Tanimoto`` is called for every
        pair of compounds and both matrices are written. The affinity file
        "aff" is written when missing, ``self.Aff`` is filled when not yet
        set, and ``runExternalSoft.MDSMCS`` is finally run.
        """
        pfiloutTanimoto = self.prout + "tanimoto"
        pfiloutNBatomMax = self.prout + "maxAtom"

        if path.exists(pfiloutTanimoto) and path.exists(pfiloutNBatomMax):
            print("in")
            dMCSTanimoto = toolbox.loadMatrix(pfiloutTanimoto)
            dMCSMax = toolbox.loadMatrix(pfiloutNBatomMax)
            self.MCSTanimoto = dMCSTanimoto
            self.MCSMax = dMCSMax

        else:
            lcmpdID = [compound[kID] for compound in self.sdata]
            imax = len(lcmpdID)
            dTanimoto = {}
            dMaxMCS = {}
            # only the upper triangle (j >= i) is computed
            for i in range(imax):
                dTi = dTanimoto.setdefault(lcmpdID[i], {})
                dMi = dMaxMCS.setdefault(lcmpdID[i], {})
                for j in range(i, imax):
                    print("%s %s" % (i, j))
                    IDj = lcmpdID[j]
                    if IDj not in dTi:
                        ltanimoto_max = get_Tanimoto(self.sdata[i][kSMILES],
                                                     self.sdata[j][kSMILES])
                        dMi[IDj] = ltanimoto_max[1]
                        dTi[IDj] = ltanimoto_max[0]

            def pairValue(dpair, ID1, ID2):
                # a pair is stored in one orientation only
                try:
                    return dpair[ID1][ID2]
                except KeyError:
                    return dpair[ID2][ID1]

            with open(pfiloutTanimoto, "w") as filoutTanimoto, \
                    open(pfiloutNBatomMax, "w") as filoutNBatomMax:
                filoutTanimoto.write("\t".join(lcmpdID) + "\n")
                filoutNBatomMax.write("\t".join(lcmpdID) + "\n")

                for cmpdID1 in lcmpdID:
                    lwTanimoto = []
                    lwMax = []
                    for cmpdID2 in lcmpdID:
                        lwTanimoto.append(
                            str(pairValue(dTanimoto, cmpdID1, cmpdID2)))
                        # NOTE(review): the stored value is already
                        # ltanimoto_max[1]; the extra [1] assumes it is
                        # itself a sequence -- confirm with get_Tanimoto
                        lwMax.append(
                            str(pairValue(dMaxMCS, cmpdID1, cmpdID2)[1]))
                    filoutTanimoto.write(cmpdID1 + "\t" +
                                         "\t".join(lwTanimoto) + "\n")
                    filoutNBatomMax.write(cmpdID1 + "\t" +
                                          "\t".join(lwMax) + "\n")

        paff = self.prout + "aff"
        if not hasattr(self, "Aff"):
            daff = {}
            for compound in self.sdata:
                daff[compound[kID]] = compound["PCHEMBL_VALUE"]
            self.Aff = daff

        if not path.exists(paff):
            with open(paff, "w") as filoutaff:
                filoutaff.write("pchem affinity\n")
                for compound in self.sdata:
                    filoutaff.write(str(compound[kID]) + "\t" +
                                    str(compound["PCHEMBL_VALUE"]) + "\n")

        # plot matrix
        runExternalSoft.MDSMCS(pfiloutTanimoto, paff)
Beispiel #13
0
def computeDesc(passay, PRDESC, PRSMI, prout, nbfile=1, update=0):
    """Compute the descriptors of an assay table and write the matrix.

    passay: assay table whose rows provide "SMILES" and "Active".
    PRDESC: folder of the per-chemical descriptor files "<ID>.txt".
    PRSMI: folder handed to ``prepareChem``.
    prout: prefix of the outputs "descMat" and "aff.txt".
    nbfile: 1 writes a single matrix with a trailing "Aff" column; any
        other value also writes "aff.txt" ("ID<TAB>Aff").
    update: when 0, existing outputs are returned without recomputing.

    Chemicals whose SMILES or processing log contains "error", or whose
    activity is "Inconclusive", are discarded. Returns the matrix path
    when nbfile is 1, otherwise ``[matrix path, affinity path]``.
    """
    # by pass
    pdescout = prout + "descMat"
    paff = prout + "aff.txt"
    if update == 0 and path.exists(pdescout):
        if nbfile == 1:
            return pdescout
        if nbfile == 2 and path.exists(paff):
            return [pdescout, paff]

    dchem = toolbox.loadMatrix(passay)
    lchemID = list(dchem.keys())
    if "RESULT_UNIT" in lchemID:
        lchemID.remove("RESULT_UNIT")
    shuffle(lchemID)

    i = 0
    while i < len(lchemID):
        chemID = lchemID[i]
        SMILES = dchem[chemID]["SMILES"]

        # case of the table is computed before, or activity not usable
        rejected = bool(search("error", SMILES.lower())) or \
            dchem[chemID]["Active"] == "Inconclusive"

        if not rejected:
            # compute descriptors
            cchem = chemical.chemical(chemID, SMILES)
            rejected = not _computeChemDesc(cchem, PRDESC, PRSMI, update)

        if rejected:
            # after the deletion, index i already designates the next
            # chemical: it must not be moved
            del dchem[chemID]
            del lchemID[i]
            continue

        i = i + 1

    ldesc = chemical.getLdesc("1D2D", 1) + chemical.getLdesc("Opera", 0)
    twofiles = nbfile != 1
    header = "ID," + ",".join(ldesc)
    if not twofiles:
        header = header + ",Aff"

    fildesc = open(pdescout, "w")
    filaff = None
    try:
        if twofiles:
            filaff = open(paff, "w")
            filaff.write("ID\tAff\n")
        fildesc.write(header + "\n")

        for chemID in lchemID:
            print(chemID)
            aff = 1 if dchem[chemID]["Active"] == "Active" else 0
            pdesc = PRDESC + chemID + ".txt"
            if not path.exists(pdesc):
                continue
            lval = _loadDescValues(pdesc, chemID, ldesc)

            # NOTE(review): with two files the header has no "Aff" column
            # although each row still ends with the activity -- kept as is,
            # confirm what the readers of descMat expect
            fildesc.write(chemID + "," + ",".join(lval) + "," + str(aff) +
                          "\n")
            if twofiles:
                filaff.write(chemID + "\t" + str(aff) + "\n")
    finally:
        fildesc.close()
        if filaff is not None:
            filaff.close()

    if twofiles:
        return [pdescout, paff]
    return pdescout


def _computeChemDesc(cchem, PRDESC, PRSMI, update):
    """Run the descriptor steps on cchem; False once its log has an error."""
    lstep = (
        lambda: cchem.prepareChem(PRSMI),
        lambda: cchem.compute1D2DDesc(PRDESC),
        lambda: cchem.computeOpera(update=update),
    )
    for step in lstep:
        step()
        if search("error", cchem.log.lower()):
            return False
    cchem.writeTablesDesc(PRDESC, update=update)
    return True


def _loadDescValues(pdesc, chemID, ldesc):
    """Read the ldesc values of chemID from pdesc ("NA" when absent)."""
    ddesc = toolbox.loadMatrix(pdesc)
    dval = ddesc[chemID]
    return [str(dval[desc]) if desc in dval else "NA" for desc in ldesc]
Beispiel #14
0
def mergeProba(prin, ML, prout):
    """Average the predicted probabilities over the runs, per cell and set.

    prin: folder with one sub-folder per run, each holding one sub-folder
        per cell (paths are concatenated, so prin is expected to end with
        "/"); "Average", "descImportance" and "Prob" are skipped.
    ML: model name; predictions are read from "<ML>class/".
    prout: prefix of the outputs "<cell>_train", "<cell>_test" and
        "<cell>_CV", each plotted with ``runExternalSoft.plotAC50VSProb``.

    A cell folder without any "AC50_" file is skipped for that run.
    Always returns 0.
    """
    dprob = {}
    dreal = {}

    for prrun in listdir(prin):
        if prrun in ("Average", "descImportance", "Prob"):
            continue

        lprcell = listdir(prin + "/" + prrun + "/")
        for prcell in lprcell:
            prcellrun = prin + prrun + "/" + prcell + "/"
            dcellreal = dreal.setdefault(prcell, {})

            paff = None
            for filin in listdir(prcellrun):
                if search("AC50_", filin):
                    paff = prcellrun + filin
                    break
            if paff is None:
                # never fall back on the file of a previous cell
                print("No AC50_ file in " + prcellrun + " -> skipped")
                continue
            dcellreal.update(deepcopy(toolbox.loadMatrix(paff, sep="\t")))

            prclass = prcellrun + str(ML) + "class/"
            dset = {}
            dprob.setdefault(prcell, {})[prrun] = dset
            dset["CV"] = toolbox.loadMatrix(prclass + "PerfRFClassCV10.txt")
            dset["train"] = toolbox.loadMatrix(prclass + "classTrain.csv",
                                               sep=",")
            dset["test"] = toolbox.loadMatrix(prclass + "classTest.csv",
                                              sep=",")

        # progress report on the last cell of the run
        if lprcell:
            print(list(dreal[prcell].keys()))
            print(len(dreal[prcell]))

    # table for probability: dw[cell][set][ID] -> list of predictions;
    # the prediction column is "x" for train/test and "Predict" for CV
    lsetcol = (("train", "x"), ("test", "x"), ("CV", "Predict"))
    dw = {}
    for prcell in dprob:
        dw[prcell] = {"train": {}, "test": {}, "CV": {}}
        for run in dprob[prcell]:
            for setname, kpred in lsetcol:
                dpredrun = dprob[prcell][run][setname]
                for ID in dpredrun:
                    dw[prcell][setname].setdefault(ID, []).append(
                        float(dpredrun[ID][kpred]))

    for prcell in dw:
        for setname, kpred in lsetcol:
            pfilout = prout + prcell + "_" + setname
            _writeProbTable(pfilout, dw[prcell][setname], dreal[prcell])
            runExternalSoft.plotAC50VSProb(pfilout, prout)

    return 0


def _writeProbTable(pfilout, dpred, dreal):
    """Write ID, mean and SD of the predictions, and the real "Aff"."""
    with open(pfilout, "w") as filout:
        filout.write("ID\tMpred\tSDpred\tReal\n")
        for ID in dpred:
            try:
                line = "%s\t%.3f\t%.3f\t%s\n" % (ID, mean(dpred[ID]),
                                                  std(dpred[ID]),
                                                  dreal[ID]["Aff"])
            except Exception:
                # show the offending entry before failing
                print("Cannot write %s: %s" % (ID, dpred[ID]))
                raise
            filout.write(line)
Beispiel #15
0
    def writeClassActive(self):
        """Write the class file "actClass.txt" of every AC50 type.

        For each key of ``self.dpresult`` the AC50 file registered in
        ``self.dpAC50`` is read: chemicals with a value become class 1,
        and inactives (class 0) are taken first from chemicals that are
        "NA" for this channel but have a value in another non "Luc_IC50"
        channel of ``self.pAC50All``, then from the "NA" lines of the file
        itself until ``self.ratioAct`` is reached. ``self.dpAC50`` is
        updated with the class file path; an existing class file larger
        than 10 bytes is reused.
        """
        from random import shuffle

        print(self.dpresult)
        print(self.pAC50All)
        dAC50All = toolbox.loadMatrix(self.pAC50All, sep="\t")

        for typeAC50 in self.dpresult:
            pclass = self.dpresult[typeAC50] + "actClass.txt"

            if path.exists(pclass) and path.getsize(pclass) > 10:
                self.dpAC50[typeAC50] = pclass
                continue

            with open(self.dpAC50[typeAC50], "r") as filin:
                llines = filin.readlines()
            header = llines[0]

            # shuffle lines; the header is removed once, here, and every
            # loop below goes through all the remaining lines
            llines = llines[1:]
            shuffle(llines)

            nbact = 0
            for lineChem in llines:
                if lineChem.strip().split("\t")[-1] != "NA":
                    nbact = nbact + 1

            nbinact = int(100 * nbact / (100 * self.ratioAct)) - nbact

            # select active chemical, one line per chemical
            llineAct = []
            for lineChem in llines:
                lAC50 = lineChem.strip().split("\t")
                if any(AC50 != "NA" for AC50 in lAC50[1:]):
                    llineAct.append("\t".join([lAC50[0], "1"]))

            # select inactive but select active for other channel
            llineInact = []
            if typeAC50 != "Luc_IC50":
                for CASID in dAC50All:
                    if dAC50All[CASID][self.cell + "_" + typeAC50] != "NA":
                        continue
                    for channel in dAC50All[CASID]:
                        if search("Luc_IC50", channel) or channel == "CASID":
                            continue
                        if dAC50All[CASID][channel] != "NA":
                            llineInact.append("\t".join([CASID, "0"]))
                            break

            nbinactselected = len(llineInact)
            print("%s %s" % (nbinact, nbinactselected))

            if nbinactselected >= nbinact:
                # more candidates than needed: random subset
                shuffle(llineInact)
                llineInact = llineInact[:nbinact]
            else:
                # complete with the inactive of the channel itself
                nwinact = nbinactselected
                dseen = set(llineInact)
                for lineChem in llines:
                    lAC50 = lineChem.strip().split("\t")
                    if any(AC50 == "NA" for AC50 in lAC50[1:]):
                        lneww = "\t".join([lAC50[0], "0"])
                        if lneww not in dseen:
                            dseen.add(lneww)
                            llineInact.append(lneww)
                            nwinact += 1

                    if nwinact >= nbinact:
                        break

            lw = llineAct + llineInact
            shuffle(lw)

            with open(pclass, "w") as filout:
                filout.write(header)
                filout.write("\n".join(lw))
            self.dpAC50[typeAC50] = pclass
Beispiel #16
0
    def prepDataColor(self):
        """Prepare the class file and train/test sets of ``self.cell``.

        A chemical is active (class 1) when at least one channel whose name
        matches ``self.cell + "_n"`` has a value other than "NA" in
        ``self.pAC50All``; inactives are added up to ``self.ratioAct``.
        The split itself is done by ``runExternalSoft.prepDataQSAR``.

        ``self.dptrain``, ``self.dptest``, ``self.dpAC50`` and
        ``self.dpresult`` are set, each with ``self.cell`` as single key.
        Returns 0 when the existing files are reused, None otherwise.
        """
        # format by type of AC50
        # change self with one folder by type of AC50
        presult = pathFolder.createFolder(self.prresult + self.cell + "/")
        pClass = presult + "AC50_" + str(self.cell)
        ptrain = presult + "trainSet.csv"
        ptest = presult + "testSet.csv"

        def register():
            # expose the paths of this cell on the instance
            self.dptrain = {self.cell: ptrain}
            self.dptest = {self.cell: ptest}
            self.dpAC50 = {self.cell: pClass}
            self.dpresult = {self.cell: presult}

        # by pass
        if path.exists(ptrain) and path.exists(ptest) and path.exists(pClass):
            register()
            return 0

        from random import shuffle

        color = self.cell + "_n"
        dAC50 = toolbox.loadMatrix(self.pAC50All, sep="\t")

        # remove "" explicitly rather than relying on its position in the
        # key list
        lCASID = [CASID for CASID in dAC50 if CASID != ""]
        shuffle(lCASID)

        lact = []
        linact = []
        for CASID in lCASID:
            active = any(
                search(color, channel) and dAC50[CASID][channel] != "NA"
                for channel in dAC50[CASID])
            if active:
                lact.append(str(CASID) + "\t1")
            else:
                linact.append(str(CASID) + "\t0")

        nbinact = int(100 * len(lact) / (100 * self.ratioAct)) - len(lact)

        lw = lact + linact[:nbinact]
        shuffle(lw)

        with open(pClass, "w") as fclass:
            fclass.write("CAS\tAff\n")
            fclass.write("\n".join(lw))

        runExternalSoft.prepDataQSAR(self.pdesc, pClass, presult, self.corval,
                                     self.maxQauntile, self.splitRatio,
                                     self.nbNA, "0")

        register()
Beispiel #17
0
    def prepData(self, typeData):
        """Write one AC50 file per channel and prepare train/test sets.

        typeData: for classification models, "all" calls
            ``self.writeClass()`` and "active" calls
            ``self.writeClassActive()`` before the split; ignored when
            ``self.typeQSAR`` is "Reg".

        Sets ``self.dpAC50`` (AC50 file per channel), ``self.dpresult``
        (result folder per channel), ``self.dptrain`` and ``self.dptest``
        (paths of "trainSet.csv" / "testSet.csv" per channel).
        """
        # format by type of AC50
        # change self with one folder by type of AC50
        dAC50 = toolbox.loadMatrix(self.pAC50, sep="\t")

        dfileAC50 = {}
        dprresult = {}
        for AC50type in self.lchannel:
            presult = pathFolder.createFolder(self.prresult + AC50type + "/")
            dprresult[AC50type] = presult

            pAC50type = presult + "AC50_" + str(AC50type)
            with open(pAC50type, "w") as filAC50:
                filAC50.write("CAS\tAff\n")
                for CAS in dAC50:
                    filAC50.write(
                        str(CAS) + "\t" + str(dAC50[CAS][AC50type]) + "\n")
            dfileAC50[AC50type] = pAC50type

        self.dpAC50 = dfileAC50
        self.dpresult = dprresult

        dtrain = {}
        dtest = {}
        # the class writers replace values of self.dpAC50 while looping
        for typeAC50 in list(self.dpAC50.keys()):
            # defined for both kinds of model: they are registered below
            ptrain = self.dpresult[typeAC50] + "trainSet.csv"
            ptest = self.dpresult[typeAC50] + "testSet.csv"

            if self.typeQSAR == "Reg":
                runExternalSoft.prepDataQSAR(self.pdesc, self.dpAC50[typeAC50],
                                             self.dpresult[typeAC50],
                                             self.corval, self.maxQauntile,
                                             self.splitRatio, self.nbNA)

            else:
                if typeData == "all":
                    self.writeClass()
                elif typeData == "active":
                    self.writeClassActive()

                print(ptrain)
                print(ptest)

                # the split is redone unless both sets are present
                if not (path.exists(ptrain) and path.exists(ptest)):
                    runExternalSoft.prepDataQSAR(self.pdesc,
                                                 self.dpAC50[typeAC50],
                                                 self.dpresult[typeAC50],
                                                 self.corval, self.maxQauntile,
                                                 self.splitRatio, self.nbNA)

            dtrain[typeAC50] = ptrain
            dtest[typeAC50] = ptest

        self.dptrain = dtrain
        self.dptest = dtest
Beispiel #18
0
    def predictSMI(self, nameChemical, smiles, plot=0, verbose=0):
        """Predict the cluster enrichment of one chemical given as SMILES.

        A result folder ``self.prout + nameChemical + "/"`` is created.  When
        a file named ``pred`` already exists in it, that file is loaded with
        ``toolbox.loadMatrix`` and returned without any computation.

        Otherwise descriptors and fingerprints are computed for the chemical
        and, for every channel / cell / clustering type of ``self.dcluster``:

        * clustering types whose name matches "Desc" are resolved with
          ``runExternalSoft.findCluster``;
        * the other types are treated as fingerprint clusterings: the
          database chemical with the highest similarity is searched and the
          enrichment of its cluster is used.

        Args:
            nameChemical: name of the chemical, also used as folder name.
            smiles: SMILES passed to ``chemical.chemical``.
            plot: when 1, ``self.writeResultBySMI`` is called on the result.
            verbose: when 1, intermediate values are printed.

        Returns:
            dict keyed by ``"<cell>_<channel>"`` whose values map each
            clustering type to its enrichment.

        Raises:
            ValueError: when no closest chemical can be found for a
                fingerprint clustering (empty fingerprint database).
        """
        prresult = pathFolder.createFolder(self.prout + nameChemical + "/")

        # reuse the prediction file when it is already in the result folder
        ppred = prresult + "pred"
        if path.exists(ppred):
            return toolbox.loadMatrix(ppred)

        chem = chemical.chemical(nameChemical, smiles)
        chem.prepareChem(prresult)
        chem.compute1D2DDesc(prresult)
        chem.writeTablesDescCAS(prresult)
        chem.computeFP(typeFP="All")

        dpred = {}
        for channel in self.dcluster:
            for cell in self.dcluster[channel].keys():
                kpred = str(cell) + "_" + str(channel)
                dpred[kpred] = {}
                for typeDesc in self.dcluster[channel][cell].keys():
                    dclust = self.dcluster[channel][cell][typeDesc]
                    if verbose == 1:
                        print("%s %s %s" % (channel, cell, typeDesc))
                        print(list(self.dcluster[channel][cell].keys()))

                    if search("Desc", typeDesc):
                        # name layout: <...>-<distance>-<aggregation>
                        distMeth = typeDesc.split("-")[1]
                        aggMeth = typeDesc.split("-")[2]

                        enrichment = runExternalSoft.findCluster(
                            self.cDB.pdesc1D2Dclean, chem.pdesc,
                            dclust["files"][0], dclust["files"][1], distMeth,
                            aggMeth)

                    else:
                        # generate FP
                        typeFP = typeDesc.split("-")[0]
                        metricAgg = typeDesc.split("-")[-1]
                        metric = metricAgg.split("_")[0]
                        if verbose == 1:
                            print("%s %s" % (typeFP, metric))

                        # similarity of the query with every database chemical
                        dFP = {}
                        for CASID in self.cDB.dFP.keys():
                            if verbose == 1:
                                print(self.cDB.dFP[CASID])
                                print(chem.FP[typeFP])
                                print(metric)
                            dFP[CASID] = float(
                                toolbox.computeSimilarityFP(
                                    self.cDB.dFP[CASID][typeFP],
                                    chem.FP[typeFP], metric))

                        if not dFP:
                            raise ValueError(
                                "No fingerprint available in the database")

                        # closest chemical; on ties the last one met is kept,
                        # as the original index loop did
                        maxSim = max(dFP.values())
                        CASclose = None
                        for CASID in dFP:
                            if dFP[CASID] == maxSim:
                                CASclose = CASID
                        if CASclose is None:
                            raise ValueError(
                                "No closest chemical found in the database")

                        if verbose == 1:
                            print(CASclose)
                            print("%s %s" % (channel, cell))
                            print(self.ChemClust[CASclose][channel][cell])

                        clusterfound = self.ChemClust[CASclose][channel][cell][
                            str(typeFP) + "-" + str(metricAgg)]
                        enrichment = dclust[clusterfound]['Enrichment']
                    dpred[kpred][typeDesc] = enrichment

        if plot == 1:
            self.writeResultBySMI(dpred, prresult)

        return dpred
Beispiel #19
0
    def __init__(self, pDYE, lcAssays, prout):
        """Keep the run settings and load the dye table.

        Args:
            pDYE: path of the comma-separated dye file; kept as ``self.pDYE``
                and loaded with ``toolbox.loadMatrix`` into ``self.dDye``.
            lcAssays: kept unchanged as ``self.lassays``.
            prout: kept unchanged as ``self.prout``.
        """
        self.pDYE = pDYE
        self.lassays = lcAssays
        self.prout = prout

        # the dye table is read once, when the object is built
        self.dDye = toolbox.loadMatrix(pDYE, sep=",")
Beispiel #20
0
    def prepareActiveMatrix(self,
                            corval,
                            maxQuantile,
                            NBNA,
                            pAC50All,
                            prout,
                            luciferase=0):
        """Build the descriptor and AC50 tables restricted to active chemicals.

        The filtered tables are written to ``prout + "descActive"`` and
        ``prout + "AC50Active"``.  When both files already exist and are
        larger than 10 bytes the filtering is skipped and they are reused.
        In every case ``runExternalSoft.dataManager`` is then run on them.

        Filtering rules:

        * ``luciferase == 0``: a chemical is removed from both tables when
          every AC50 value, ignoring the "CASID" and "Luc_IC50" columns, is
          "NA".
        * otherwise: chemicals of the AC50 table without descriptors are
          removed, only the "Luc_IC50" and "CASID" columns are kept, and a
          chemical with "NA" in one of them is removed from both tables.

        Args:
            corval: stored in ``self.corval`` and passed to ``dataManager``.
            maxQuantile: stored in ``self.maxQuantile`` and passed to
                ``dataManager``.
            NBNA: passed to ``dataManager``.
            pAC50All: path of the AC50 table to filter.
            prout: prefix of the files written, also passed to
                ``dataManager``.
            luciferase: 0 to work on the non-luciferase assays, any other
                value to work on "Luc_IC50" only.

        Returns:
            ``[self.pdescCleanActive, self.pAC50AllActive]``, the first two
            items returned by ``dataManager``.
        """
        self.corval = corval
        self.maxQuantile = maxQuantile

        pdescAct = prout + "descActive"
        pAC50Act = prout + "AC50Active"

        alreadyDone = (path.exists(pdescAct) and path.getsize(pdescAct) > 10
                       and path.exists(pAC50Act)
                       and path.getsize(pAC50Act) > 10)

        if not alreadyDone:
            ddesc = toolbox.loadMatrix(self.pdesc1D2D)
            dAC50All = toolbox.loadMatrix(pAC50All)

            # columns that are not regular assays
            lnotAssay = ("CASID", "Luc_IC50")

            # Iterate over a snapshot of the keys: rows are deleted while
            # looping.  The previous index-based loop was bounded by the size
            # of the descriptor table although it walked the AC50 table.
            if luciferase == 0:
                for casID in list(dAC50All.keys()):
                    dchem = dAC50All[casID]
                    lassay = [k for k in dchem.keys() if k not in lnotAssay]
                    # inactive everywhere -> drop; counts the assay columns
                    # really present instead of assuming two excluded ones
                    if all(dchem[k] == "NA" for k in lassay):
                        del dAC50All[casID]
                        ddesc.pop(casID, None)
            else:
                for casID in list(dAC50All.keys()):
                    if casID not in ddesc:
                        del dAC50All[casID]
                        continue
                    dchem = dAC50All[casID]
                    for kAC50 in list(dchem.keys()):
                        if kAC50 not in lnotAssay:
                            del dchem[kAC50]
                    if "NA" in dchem.values():
                        del dAC50All[casID]
                        ddesc.pop(casID, None)

            toolbox.writeMatrix(ddesc, pdescAct)
            toolbox.writeMatrix(dAC50All, pAC50Act)

        lpdescActClean = runExternalSoft.dataManager(
            pdescAct, pAC50Act, corval, maxQuantile, NBNA, prout)

        self.pdescCleanActive = lpdescActClean[0]
        self.pAC50AllActive = lpdescActClean[1]

        return [self.pdescCleanActive, self.pAC50AllActive]
Beispiel #21
0
    def extractActivebySOM(self, prin=""):
        """Copy the PNG of active chemicals into one folder per SOM cluster.

        Every sub-folder of ``prin`` holding a ``SOMClust`` file is processed.
        The first line of that file is skipped; on the following lines the
        first comma-separated field is read as the CAS and the last one as
        the cluster.  A chemical is copied from ``self.prPNG`` to
        ``prin/<assay>/<cluster>/`` when it has a value different from "NA"
        in the AC50 table ``self.pAC50AllActive`` for:

        * the column named like the folder, when that column exists;
        * any hepg2/hek293 cell/med column of the colour, for the folders
          "red", "green", "blue" and "allcolors" (all three colours);
        * the "cell" and "med" columns built from the two first "_" parts
          of the folder name, when it contains "hepg2" or "hek293".

        Args:
            prin: folder to process; ``self.prSOMactive`` when empty.
        """
        if prin == "":
            prin = self.prSOMactive

        lfolders = listdir(prin)
        dAC50all = toolbox.loadMatrix(self.pAC50AllActive, sep=",")

        ltemplateColor = [
            "hepg2_cell_X_n", "hepg2_med_X_n", "hek293_med_X_n",
            "hek293_cell_X_n"
        ]

        for assay in lfolders:
            pSOM = prin + assay + "/SOMClust"
            if not path.exists(pSOM):
                continue

            with open(pSOM, "r") as fclust:
                lchemicals = fclust.readlines()

            for lineChem in lchemicals[1:]:
                lelem = lineChem.strip().replace("\"", "").split(",")
                CAS = lelem[0]
                clust = lelem[-1]
                if CAS == "NA":
                    continue

                dchem = dAC50all[CAS]

                # columns of the AC50 table to inspect for this folder
                if assay in dchem.keys():
                    lcheck = [assay]
                elif assay in ("red", "green", "blue", "allcolors"):
                    if assay == "allcolors":
                        lcolor = ["blue", "green", "red"]
                    else:
                        lcolor = [assay]
                    lcheck = [
                        template.replace("X", color) for color in lcolor
                        for template in ltemplateColor
                    ]
                elif search("hepg2", assay) or search("hek293", assay):
                    lpart = assay.split("_")
                    lcheck = [
                        template.replace("X", lpart[0]).replace("Y", lpart[1])
                        for template in ["X_cell_Y_n", "X_med_Y_n"]
                    ]
                else:
                    continue

                # one active column is enough to copy the picture
                for ass in lcheck:
                    if dchem[ass] != "NA":
                        prclust = pathFolder.createFolder(prin + assay + "/" +
                                                          str(clust) + "/")
                        copyfile(self.prPNG + CAS + ".png",
                                 prclust + CAS + ".png")
                        break
Beispiel #22
0
def rank_chem(p_dataset, p_cluster, pr_PNG, pr_out):
    """Draw the chemicals of each cluster with their pMIC and merge in a PDF.

    Every value of the dataset, except the "SMILES" and "CMPD_CHEMBLID"
    columns, is converted in place to ``-log10(value)``.  Chemicals are
    grouped by the integer "cluster" column of the cluster file and drawn,
    cluster after cluster in increasing order, on pages of at most six
    pictures (two rows of three) named ``page_<n>.png`` in ``pr_out``.  Five
    text lines are written under each picture: the name with its cluster,
    then the pMIC and its rank (1 = highest pMIC) for four organisms.  Pages
    are converted with ``runExternalSoft.pngtopdf`` and merged into
    ``pr_out + "chem_pMIC.pdf"``.

    Args:
        p_dataset: path of the MIC table, loaded with ``toolbox.loadMatrix``.
        p_cluster: path of the comma-separated cluster table.
        pr_PNG: prefix of the chemical pictures (``<chemical>.png``).
        pr_out: prefix of the files written.
    """
    # load files
    d_MIC = toolbox.loadMatrix(p_dataset)
    d_cluster = toolbox.loadMatrix(p_cluster, sep=",")

    # MIC -> pMIC, values gathered by organism for the ranking
    d_rank = {}
    for chem in d_MIC.keys():
        for orga in d_MIC[chem].keys():
            if orga in ("SMILES", 'CMPD_CHEMBLID'):
                continue
            pMIC = -log10(float(d_MIC[chem][orga]))
            d_MIC[chem][orga] = pMIC
            d_rank.setdefault(orga, []).append(pMIC)

    # highest pMIC first
    for l_pMIC in d_rank.values():
        l_pMIC.sort(reverse=True)

    # chemicals by cluster
    d_cluster_used = {}
    for chem in d_cluster.keys():
        cluster = int(d_cluster[chem]["cluster"])
        d_cluster_used.setdefault(cluster, []).append(chem)

    # text line written for each organism
    l_label = [
        ("pMIC (E. coli): %.2f (%s)", "Escherichia coli"),
        ("pMIC (P. aeruginosa): %.2f (%s)", "Pseudomonas aeruginosa"),
        ("pMIC (S. aureus): %.2f (%s)", "Staphylococcus aureus"),
        ("pMIC (S. pneumoniae): %.2f (%s)", "Streptococcus pneumoniae"),
    ]

    # build the png, a cluster never shares a page with another one
    l_pngout = []
    i_page = 1
    for cluster in sorted(d_cluster_used.keys()):
        l_chem = d_cluster_used[cluster]
        for i_first in range(0, len(l_chem), 6):
            # pictures and text of the page, five lines by chemical
            l_pchempng = []
            lw = []
            for chem in l_chem[i_first:i_first + 6]:
                l_pchempng.append(pr_PNG + chem + ".png")
                lw.append("%s (cluster: %s)" % (chem, cluster))
                for template, orga in l_label:
                    pMIC = d_MIC[chem][orga]
                    lw.append(template %
                              (pMIC, d_rank[orga].index(pMIC) + 1))

            p_image = pr_out + "page_" + str(i_page) + ".png"
            l_pngout.append(p_image)
            imgnew = Image.new("RGBA", (1535, 1285), (250, 250, 250))

            for i_image, p_chempng in enumerate(l_pchempng):
                # cells are 510 wide and 650 high, text starts 500 below
                # the top of the cell with one line every 25
                i_row, i_col = divmod(i_image, 3)
                img1 = Image.open(p_chempng)
                imgnew.paste(img1, (i_col * 510, i_row * 650))

                # `font` is not defined in this function; it has to be
                # available in the enclosing module
                draw = ImageDraw.Draw(imgnew)
                for i_line in range(5):
                    draw.text(
                        (5 + 510 * i_col, 500 + 650 * i_row + 25 * i_line),
                        lw[i_image * 5 + i_line], (0, 0, 0),
                        font=font)

            imgnew.save(p_image)
            i_page = i_page + 1

    # transform png to pdf
    lpdf = [runExternalSoft.pngtopdf(ppng) for ppng in l_pngout]

    # merge pdf sheet
    runExternalSoft.mergepdfs(lpdf, pr_out + "chem_pMIC.pdf")
def applyModel(pdesc, prmodels, prout):
    """Apply every saved model to a descriptor file and summarise the results.

    ``prmodels`` is read as ``<folder>/<model>/<ML>/<model file>``.  Each
    model file is applied with ``runExternalSoft.predictModel``; predictions
    are gathered by chemical and written, for every ML folder, in
    ``prout/<folder>/<model>/<ML>/sumProb`` (columns ID, Mpred, SDpred,
    Real).  The quality table returned by ``runExternalSoft.qualityPred`` for
    each ML is then written in ``prout/<folder>/<model>/sumPerf.csv``.

    Args:
        pdesc: path of the descriptor file given to ``predictModel``.
        prmodels: folder of the models, with a trailing separator.
        prout: folder of the results, with a trailing separator.
    """
    # dmodel[folder][model][ML] -> paths of the model files
    dmodel = {}
    for folderModel in listdir(prmodels):
        dmodel[folderModel] = {}
        for model in listdir(prmodels + folderModel):
            dmodel[folderModel][model] = {}
            prmodel = prmodels + folderModel + "/" + model
            for ML in listdir(prmodel):
                prmodelML = prmodel + "/" + ML + "/"
                dmodel[folderModel][model][ML] = [
                    prmodelML + modelR for modelR in listdir(prmodelML)
                ]

    # columns of the performance summary
    lh = [
        "TP", "TN", "FP", "FN", "acc", "se", "sp", "mcc", "MpbTP", "SDpbTP",
        "MpbTN", "SDpbTN", "MpbFP", "SDpbFP", "MpbFN", "SDpbFN"
    ]

    for typeModel in dmodel.keys():
        pathFolder.createFolder(prout + typeModel + "/")
        for color in dmodel[typeModel].keys():
            prcolor = prout + typeModel + "/" + color + "/"
            pathFolder.createFolder(prcolor)
            dperf = {}
            for ML in dmodel[typeModel][color].keys():
                proutbyRmodel = pathFolder.createFolder(prcolor + ML + "/")
                lpredict = [
                    runExternalSoft.predictModel(pdesc, modelR, ML,
                                                 proutbyRmodel)
                    for modelR in dmodel[typeModel][color][ML]
                ]

                # predictions of every model file, by chemical
                dprob = {}
                for ppredict in lpredict:
                    try:
                        dpredict = toolbox.loadMatrix(ppredict, sep=",")
                    except Exception:
                        # best effort: a prediction file that cannot be read
                        # is ignored, interrupts are no longer swallowed
                        continue
                    for chem in dpredict.keys():
                        if chem not in dprob:
                            dprob[chem] = {
                                "Pred": [],
                                "Aff": dpredict[chem]["Aff"]
                            }
                        if dpredict[chem]["Pred"] != "NA":
                            dprob[chem]["Pred"].append(
                                float(dpredict[chem]["Pred"]))

                pfsumML = proutbyRmodel + "sumProb"
                with open(pfsumML, "w") as fsumML:
                    fsumML.write("ID,Mpred,SDpred,Real\n")
                    for chem in dprob.keys():
                        lpred = dprob[chem]["Pred"]
                        if len(lpred) == 0:
                            fsumML.write("%s,NA,NA,%s\n" %
                                         (chem, dprob[chem]["Aff"]))
                        else:
                            fsumML.write("%s,%f,%f,%s\n" %
                                         (chem, mean(lpred), std(lpred),
                                          dprob[chem]["Aff"]))

                pquality = runExternalSoft.qualityPred(pfsumML)
                runExternalSoft.plotAC50VSProb(pfsumML, proutbyRmodel)
                dperf[ML] = toolbox.loadMatrix(pquality, sep=",")

            pfsum = prcolor + "sumPerf.csv"
            with open(pfsum, "w") as fsum:
                fsum.write("Model," + ",".join(lh) + "\n")
                for ML in dperf.keys():
                    lval = [dperf[ML][h]["x"] for h in lh]
                    lw = []
                    for val in lval:
                        try:
                            lw.append(str(round(float(val), 2)))
                        except (TypeError, ValueError):
                            # not a number (e.g. "NA"): written unchanged
                            lw.append(str(val))
                    fsum.write("%s,%s\n" % (ML, ",".join(lw)))