예제 #1
0
    def enrichmentIndex(self, pAC50All, FP=1):
        """Launch the external enrichment-index computation.

        The results go to a sub-folder of ``self.prout`` whose name depends on
        ``FP``; the folder path is stored on ``self.prenrich``. If that folder
        already held more than two entries when it was listed, the external
        computation is skipped.

        Args:
            pAC50All: value stored on ``self.pAC50All`` and forwarded to
                ``runExternalSoft.enrichmentIndex``.
            FP: ``1`` names the folder after the last path component of
                ``self.cdesc.pFP``; any other value names it after the
                clustering settings. ``0`` feeds the cleaned descriptor file
                to the external tool, every other value feeds
                ``self.cdesc.pFP``.

        Returns:
            Always ``0``.
        """
        self.pAC50All = pAC50All

        # Output folder: named after the FP file or after the clustering setup.
        if FP != 1:
            prenrich = self.prout + str(self.clusterMeth) + "_" + str(self.distmeth) + "_" + str(self.aggType) + "_enrich-index/"
        else:
            prenrich = self.prout + self.cdesc.pFP.split("/")[-1] + "_" + str(self.aggType) + "_enrich-index" + "/"
        pathFolder.createFolder(prenrich)
        self.prenrich = prenrich

        # Snapshot taken before any preprocessing writes into the folder.
        alreadyFilled = len(listdir(prenrich)) > 2

        if FP != 0:
            # Fingerprint input.
            if alreadyFilled:
                return 0
            runExternalSoft.enrichmentIndex(self.cdesc.pFP, prenrich, pAC50All, self.clusterMeth, self.distmeth, self.aggType)
            return 0

        # Descriptor input: make sure a cleaned descriptor path is registered.
        if "pdesclean" not in self.__dict__:
            self.pdesclean = prenrich + "descClean.csv"
            if not path.exists(self.pdesclean):
                runExternalSoft.dataManager(self.cdesc.pdesc1D2D, 0, self.corval, self.maxquantile, prenrich)

        if alreadyFilled:
            return 0
        runExternalSoft.enrichmentIndex(self.pdesclean, prenrich, pAC50All, self.clusterMeth, self.distmeth, self.aggType)
        return 0
예제 #2
0
    def setConstantPreproc(self, pAC50, corval, maxQuantile, nbNA, prAnalysis):
        """Store the preprocessing constants and locate or build cleaned files.

        The cleaned files are looked up in ``prAnalysis`` as ``AC50Clean.csv``
        and ``descClean.csv``. When the cleaned descriptor file is already
        there, nothing is recomputed. Otherwise, if ``self.pdesc1D2D`` exists
        and is larger than 10 bytes, ``runExternalSoft.dataManager`` is called
        and the cleaned files are registered if they exist afterwards.

        Args:
            pAC50: stored on ``self.pAC50`` and passed to the data manager.
            corval: stored on ``self.corval`` and passed to the data manager.
            maxQuantile: stored on ``self.maxQuantile`` and passed to the data
                manager.
            nbNA: passed to the data manager only.
            prAnalysis: output folder prefix, concatenated directly with the
                file names (so it is expected to end with a separator).

        Returns:
            Always ``0``.
        """
        self.corval = corval
        self.maxQuantile = maxQuantile
        # Historical misspelling, kept as an alias so existing readers of
        # ``maxQauntile`` keep working.
        self.maxQauntile = maxQuantile
        self.pAC50 = pAC50
        self.prAnalysis = prAnalysis

        # output
        paffclean = self.prAnalysis + "AC50Clean.csv"
        pdesc1D2Dclean = self.prAnalysis + "descClean.csv"

        if path.exists(paffclean):
            self.pAC50clean = paffclean
        if path.exists(pdesc1D2Dclean):
            # Cleaned descriptors already available: nothing to recompute.
            self.pdesc1D2Dclean = pdesc1D2Dclean
            return 0

        if path.exists(self.pdesc1D2D) and path.getsize(self.pdesc1D2D) > 10:
            # preproc
            runExternalSoft.dataManager(self.pdesc1D2D, self.pAC50,
                                        self.corval, self.maxQuantile, nbNA,
                                        self.prAnalysis)

            # Register only what is actually on disk after the run.
            if path.exists(paffclean):
                self.pAC50clean = paffclean
            if path.exists(pdesc1D2Dclean):
                self.pdesc1D2Dclean = pdesc1D2Dclean
        return 0
예제 #3
0
    def createMDS(self, pdesc1D2D, pAC50, corval, maxQuantile, prMDS):
        """Draw the MDS from the cleaned descriptor file found in ``prMDS``.

        If ``prMDS + "descClean.csv"`` does not exist yet, the raw descriptor
        file is handed to ``runExternalSoft.dataManager`` (presumably the step
        that writes the cleaned file into ``prMDS`` -- confirm against that
        helper). A raw file that is missing or not larger than 10 bytes only
        triggers an "Error ->" message; the drawing step is called regardless.

        Args:
            pdesc1D2D: path of the raw descriptor file.
            pAC50: forwarded to ``runExternalSoft.drawMDS``.
            corval: forwarded to the data manager.
            maxQuantile: forwarded to the data manager.
            prMDS: output folder prefix, concatenated directly with the
                cleaned file name.
        """
        pcleaned = prMDS + "descClean.csv"

        needPreproc = not path.exists(pcleaned)
        if needPreproc:
            rawUsable = path.exists(pdesc1D2D) and path.getsize(pdesc1D2D) > 10
            if not rawUsable:
                print "Error ->", pdesc1D2D
            else:
                # preprocessing of the raw descriptors
                runExternalSoft.dataManager(pdesc1D2D, 0, corval, maxQuantile,
                                            prMDS)

        runExternalSoft.drawMDS(pcleaned, pAC50, prMDS)
예제 #4
0
def createSOM(pdesc1D2D, pAC50, corval, maxQuantile, pModel, nbNA, prSOM):
    """Draw the enrichment SOM from the cleaned descriptors in ``prSOM``.

    The cleaned file is expected at ``prSOM + "descClean.csv"``. When it is
    absent and the raw descriptor file exists with more than 10 bytes,
    ``runExternalSoft.dataManager`` is run first; when the raw file is not
    usable an "Error ->" line is printed instead. The drawing call is made in
    every case.

    Args:
        pdesc1D2D: path of the raw descriptor file.
        pAC50: forwarded to ``runExternalSoft.drawEnrichSOM``.
        corval: forwarded to the data manager.
        maxQuantile: forwarded to the data manager.
        pModel: forwarded to ``runExternalSoft.drawEnrichSOM``.
        nbNA: forwarded to the data manager.
        prSOM: output folder prefix, concatenated directly with file names.
    """
    pcleaned = prSOM + "descClean.csv"

    if path.exists(pcleaned):
        cleanedReady = True
    else:
        cleanedReady = False

    if not cleanedReady:
        if path.exists(pdesc1D2D) and path.getsize(pdesc1D2D) > 10:
            # preprocessing of the raw descriptors
            runExternalSoft.dataManager(pdesc1D2D, 0, corval, maxQuantile, nbNA, prSOM)
        else:
            print "Error ->", pdesc1D2D

    runExternalSoft.drawEnrichSOM(pcleaned, pAC50, pModel, prSOM)
예제 #5
0
def PCACross(pdesc, pAC50_hepg2, pAC50_hek293, nbNA, corval, maxQuantile,
             prCrossPCA):
    """Draw the cross PCA for the two AC50 inputs.

    Uses ``prCrossPCA + "descClean.csv"`` as the descriptor input. If that
    file is missing, the raw descriptor file is preprocessed through
    ``runExternalSoft.dataManager`` provided it exists and is larger than
    10 bytes; otherwise an "Error ->" line is printed. The drawing call is
    always made afterwards.

    Args:
        pdesc: path of the raw descriptor file.
        pAC50_hepg2: forwarded to ``runExternalSoft.drawPCACross``.
        pAC50_hek293: forwarded to ``runExternalSoft.drawPCACross``.
        nbNA: forwarded to the data manager.
        corval: forwarded to the data manager.
        maxQuantile: forwarded to the data manager.
        prCrossPCA: output folder prefix, concatenated directly with file
            names.
    """
    pcleaned = prCrossPCA + "descClean.csv"

    cleanedMissing = not path.exists(pcleaned)
    if cleanedMissing:
        rawUsable = path.exists(pdesc) and path.getsize(pdesc) > 10
        if rawUsable:
            # preprocessing of the raw descriptors
            runExternalSoft.dataManager(pdesc, 0, corval, maxQuantile, nbNA,
                                        prCrossPCA)
        else:
            print "Error ->", pdesc

    runExternalSoft.drawPCACross(pcleaned, pAC50_hepg2, pAC50_hek293,
                                 prCrossPCA)
예제 #6
0
    def loadClassForPred(self, corval, maxQuantile, lCASID=[]):
        """Compute descriptors and fingerprints, optionally reduced to a subset.

        Steps, in order: compute the descriptors, clean them through
        ``runExternalSoft.dataManager`` (first element of its result is kept
        on ``self.pdesc1D2Dclean``), then compute the fingerprints with the
        ``"All"`` option. After each of the two computations the
        corresponding matrix is reduced when ``lCASID`` is not an empty list.

        Args:
            corval: forwarded to the data manager.
            maxQuantile: forwarded to the data manager.
            lCASID: identifiers forwarded to ``reduceMatrixDesc`` and
                ``reduceMatrixFP``; an empty list (default) disables the
                reduction.
        """
        # Descriptors 1D and 2D
        self.computeDesc()
        self.pdesc1D2Dclean = runExternalSoft.dataManager(
            self.pdesc1D2D, "0", corval, maxQuantile, self.prout)[0]
        if lCASID != []:
            self.reduceMatrixDesc(self.pdesc1D2Dclean, lCASID)

        # Fingerprints
        self.computeFP("All")
        if lCASID != []:
            self.reduceMatrixFP(self.dFP, lCASID)
예제 #7
0
    def createMainClustering(self, doublecluster = 0, lcas = []):
        """Run the first-level clustering and optionally merge a second level.

        A cluster folder is created under ``self.prout`` (stored on
        ``self.prCluster``), the clustering input is prepared as
        ``descClean.csv`` inside it (stored on ``self.pdesclean``), and the
        first-level clustering is run unless ``cluster.csv`` already exists.

        With ``doublecluster == 1`` the secondary clustering is started and a
        merged ``clusterMain.csv`` (columns ``ID,Cluster1,Cluster2``) is built
        from the first-level file and every ``<entry>/cluster.csv`` whose
        folder entry name matches ``"Clust"``. IDs without a second-level
        cluster get ``"1"``. The merged file is only written once all inputs
        have been read, so a failure while reading cannot leave behind a
        header-only file that later runs would accept as complete.

        Args:
            doublecluster: ``1`` enables the second clustering level.
            lcas: identifiers used to reduce the fingerprint matrix in the
                fingerprint case; an empty list copies the fingerprint file
                unchanged.

        Side effects:
            Sets ``self.pclusters`` to the merged file (double clustering) or
            to the first-level clustering result.
        """
        # No distance method means the input is a fingerprint matrix.
        isFP = self.distmeth is None

        if isFP:
            self.prCluster = self.prout + self.cdesc.pFP.split("/")[-1] + "-" + str(self.clusterMeth) + "_" + str(self.distmeth) \
                             + "_" + str(self.aggType.replace(".", "")) + "_" + str(self.optimalNBclustMeth) + "/"
        else:
            self.prCluster = self.prout + str(self.clusterMeth) + "_" + str(self.distmeth) + "_" + str(
                self.aggType.replace(".", "")) + "_" + str(self.optimalNBclustMeth) + "/"
        pathFolder.createFolder(self.prCluster)

        # data preparation
        self.pdesclean = self.prCluster + "descClean.csv"

        if not path.exists(self.pdesclean):
            if path.exists(self.cdesc.pdesc1D2D) and path.getsize(self.cdesc.pdesc1D2D) > 10:
                # preproc
                if isFP:
                    if lcas != []:
                        self.cdesc.reduceMatrixFP(lcas, self.pdesclean)
                    else:
                        copyfile(self.cdesc.pFP, self.pdesclean)
                else:
                    runExternalSoft.dataManager(self.cdesc.pdesc1D2D, 0, self.corval, self.maxquantile, self.prCluster)
            else:
                print "Error ->", self.cdesc.pdesc1D2D

        # clustering -> first level (skipped when the result is already there)
        pcluster = self.prCluster + "cluster.csv"
        if not path.exists(pcluster):
            if isFP:
                pcluster = runExternalSoft.clusteringSimilarityMatrix(self.pdesclean, self.prCluster, self.aggType, self.clusterMeth, self.optimalNBclustMeth)
            else:
                pcluster = runExternalSoft.clustering(self.pdesclean, "0", self.prCluster, self.distmeth, self.aggType, self.clusterMeth, self.optimalNBclustMeth)

        if doublecluster != 1:
            self.pclusters = pcluster
            return

        # Clustering second level
        # NOTE(review): a first-level result of 0 skips the secondary
        # clustering, but the merge below still opens ``pcluster`` and will
        # fail on it -- confirm how that case should be handled.
        if pcluster != 0:
            self.createSecondaryClustering(pcluster)

        # create main cluster file
        pclustersFinal = self.prCluster + "clusterMain.csv"
        if not path.exists(pclustersFinal):
            with open(pcluster, "r") as fcluster1:
                lchemCluster1 = fcluster1.readlines()

            # chemical ID -> [first-level cluster, second-level cluster(s)]
            dclust = {}
            for chemCluster1 in lchemCluster1[1:]:  # [1:] skips the header
                chemCluster1 = chemCluster1.strip().replace("\"", "").split(",")
                dclust[chemCluster1[0]] = [chemCluster1[1]]

            for fileCluster in listdir(self.prCluster):
                if not search("Clust", fileCluster):
                    continue
                pclust2 = self.prCluster + fileCluster + "/cluster.csv"
                if not path.exists(pclust2):
                    continue
                with open(pclust2, "r") as fclust2:
                    lchemCluster2 = fclust2.readlines()

                for chemCluster2 in lchemCluster2[1:]:
                    chemCluster2 = chemCluster2.strip().replace("\"", "").split(",")
                    dclust[chemCluster2[0]].append(chemCluster2[1])

            # Build every row before touching the output file.
            lrows = []
            for chemID in dclust.keys():
                if len(dclust[chemID]) == 1:
                    # no second-level cluster found for this ID
                    dclust[chemID].append("1")
                lrows.append(str(chemID) + "," + ",".join(dclust[chemID]) + "\n")

            # write main cluster
            with open(pclustersFinal, "w") as fclustersFinal:
                fclustersFinal.write("ID,Cluster1,Cluster2\n")
                fclustersFinal.writelines(lrows)
        self.pclusters = pclustersFinal
예제 #8
0
    def prepareActiveMatrix(self,
                            corval,
                            maxQuantile,
                            NBNA,
                            pAC50All,
                            prout,
                            luciferase=0):
        """Build the descriptor and AC50 tables restricted to active chemicals.

        The filtered tables are written to ``prout + "descActive"`` and
        ``prout + "AC50Active"``. When both files already exist with more than
        10 bytes the filtering is skipped. In every case the two files are
        then passed to ``runExternalSoft.dataManager``, whose first two result
        items are stored on ``self.pdescCleanActive`` and
        ``self.pAC50AllActive``.

        Filtering rules (tables are assumed to be dict-of-dict as returned by
        ``toolbox.loadMatrix`` -- confirm against that helper):

        * ``luciferase == 0``: a chemical is dropped from both tables when
          every column other than ``"CASID"`` and ``"Luc_IC50"`` is ``"NA"``.
        * otherwise: chemicals absent from the descriptor table are dropped
          from the AC50 table; for the others only ``"CASID"`` and
          ``"Luc_IC50"`` are kept, and the chemical is dropped from both
          tables as soon as one of those two values is ``"NA"``.

        Args:
            corval: stored on ``self.corval`` and forwarded to the data
                manager.
            maxQuantile: stored on ``self.maxQuantile`` and forwarded to the
                data manager.
            NBNA: forwarded to the data manager.
            pAC50All: path of the AC50 table to load.
            prout: output folder prefix, concatenated directly with file
                names.
            luciferase: ``0`` (default) filters on the non-luciferase columns,
                any other value filters on the luciferase column.

        Returns:
            ``[self.pdescCleanActive, self.pAC50AllActive]``.
        """
        self.corval = corval
        self.maxQuantile = maxQuantile

        pdescAct = prout + "descActive"
        pAC50Act = prout + "AC50Active"

        alreadyFiltered = (path.exists(pdescAct)
                           and path.getsize(pdescAct) > 10
                           and path.exists(pAC50Act)
                           and path.getsize(pAC50Act) > 10)

        if not alreadyFiltered:
            ddesc = toolbox.loadMatrix(self.pdesc1D2D)
            dAC50All = toolbox.loadMatrix(pAC50All)

            # Iterate over a snapshot of the IDs: entries are deleted while
            # looping, and index bookkeeping on a shrinking key list is what
            # made the previous loops skip rows or run out of range.
            for casID in list(dAC50All.keys()):
                dassays = dAC50All[casID]

                if luciferase == 0:
                    nbMissing = 0
                    for kAC50 in dassays.keys():
                        if kAC50 == "CASID" or kAC50 == "Luc_IC50":  # not considered luciferase
                            continue
                        if dassays[kAC50] == "NA":
                            nbMissing += 1
                    # "- 2" discounts the CASID and Luc_IC50 columns
                    if nbMissing == (len(dassays.keys()) - 2):
                        del dAC50All[casID]
                        ddesc.pop(casID, None)  # may have no descriptor row
                    continue

                # luciferase case
                if casID not in ddesc:
                    del dAC50All[casID]
                    continue
                # list(): columns are removed while looping over them
                for kAC50 in list(dassays.keys()):
                    if kAC50 != "Luc_IC50" and kAC50 != "CASID":  # keep only luciferase
                        del dassays[kAC50]
                    elif dassays[kAC50] == "NA":
                        del dAC50All[casID]
                        ddesc.pop(casID, None)
                        break

            toolbox.writeMatrix(ddesc, pdescAct)
            toolbox.writeMatrix(dAC50All, pAC50Act)

        lpdescActClean = runExternalSoft.dataManager(
            pdescAct, pAC50Act, corval, maxQuantile, NBNA, prout)
        self.pdescCleanActive = lpdescActClean[0]
        self.pAC50AllActive = lpdescActClean[1]

        return [self.pdescCleanActive, self.pAC50AllActive]