def enrichmentIndex(self, pAC50All, FP=1):
    """Run the external enrichment-index computation for the clustering.

    pAC50All -- path to the AC50 matrix handed to runExternalSoft.
    FP -- 1: fingerprint-based analysis (folder named after the FP file);
          0: descriptor-based analysis (folder named after the clustering
          parameters), cleaning the 1D/2D descriptors first if needed.

    Side effects: sets self.pAC50All, self.prenrich and, in descriptor
    mode, self.pdesclean. Returns 0. If the output folder already holds
    more than two files, the computation is considered done and skipped.
    """
    self.pAC50All = pAC50All

    # Output folder name depends on the analysis mode.
    if FP == 1:
        pr_enrich = "%s%s_%s_enrich-index/" % (self.prout,
                                               self.cdesc.pFP.split("/")[-1],
                                               str(self.aggType))
    else:
        pr_enrich = "%s%s_%s_%s_enrich-index/" % (self.prout,
                                                  str(self.clusterMeth),
                                                  str(self.distmeth),
                                                  str(self.aggType))
    pathFolder.createFolder(pr_enrich)
    self.prenrich = pr_enrich

    # Snapshot of the folder content, used below to skip finished runs.
    l_existing = listdir(pr_enrich)

    if FP == 0:
        # Descriptor mode: make sure a cleaned descriptor matrix exists.
        if not "pdesclean" in self.__dict__:
            self.pdesclean = pr_enrich + "descClean.csv"
        if not path.exists(self.pdesclean):
            runExternalSoft.dataManager(self.cdesc.pdesc1D2D, 0, self.corval,
                                        self.maxquantile, pr_enrich)
        if len(l_existing) > 2:
            return 0
        runExternalSoft.enrichmentIndex(self.pdesclean, pr_enrich, pAC50All,
                                        self.clusterMeth, self.distmeth,
                                        self.aggType)
    else:
        if len(l_existing) > 2:
            return 0
        runExternalSoft.enrichmentIndex(self.cdesc.pFP, pr_enrich, pAC50All,
                                        self.clusterMeth, self.distmeth,
                                        self.aggType)
    return 0
def setConstantPreproc(self, pAC50, corval, maxQuantile, nbNA, prAnalysis):
    """Store the preprocessing constants and make sure cleaned matrices exist.

    pAC50 -- path to the raw AC50 matrix.
    corval -- correlation cutoff forwarded to the data cleaning step.
    maxQuantile -- quantile cutoff forwarded to the data cleaning step.
    nbNA -- max NA count forwarded to the data cleaning step.
    prAnalysis -- output folder where AC50Clean.csv / descClean.csv live.

    Returns 0 when cleaned files are available (already present, or
    produced by runExternalSoft.dataManager); returns None when neither
    the cleaned files nor a usable raw descriptor file exist.
    """
    self.corval = corval
    # Fix: the attribute name "maxQauntile" is a historical typo; it is kept
    # for backward compatibility (other code may read it) and a correctly
    # spelled alias is added for consistency with prepareActiveMatrix.
    self.maxQauntile = maxQuantile
    self.maxQuantile = maxQuantile
    self.pAC50 = pAC50
    self.prAnalysis = prAnalysis

    # output
    paffclean = self.prAnalysis + "AC50Clean.csv"
    pdesc1D2Dclean = self.prAnalysis + "descClean.csv"

    if path.exists(paffclean):
        # Cleaned files already produced by a previous run -> just record them.
        self.pAC50clean = paffclean
        if path.exists(pdesc1D2Dclean):
            self.pdesc1D2Dclean = pdesc1D2Dclean
        return 0
    elif path.exists(self.pdesc1D2D) and path.getsize(self.pdesc1D2D) > 10:
        # preproc: run the external cleaning, then record whatever it produced
        runExternalSoft.dataManager(self.pdesc1D2D, self.pAC50, self.corval,
                                    self.maxQauntile, nbNA, self.prAnalysis)
        if path.exists(paffclean):
            self.pAC50clean = paffclean
        if path.exists(pdesc1D2Dclean):
            self.pdesc1D2Dclean = pdesc1D2Dclean
        return 0
def createMDS(self, pdesc1D2D, pAC50, corval, maxQuantile, prMDS):
    """Build (if needed) the cleaned descriptor matrix, then draw the MDS.

    pdesc1D2D -- path to the raw 1D/2D descriptor matrix.
    pAC50 -- path to the AC50 matrix used to color the MDS.
    corval / maxQuantile -- cleaning cutoffs passed to dataManager.
    prMDS -- output folder; the cleaned matrix is prMDS + "descClean.csv".
    """
    # output
    pdesc1D2Dclean = prMDS + "descClean.csv"
    if not path.exists(pdesc1D2Dclean):
        if path.exists(pdesc1D2D) and path.getsize(pdesc1D2D) > 10:
            # preproc
            runExternalSoft.dataManager(pdesc1D2D, 0, corval, maxQuantile, prMDS)
        else:
            # Fix: the original fell through and still called drawMDS on a
            # cleaned file that was never produced; bail out instead.
            print("Error -> " + pdesc1D2D)
            return
    runExternalSoft.drawMDS(pdesc1D2Dclean, pAC50, prMDS)
def createSOM(pdesc1D2D, pAC50, corval, maxQuantile, pModel, nbNA, prSOM):
    """Build (if needed) the cleaned descriptor matrix, then draw the SOM.

    pdesc1D2D -- path to the raw 1D/2D descriptor matrix.
    pAC50 -- path to the AC50 matrix used for the enrichment coloring.
    corval / maxQuantile / nbNA -- cleaning cutoffs passed to dataManager.
    pModel -- path to the SOM model passed to drawEnrichSOM.
    prSOM -- output folder; the cleaned matrix is prSOM + "descClean.csv".
    """
    # output
    pdesc1D2Dclean = prSOM + "descClean.csv"
    if not path.exists(pdesc1D2Dclean):
        if path.exists(pdesc1D2D) and path.getsize(pdesc1D2D) > 10:
            # preproc
            runExternalSoft.dataManager(pdesc1D2D, 0, corval, maxQuantile, nbNA, prSOM)
        else:
            # Fix: the original fell through and still called drawEnrichSOM on
            # a cleaned file that was never produced; bail out instead.
            print("Error -> " + pdesc1D2D)
            return
    runExternalSoft.drawEnrichSOM(pdesc1D2Dclean, pAC50, pModel, prSOM)
def PCACross(pdesc, pAC50_hepg2, pAC50_hek293, nbNA, corval, maxQuantile, prCrossPCA):
    """Build (if needed) the cleaned descriptor matrix, then draw the
    cross-cell-line PCA (HepG2 vs HEK293).

    pdesc -- path to the raw descriptor matrix.
    pAC50_hepg2 / pAC50_hek293 -- AC50 matrices for the two cell lines.
    nbNA / corval / maxQuantile -- cleaning cutoffs passed to dataManager.
    prCrossPCA -- output folder; the cleaned matrix is
        prCrossPCA + "descClean.csv".
    """
    # output
    pdesc1D2Dclean = prCrossPCA + "descClean.csv"
    if not path.exists(pdesc1D2Dclean):
        if path.exists(pdesc) and path.getsize(pdesc) > 10:
            # preproc
            runExternalSoft.dataManager(pdesc, 0, corval, maxQuantile, nbNA, prCrossPCA)
        else:
            # Fix: the original fell through and still called drawPCACross on
            # a cleaned file that was never produced; bail out instead.
            print("Error -> " + pdesc)
            return
    runExternalSoft.drawPCACross(pdesc1D2Dclean, pAC50_hepg2, pAC50_hek293, prCrossPCA)
def loadClassForPred(self, corval, maxQuantile, lCASID=None):
    """Compute 1D/2D descriptors and fingerprints for prediction, cleaning
    the descriptor matrix and optionally reducing both matrices to a subset
    of compounds.

    corval / maxQuantile -- cleaning cutoffs passed to dataManager.
    lCASID -- optional list of CAS IDs; when given, the cleaned descriptor
        matrix and the fingerprint dictionary are reduced to those IDs.

    Side effects: sets self.pdesc1D2Dclean (and whatever computeDesc /
    computeFP set on self).
    """
    # Fix: mutable default argument (lCASID=[]) replaced by a None
    # sentinel; [] and None behave identically for callers.
    if lCASID is None:
        lCASID = []

    # Desc 1D and 2D
    self.computeDesc()
    lpdescClean = runExternalSoft.dataManager(self.pdesc1D2D, "0", corval, maxQuantile, self.prout)
    self.pdesc1D2Dclean = lpdescClean[0]
    if lCASID != []:
        self.reduceMatrixDesc(self.pdesc1D2Dclean, lCASID)

    self.computeFP("All")
    if lCASID != []:
        self.reduceMatrixFP(self.dFP, lCASID)
def createMainClustering(self, doublecluster = 0, lcas = []):
    """Run the first-level clustering and, optionally, a second-level pass,
    then record the cluster table path on the instance.

    doublecluster -- 1 to run a second-level clustering on each first-level
        cluster and merge both levels into clusterMain.csv.
    lcas -- optional list of compound IDs used to reduce the fingerprint
        matrix (only used in FP mode, i.e. when self.distmeth is None).

    Side effects: sets self.prCluster, self.pdesclean and self.pclusters.
    NOTE(review): mutable default argument (lcas=[]) -- it is only read
    here, never mutated, so behavior is unaffected.
    """
    # Output folder: FP mode (no distance metric) embeds the fingerprint
    # file name; descriptor mode embeds the clustering parameters only.
    if self.distmeth == None:
        # case of fp
        self.prCluster = self.prout + self.cdesc.pFP.split("/")[-1] + "-" + str(self.clusterMeth) + "_" + str(self.distmeth) \
                         + "_" + str(self.aggType.replace(".", "")) + "_" + str(self.optimalNBclustMeth) + "/"
    else:
        self.prCluster = self.prout + str(self.clusterMeth) + "_" + str(self.distmeth) + "_" + str(
            self.aggType.replace(".", "")) + "_" + str(self.optimalNBclustMeth) + "/"
    pathFolder.createFolder(self.prCluster)

    # data preparation: build descClean.csv once, reuse it on reruns
    self.pdesclean = self.prCluster + "descClean.csv"
    if not path.exists(self.pdesclean):
        if path.exists(self.cdesc.pdesc1D2D) and path.getsize(self.cdesc.pdesc1D2D) > 10:
            # preproc
            if self.distmeth == None:
                # FP mode: "clean" matrix is the (possibly reduced) FP matrix
                if lcas != []:
                    self.cdesc.reduceMatrixFP(lcas, self.pdesclean)
                else:
                    copyfile(self.cdesc.pFP, self.pdesclean)
            else:
                runExternalSoft.dataManager(self.cdesc.pdesc1D2D, 0, self.corval, self.maxquantile, self.prCluster)
        else:
            print "Error ->", self.cdesc.pdesc1D2D

    # First-level clustering, cached in cluster.csv; the external call
    # returns the path it wrote (or 0 on failure -- see check below).
    pcluster = self.prCluster + "cluster.csv"
    if not path.exists(pcluster):
        if self.distmeth == None:
            pcluster = runExternalSoft.clusteringSimilarityMatrix(self.pdesclean, self.prCluster, self.aggType,
                                                                  self.clusterMeth, self.optimalNBclustMeth)
        else:
            #clustering -> first level
            pcluster = runExternalSoft.clustering(self.pdesclean, "0", self.prCluster, self.distmeth, self.aggType,
                                                  self.clusterMeth, self.optimalNBclustMeth)

    if doublecluster == 1:
        #Clustering second level
        if pcluster != 0:
            self.createSecondaryClustering(pcluster)

        # create main cluster file merging level-1 and level-2 assignments
        pclustersFinal = self.prCluster + "clusterMain.csv"
        if not path.exists(pclustersFinal):
            fclustersFinal = open(pclustersFinal, "w")
            fclustersFinal.write("ID,Cluster1,Cluster2\n")

            # Level-1 assignments: ID -> [cluster1] (quotes stripped from CSV)
            fcluster1 = open(pcluster, "r")
            lchemCluster1 = fcluster1.readlines()
            fcluster1.close()
            dclust = {}
            for chemCluster1 in lchemCluster1[1:]:
                chemCluster1 = chemCluster1.strip().replace("\"", "").split(",")
                chemID = chemCluster1[0]
                clust = chemCluster1[1]
                dclust[chemID] = [clust]

            # Level-2 assignments live in sub-folders whose name contains "Clust"
            for fileCluster in listdir(self.prCluster):
                if search("Clust", fileCluster):
                    pclust2 = self.prCluster + fileCluster + "/cluster.csv"
                    if path.exists(pclust2):
                        fclust2 = open(pclust2, "r")
                        lchemCluster2 = fclust2.readlines()
                        fclust2.close()
                        for chemCluster2 in lchemCluster2[1:]:
                            chemCluster2 = chemCluster2.strip().replace("\"", "").split(",")
                            chemID = chemCluster2[0]
                            clust2 = chemCluster2[1]
                            # NOTE(review): KeyError if a level-2 file holds an
                            # ID absent from the level-1 table -- confirm inputs.
                            dclust[chemID].append(clust2)

            #write main cluster; compounds without a level-2 cluster get "1"
            for chemID in dclust.keys():
                if len(dclust[chemID]) == 1:
                    dclust[chemID].append("1")
                fclustersFinal.write(str(chemID) + "," + ",".join(dclust[chemID]) + "\n")
            fclustersFinal.close()
        self.pclusters = pclustersFinal
    else:
        self.pclusters = pcluster
def prepareActiveMatrix(self, corval, maxQuantile, NBNA, pAC50All, prout, luciferase=0):
    """Reduce the descriptor and AC50 matrices to "active" chemicals, write
    them to <prout>descActive / <prout>AC50Active, then run the external
    cleaning step (dataManager).

    corval / maxQuantile / NBNA -- cleaning cutoffs passed to dataManager.
    pAC50All -- path to the full AC50 matrix.
    prout -- output folder for the active matrices.
    luciferase -- 0: drop chemicals whose every assay (other than CASID and
        Luc_IC50) is "NA"; otherwise: keep only the Luc_IC50 column and drop
        chemicals with Luc_IC50 == "NA".

    Returns [cleaned descriptor path, cleaned AC50 path]; also sets
    self.pdescCleanActive and self.pAC50AllActive.
    NOTE(review): relies on Python 2 dict.keys() returning a list (indexed
    access while deleting entries) -- not Python 3 compatible as written.
    """
    self.corval = corval
    self.maxQuantile = maxQuantile

    pdescAct = prout + "descActive"
    pAC50Act = prout + "AC50Active"

    # Reuse previously written active matrices if they look non-empty.
    if path.exists(pdescAct) and path.getsize(pdescAct) > 10 and path.exists(pAC50Act) and path.getsize(pAC50Act) > 10:
        lpdescActClean = runExternalSoft.dataManager(pdescAct, pAC50Act, corval, maxQuantile, NBNA, prout)
        self.pdescCleanActive = lpdescActClean[0]
        self.pAC50AllActive = lpdescActClean[1]
        return [self.pdescCleanActive, self.pAC50AllActive]

    ddesc = toolbox.loadMatrix(self.pdesc1D2D)
    dAC50All = toolbox.loadMatrix(pAC50All)

    if luciferase == 0:
        i = 0
        # NOTE(review): imax comes from ddesc but i indexes dAC50All.keys()
        # below -- this assumes both matrices cover the same chemical set;
        # confirm upstream loaders guarantee that.
        imax = len(ddesc.keys())
        while i < imax:
            casID = dAC50All.keys()[i]
            # Count "NA" values over every assay column except the two
            # excluded keys (CASID and the luciferase readout).
            nbNA = 0
            for kAC50 in dAC50All[casID].keys():
                if kAC50 == "CASID" or kAC50 == "Luc_IC50":  # not considered luciferase
                    continue
                else:
                    if dAC50All[casID][kAC50] == "NA":
                        nbNA += 1
            #print nbNA, len(dAC50All[casID].keys())
            # All assays NA (keys minus the 2 excluded ones) -> inactive, drop.
            if nbNA == (len(dAC50All[casID].keys()) - 2):
                del dAC50All[casID]
                try:
                    del ddesc[casID]
                except:
                    # chemical may be absent from the descriptor matrix
                    pass
                imax = imax - 1
            else:
                i += 1
        toolbox.writeMatrix(ddesc, pdescAct)
        toolbox.writeMatrix(dAC50All, pAC50Act)
        lpdescActClean = runExternalSoft.dataManager(pdescAct, pAC50Act, corval, maxQuantile, NBNA, prout)
        self.pdescCleanActive = lpdescActClean[0]
        self.pAC50AllActive = lpdescActClean[1]
        return [self.pdescCleanActive, self.pAC50AllActive]
    else:
        # Luciferase mode: keep only CASID / Luc_IC50 columns and drop
        # chemicals missing from the descriptors or with Luc_IC50 == "NA".
        i = 0
        imax = len(dAC50All.keys())
        while i < imax:
            casID = dAC50All.keys()[i]
            if not casID in ddesc.keys():
                del dAC50All[casID]
                imax = imax - 1
                # NOTE(review): i is decremented here AND the bottom i += 1 is
                # skipped by continue, so i moves back by one (re-checking an
                # earlier entry), whereas the break path below nets i
                # unchanged -- asymmetric; confirm intended.
                i = i - 1
                continue
            for kAC50 in dAC50All[casID].keys():
                if kAC50 != "Luc_IC50" and kAC50 != "CASID":  # not considered luciferase
                    del dAC50All[casID][kAC50]
                else:
                    if dAC50All[casID][kAC50] == "NA":
                        del dAC50All[casID]
                        try:
                            del ddesc[casID]
                        except:
                            pass
                        imax = imax - 1
                        i = i - 1
                        break
            i += 1
        toolbox.writeMatrix(ddesc, pdescAct)
        toolbox.writeMatrix(dAC50All, pAC50Act)
        lpdescActClean = runExternalSoft.dataManager(pdescAct, pAC50Act, corval, maxQuantile,
                                                     NBNA, prout)
        self.pdescCleanActive = lpdescActClean[0]
        self.pAC50AllActive = lpdescActClean[1]
        return [self.pdescCleanActive, self.pAC50AllActive]