def computeCoords(self, map_name, table_descriptor, corVal, distributionVal):
    """Build the 2D projection coordinate files for a chemical map.

    Extracts 1D/2D and 3D descriptors from the DB into CSV files (if not
    already cached on disk) and runs the external R projection script on them.
    Sets self.map_name and self.pr_coords (the projection folder).

    :param map_name: name of the map; used to build the output folder name
    :param table_descriptor: NOTE(review) unused in this body — verify callers
    :param corVal: correlation cutoff forwarded to the R projection
    :param distributionVal: distribution cutoff forwarded to the R projection
    """
    # selection list inchikey and dsstox from chemicals table
    self.map_name = map_name
    pr_out = pathFolder.createFolder(self.pr_out + "update" + map_name.upper() + "/")
    pr_coords = pathFolder.createFolder(pr_out + "coords/")
    # one sub-folder per (corVal, distributionVal) parameter pair
    prproj = pathFolder.createFolder(pr_coords + "proj_" + str(corVal) + "-" + str(distributionVal) + "/")

    # extract descriptor from DB, only when the CSV cache is missing
    p1D2D = pr_coords + "1D2D.csv"
    p3D = pr_coords + "3D.csv"
    if not path.exists(p1D2D):
        self.extractDesc("chemical_description", "chem_descriptor_1d2d_name", "desc_1d2d", p1D2D)
    if not path.exists(p3D):
        self.extractDesc("chemical_description", "chem_descriptor_3d_name", "desc_3d", p3D)

    # create coords via the external R script when either output is missing
    if not path.exists(prproj + "coord1D2D.csv") or not path.exists(prproj + "coord3D.csv"):
        runExternalSoft.RComputeMapFiles(p1D2D, p3D, prproj, corVal, distributionVal)
    self.pr_coords = prproj
def ChemByCurve(cassay, ppng, prout):
    """Sort chemical PNGs into folders by condition/curve class, annotated with AC50.

    For every (chemical, condition) response in cassay.dresponse, copies the
    chemical's PNG into prout/<condition>/<curve class>/ with CAS, AC50 and
    curve class drawn onto an enlarged canvas.

    :param cassay: assay object; dresponse is computed via responseCurves() if absent
    :param ppng: folder holding the source PNGs, one per CAS id
    :param prout: root output folder
    """
    if not "dresponse" in dir(cassay):
        cassay.responseCurves(drawn=0)
    for CASID in cassay.dresponse.keys():
        for condition in cassay.dresponse[CASID].keys():
            curveclass = cassay.dresponse[CASID][condition]['CURVE_CLASS2']
            AC50 = cassay.dresponse[CASID][condition]['AC50']
            prcurve = prout + condition + "/" + curveclass + "/"
            if not path.exists(prcurve):
                pathFolder.createFolder(prcurve)
            writeLine = ["CAS: " + str(CASID),
                         "AC50: " + str(AC50),
                         "Curve: " + str(curveclass)]
            pcaspng = ppng + CASID + ".png"
            if not path.exists(pcaspng):
                continue
            pimageout = prcurve + CASID + ".png"
            # was a bare `except:` — narrowed so real bugs (e.g. NameError)
            # are no longer silently skipped; unreadable/corrupt PNGs still are
            try:
                img = Image.open(pcaspng)
            except Exception:
                continue
            # paste the source PNG on a taller canvas to leave room for the text
            imgnew = Image.new("RGBA", (580, 775), (250, 250, 250))
            imgnew.paste(img, (0, 0))
            draw = ImageDraw.Draw(imgnew)
            draw.text((10, 600), str(writeLine[0]), (0, 0, 0), font=font)
            draw.text((10, 625), str(writeLine[1]), (0, 0, 0), font=font)
            draw.text((10, 650), str(writeLine[2]), (0, 0, 0), font=font)
            imgnew.save(pimageout)
def __init__(self, sdata, pfilecluster, proutcluster, lprdockingpose):
    """Load a cluster assignment file and optional affinity data.

    :param sdata: list of compound dicts (CMPD_CHEMBLID, PCHEMBL_VALUE) or {} when no affinity
    :param pfilecluster: CSV of cluster assignments (quoted compound id, cluster in last column)
    :param proutcluster: root output folder; a run-specific sub-folder is derived from the file name
    :param lprdockingpose: list of docking-pose folders used later by summarize()
    """
    self.pcluster = pfilecluster
    self.prout = proutcluster
    self.lprdockingpose = lprdockingpose
    self.sdata = sdata

    # affinity: map CHEMBL id -> pChEMBL value when affinity data is supplied
    if sdata != {}:
        daff = {}
        for compound in self.sdata:
            daff[compound["CMPD_CHEMBLID"]] = compound["PCHEMBL_VALUE"]
        self.Aff = daff

    # output folder named from the cluster file name (extension and dots stripped)
    prout = self.prout + "-".join(pfilecluster[0:-4].split("_")[1:]) + "/"
    prout = prout.replace(".", "")
    pathFolder.createFolder(prout)
    self.prout = prout

    # parse cluster file: last CSV column = cluster label, first quoted field = compound id
    filecluster = open(pfilecluster, "r")
    llinesCluster = filecluster.readlines()
    filecluster.close()
    dcluster = {}
    for lineCluster in llinesCluster[1:]:
        cluster = lineCluster.strip().split(",")[-1].replace("\"", "")
        if not cluster in dcluster.keys():
            dcluster[cluster] = []
        compoundID = lineCluster.strip().split("\"")[1]
        dcluster[cluster].append(compoundID)
    self.clusters = dcluster
def extractFrame(self): """Extract frame and wrap water""" # for MD launch lprframe = [] i = 1 for jobname in self.lMD.keys(): print "Extract - Frame =>", jobname, i if "pcmsout" in self.lMD[jobname].keys( ) and "prtrj" in self.lMD[jobname].keys(): prframes = self.pranalysis + str(jobname) + "/framesMD/" lprframe.append(prframes) pathFolder.createFolder(prframes) nbframeth = float(self.MDtime) / (int(self.stepFrame)) / float( self.interval) + 1 print nbframeth, len(listdir(prframes)) if len(listdir(prframes)) < int( nbframeth): # control if frame exist # clean folder pathFolder.cleanFolder(prframes) runExternalSoft.extractFrame(self.lMD[jobname]["pcmsout"], self.lMD[jobname]["prtrj"], prframes, noHOH=self.water, step=self.stepFrame, MDtime=self.MDtime) lprframe = toolbox.parallelLaunch( lprframe, self.nbCPU, str(int(float(self.MDtime) / 10))) self.lMD[jobname]["prframe"] = prframes i += 1
def AC50Distribution(self):
    """Plot the AC50 distribution histograms for this assay.

    Creates <proutSP>/histAC50/ and delegates plotting to the external
    R script, tagged with the second dash-separated token of the assay name.
    """
    pr_hist = self.proutSP + "histAC50/"
    pathFolder.createFolder(pr_hist)
    # the plot label is the part of the assay name after the first '-'
    assay_tag = self.name.split("-")[1]
    runExternalSoft.plotAC50(self.pAC50, pr_hist, assay_tag)
def corAC50(self):
    """Write a CAS x fluorophore curve-class table and run the AC50 correlation.

    Builds <proutSP>/corAC50/curve, a tab-separated table with one row per CAS
    and one column per SAMPLE_DATA_TYPE (fluorophore), filled with CURVE_CLASS2
    (or "NA" when the chemical was not measured for that fluorophore), then
    calls the external correlation script.
    """
    pcor = self.proutSP + "corAC50/"
    pathFolder.createFolder(pcor)
    dtypecurve = {}

    # define the different fluorophore types present in the assay
    ltypefluo = []
    for chem in self.lchem:
        typefluo = chem["SAMPLE_DATA_TYPE"]
        if not typefluo in ltypefluo:
            ltypefluo.append(typefluo)

    for chem in self.lchem:
        casID = chem["CAS"]
        if not casID in dtypecurve.keys():
            dtypecurve[casID] = {}
            for typefluo in ltypefluo:
                dtypecurve[casID][typefluo] = "NA"
        # FIX: record the curve class under this chemical's own fluorophore.
        # Previously the stale loop variable `typefluo` (last element of
        # ltypefluo after the init loop) was used as the key, so the class
        # could land in the wrong column.
        dtypecurve[casID][chem["SAMPLE_DATA_TYPE"]] = chem["CURVE_CLASS2"]

    # write the curve-class table
    pcurve = pcor + "curve"
    fcurve = open(pcurve, "w")
    fcurve.write("CAS" + "\t" + "\t".join(ltypefluo) + "\n")
    for casID in dtypecurve.keys():
        fcurve.write(casID + "\t" + "\t".join([str(dtypecurve[casID][k]) for k in ltypefluo]) + "\n")
    fcurve.close()

    runExternalSoft.corAC50(self.pAC50, pcurve, pcor)
    return
def runQSARClass(cDesc, cAssay, pAC50All, corval, maxQuantile, splitratio, nbCV, ratioAct, nbrepeat, nbNA, nameCell, lchannels, typeData, prout):
    """Run nbrepeat classification-QSAR builds, then merge the repeats.

    Each repeat gets its own numbered sub-folder of prout; the data
    preparation mode depends on typeData ("color", "crosscolor" or other).
    After all repeats, probabilities, results and descriptor importances are
    averaged into Average/, Prob/ and descImportance/.
    """
    for i in range(1, nbrepeat + 1):
        prQSAR = prout + str(i) + "/"
        pathFolder.createFolder(prQSAR)
        cModel = Model(cDesc.pdesc1D2D, cAssay.pAC50, pAC50All, "class", corval,
                       maxQuantile, splitratio, nbCV, ratioAct, nbNA, nameCell,
                       lchannels, prQSAR)
        # data preparation strategy depends on the requested data type
        if typeData == "color":
            cModel.prepDataColor()
        elif typeData == "crosscolor":
            cModel.prepDataCrossColor()
        else:
            cModel.prepData(typeData)
        cModel.buildQSARClass()

    # merge the nbrepeat runs into averaged outputs
    prQSARAV = pathFolder.createFolder(prout + "Average/")
    prQSARProb = pathFolder.createFolder(prout + "Prob/")
    mergeProba(prout, "RF", prQSARProb)
    mergeResults(prout, prQSARAV)
    prDescAV = pathFolder.createFolder(prout + "descImportance/")
    # descriptor involvement for both LDA and RF, top 10 descriptors
    mergeDescInvolve(prout, "LDA", 10, prDescAV)
    mergeDescInvolve(prout, "RF", 10, prDescAV)
def enrichmentIndex(self, pAC50All, FP=1):
    """Compute the cluster enrichment index from fingerprints or descriptors.

    :param pAC50All: path to the AC50 matrix for all assays
    :param FP: 1 = use fingerprints (self.cdesc.pFP); 0 = use cleaned 1D/2D
               descriptors (generated via dataManager when missing)
    :return: 0 (also when results already exist in the output folder)
    """
    self.pAC50All = pAC50All
    # output folder name encodes the data source and clustering parameters
    if FP == 1:
        prenrich = self.prout + self.cdesc.pFP.split("/")[-1] + "_" + str(self.aggType) + "_enrich-index" + "/"
    else:
        prenrich = self.prout + str(self.clusterMeth) + "_" + str(self.distmeth) + "_" + str(self.aggType) + "_enrich-index/"
    pathFolder.createFolder(prenrich)
    self.prenrich = prenrich
    lfileenrich = listdir(prenrich)

    if FP == 0:
        if not "pdesclean" in self.__dict__:
            self.pdesclean = prenrich + "descClean.csv"
            # clean the raw descriptor matrix if not done yet
            if not path.exists(self.pdesclean):
                runExternalSoft.dataManager(self.cdesc.pdesc1D2D, 0, self.corval, self.maxquantile, prenrich)
        # more than 2 files in the folder => already computed, skip
        if len(lfileenrich) > 2:
            return 0
        runExternalSoft.enrichmentIndex(self.pdesclean, prenrich, pAC50All,
                                        self.clusterMeth, self.distmeth, self.aggType)
    else:
        if len(lfileenrich) > 2:
            return 0
        runExternalSoft.enrichmentIndex(self.cdesc.pFP, prenrich, pAC50All,
                                        self.clusterMeth, self.distmeth, self.aggType)
    return 0
def visualizeOptimalClustering(self, prresult, FP = 0):
    """Copy optimal cluster files into a FinalClustering tree and plot them.

    Iterates the *cluster.csv files in self.proptimal; file names are expected
    as <cell>_<color channel parts>_cluster.csv. Each is copied under
    FinalClustering/<channel>/<cell>/<desc type>/ and passed to the external
    finalClustering plot script.

    :param prresult: root result folder
    :param FP: 1 = fingerprint-based run, 0 = descriptor-based run
    :return: 0
    """
    prFinalClustering = pathFolder.createFolder(prresult + "FinalClustering/")
    print prFinalClustering
    if not "proptimal" in self.__dict__:
        print "Error clustering no load"
    else:
        lfileopt = listdir(self.proptimal)
        for fileopt in lfileopt:
            if search("cluster.csv", fileopt):
                # file name pattern: <cell>_<channel ...>_cluster.csv
                lelem = fileopt.split("_")
                cell = lelem [0]
                colorChannel = "_".join(lelem[1:-1])
                if FP == 1:
                    # descriptor type derived from the optimal folder name
                    # (strips a fixed-length suffix) — NOTE(review): confirm
                    # the 13-char suffix assumption against the folder naming
                    desctype = self.proptimal.split("/")[-3][0:-13]
                    pdesc = self.cdesc.pFP
                else:
                    desctype = "Desc" + "-" + str(self.distmeth) + "-" + str(self.aggType)
                    pdesc = self.pdesclean
                prtemp = pathFolder.createFolder(prFinalClustering + colorChannel + "/" + cell + "/" + desctype + "/")
                pcluster = prtemp + fileopt
                copyfile(self.proptimal + fileopt, pcluster)
                runExternalSoft.finalClustering(pdesc, self.pAC50All, pcluster,
                                                cell + "_" + colorChannel,
                                                self.distmeth, self.aggType, prtemp)
    return 0
def computeFP(self, FPtype):
    """Compute a fingerprint of type FPtype for every SMILES in self.prSMI.

    Each SMILES file is loaded, the chemical prepared (cleaned SMILES written
    to SMIclean/), and its fingerprint stored in self.dFP keyed by CAS.
    Chemicals whose fingerprint computation fails are skipped.
    """
    # set SMI after cleanning
    prSMIclean = self.prDesc + "SMIclean/"
    pathFolder.createFolder(prSMIclean)
    self.prSMIclean = prSMIclean

    dFP = {}
    i = 1
    for pSMI in listdir(self.prSMI):
        # listdir returns bare names; the split chain also tolerates full paths
        cas = pSMI.split("/")[-1].split(".")[0]
        print cas, i, len(listdir(self.prSMI))
        i += 1
        psmiles = self.prSMI + cas + ".smi"
        if path.exists(self.prSMI + cas + ".smi"):
            fsmiles = open(psmiles, "r")
            smiles = fsmiles.readlines()[0].strip()
            fsmiles.close()

            # build the chemical, clean it, then compute the fingerprint
            chem = chemical.chemical(cas, smiles)
            chem.prepareChem(prSMIclean)
            error = chem.computeFP(FPtype)
            if error == 1:
                print "ERROR FP"
                continue
            else:
                dFP[cas] = chem.FP
    self.dFP = dFP
def computeMissingPNG(self):
    """Generate PNG depictions for DB chemicals missing from the organized tree.

    Chemicals with both a cleaned SMILES and an InChIKey are fetched from the
    DB; for each one whose PNG is absent from
    PNGorganized/<ik[0:2]>/<ik[2:4]>/<ik>.png, the image is rendered into a
    temp folder and copied into place. Processing order is shuffled so
    parallel/restarted runs do not collide on the same chemicals.
    """
    pr_organisedPNG = pathFolder.createFolder(self.pr_out + "PNGorganized/")
    pr_temp = pathFolder.createFolder(self.pr_out + "temp_PNG/")

    # load from table: only chemicals with usable SMILES and InChIKey
    l_chem_chemicalsDB = self.cDB.execCMD(
        "SELECT DISTINCT smiles_clean, inchikey FROM chemicals WHERE inchikey is not null AND smiles_clean is not NULL"
    )
    shuffle(l_chem_chemicalsDB)
    i = 0
    imax = len(l_chem_chemicalsDB)
    compute = 0  # count of PNGs actually rendered this run
    while i < imax:
        if i % 100 == 0:
            print(i, compute)
        smiles_clean = l_chem_chemicalsDB[i][0]
        inchikey = l_chem_chemicalsDB[i][1]
        # two-level fan-out on the first 4 InChIKey characters
        p_png = pr_organisedPNG + inchikey[:2] + "/" + inchikey[
            2:4] + "/" + inchikey + ".png"
        if not path.exists(p_png):
            cChem = CompDesc.CompDesc(smiles_clean, pr_temp)
            cChem.inchikey = inchikey
            cChem.smi = smiles_clean
            cChem.computePNG(bg="none")
            p_png_temp = pr_temp + "PNG/" + inchikey + ".png"
            # only keep non-empty renders
            if path.exists(p_png_temp) and path.getsize(p_png_temp) > 0:
                pathFolder.createFolder(pr_organisedPNG + inchikey[:2] + "/" + inchikey[2:4] + "/")
                copyfile(p_png_temp, p_png)
                compute = compute + 1
        i = i + 1
def organizePNG(self, cleaning=0):
    """Move flat PNG files into a two-level tree keyed on the file-name prefix.

    Each PNG in <pr_desc>/PNG/ is copied to
    PNGorganized/<name[0:2]>/<name[2:4]>/<name>; empty source files are
    deleted. Order is shuffled so concurrent/restarted runs spread the work.

    :param cleaning: forwarded to createFolder; non-zero wipes PNGorganized first
    """
    pr_organisedPNG = pathFolder.createFolder(self.pr_out + "PNGorganized/", clean=cleaning)
    pr_desc_png = self.pr_desc + "PNG/"
    l_png = listdir(pr_desc_png)
    shuffle(l_png)
    i = 0
    imax = len(l_png)
    while i < imax:
        if i % 100 == 0:
            print(i)
        p_png = l_png[i]
        # file may have been removed by a concurrent run between listdir and
        # here; was a bare `except:` — narrowed to the filesystem error only
        try:
            fsize = path.getsize(pr_desc_png + p_png)
        except OSError:
            i = i + 1
            continue
        if fsize == 0:
            # empty render: discard the source file
            remove(pr_desc_png + p_png)
        else:
            pr_1 = pr_organisedPNG + p_png[:2] + "/"
            pr_2 = pr_organisedPNG + p_png[:2] + "/" + p_png[2:4] + "/"
            p_png_out = pr_2 + p_png
            if not path.exists(p_png_out):
                pathFolder.createFolder(pr_1)
                pathFolder.createFolder(pr_2)
                copyfile(pr_desc_png + p_png, p_png_out)
        i = i + 1
def generatePNG(self):
    """Render a PNG for every cleaned SMILES file via molconvert.

    Output names mirror the input names with the .smi extension replaced
    by .png, written into self.prPNG.
    """
    pathFolder.createFolder(self.prPNG)
    for smi_name in listdir(self.prSMIclean):
        p_smi = self.prSMIclean + smi_name
        # swap the 3-char extension: "x.smi" -> "x.png"
        p_png = self.prPNG + smi_name[:-3] + "png"
        runExternalSoft.molconvert(p_smi, p_png)
def __init__(self, ltypeDesc, typeAff, cutoff, prout):
    """Store QSAR run parameters and create the run-specific output folder.

    :param ltypeDesc: list of descriptor-type names
    :param typeAff: affinity type label
    :param cutoff: cutoff forwarded to later steps
    :param prout: root output folder
    """
    self.ltypedesc = ltypeDesc
    self.cutoff = cutoff
    self.typeAff = typeAff
    # the QSAR folder name encodes descriptor types and affinity type
    pr_run = "%s%s_%s/" % (prout, "-".join(ltypeDesc), str(typeAff))
    pathFolder.createFolder(pr_run)
    self.prout = pr_run
def summarize(self, opaff=1): for prdockingpose in self.lprdockingpose: prout = self.prout + prdockingpose.split("/")[-2] + "/" prsum = prout + "Summary/" print prsum pathFolder.createFolder(prsum) lposesAll = listdir(prdockingpose) pfilout = prsum + "summary" filout = open(pfilout, "w") if opaff == 0: filout.write( "ID\tMDockingScore\tSDDockingScore\tMEmodel\tSDEmodel\tNB\n" ) else: filout.write( "ID\tMDockingScore\tSDDockingScore\tMEmodel\tSDEmodel\tMAff\tSDAff\tNb\n" ) for cluster in self.clusters.keys(): ldockingScore = [] lemodel = [] laff = [] for compound in self.clusters[cluster]: for poseAll in lposesAll: if search(compound + ".1.sdf", poseAll): pposetemp = prdockingpose + poseAll break if pposetemp == "": continue csdf = parseSDF.sdf(pposetemp) ddock = csdf.get_dockingscore() pposetemp = "" ldockingScore.append(ddock[compound]["r_i_docking_score"]) lemodel.append(ddock[compound]["r_i_glide_emodel"]) if opaff != 0: laff.append(float(self.Aff[compound])) MdockingScore = mean(ldockingScore) SDdockingscore = std(ldockingScore) Memodel = mean(lemodel) SDemodel = std(lemodel) nb = len(lemodel) if opaff == 1: Maff = mean(laff) SDaff = std(laff) filout.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (cluster, MdockingScore, SDdockingscore, Memodel, SDemodel, Maff, SDaff, nb)) else: filout.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (cluster, MdockingScore, SDdockingscore, Memodel, SDemodel, nb)) filout.close()
def createSecondaryClustering(self, pClusters):
    """Re-cluster each primary cluster separately (FP or descriptor mode).

    Parses the primary cluster CSV (ID, cluster per row), then for every
    cluster missing its cluster.csv output either reduces the fingerprint
    matrix and runs similarity-matrix clustering (self.distmeth is None) or
    writes a per-cluster descriptor subset and runs descriptor clustering.
    """
    fcluster = open(pClusters, "r")
    lchemicals = fcluster.readlines()
    fcluster.close()
    # map cluster label -> list of chemical IDs
    dclust = {}
    for chemical in lchemicals[1:]:
        chemical = chemical.strip().replace("\"", "")
        chemical = chemical.split(",")
        ID = chemical[0]
        cluster = chemical[1]
        if not cluster in dclust.keys():
            dclust[cluster] = []
        dclust[cluster].append(ID)

    # do different for FP and descriptor
    if self.distmeth == None:
        # fingerprint mode: reduce the FP similarity matrix per cluster
        for cluster in dclust.keys():
            prcluster = self.prCluster + "Clust" + str(cluster) + "/"
            if not path.exists(prcluster + "cluster.csv"):  # skip already-computed clusters
                pathFolder.createFolder(prcluster)
                pmatrix = prcluster + "FPmatrix"
                self.cdesc.reduceMatrixFP(dclust[cluster], pmatrix)
                runExternalSoft.clusteringSimilarityMatrix(pmatrix, prcluster,
                                                           self.aggType, self.clusterMeth,
                                                           self.optimalNBclustMeth)
    else:
        # descriptor mode: index the cleaned descriptor rows by chemical ID
        fdesc = open(self.pdesclean, "r")
        lchemdesc = fdesc.readlines()
        fdesc.close()
        ddesc = {}
        for chemdesc in lchemdesc[1:]:
            ID = chemdesc.split(",")[0].replace("\"", "")
            ddesc[ID] = chemdesc
        # write the descriptor subset of each cluster and cluster it
        for cluster in dclust.keys():
            prcluster = self.prCluster + "Clust" + str(cluster) + "/"
            if not path.exists(prcluster + "cluster.csv"):
                pathFolder.createFolder(prcluster)
                pdesc = prcluster + "descClean.csv"
                fdesc = open(pdesc, "w")
                fdesc.write(lchemdesc[0])
                for chemID in dclust[cluster]:
                    fdesc.write(ddesc[chemID])
                fdesc.close()
                runExternalSoft.clustering(pdesc, "0", prcluster, self.distmeth,
                                           self.aggType, self.clusterMeth,
                                           self.optimalNBclustMeth)
def corelAllAssays(self, cluc, chepg2, chek293):
    """Cross-correlate AC50 values of three assays per interference cluster.

    Parses self.pclusters into cluster -> chemicals, then for every cluster
    writes a descriptor subset (desc.csv), copies the chemicals' PNGs, and
    runs the external crossA50s comparison against the three assays' AC50
    files.

    :param cluc, chepg2, chek293: assay objects exposing a pAC50 path
    """
    prInterfer = self.prout + "interfer/"
    pathFolder.createFolder(prInterfer)

    # cluster main: "<chemID>,<c1>,<c2>" -> cluster name "<c1>_<c2>"
    fcluster = open(self.pclusters, "r")
    lclusters = fcluster.readlines()
    fcluster.close()  # FIX: reader was never closed
    dclust = {}
    for clusters in lclusters:
        clusters = clusters.strip().split(",")
        chemID = clusters[0]
        clustname = str(clusters[1]) + "_" + str(clusters[2])
        if not clustname in dclust.keys():
            dclust[clustname] = []
        dclust[clustname].append(chemID)

    # open descriptors, indexed by chemical ID (keep header line separately)
    fdesc = open(self.pdesc, "r")
    lchemdesc = fdesc.readlines()
    fdesc.close()
    ddesc = {}
    for chemdesc in lchemdesc[1:]:
        chemID = chemdesc.split("\t")[0]
        ddesc[chemID] = chemdesc

    for cluster in dclust.keys():
        prclustsub = pathFolder.createFolder(prInterfer + str(cluster) + "/")
        # file descriptors for this cluster
        pdesc = prclustsub + "desc.csv"
        filedesc = open(pdesc, "w")
        filedesc.write(lchemdesc[0])
        for chemical in dclust[cluster]:
            # png
            ppng = self.prPNG + chemical + ".png"
            if path.exists(ppng):
                copyfile(ppng, prclustsub + chemical + ".png")
            # desc
            if chemical in ddesc.keys():
                filedesc.write(ddesc[chemical])
        # FIX: was `fdesc.close()` (the already-closed reader), which left
        # desc.csv open and possibly unflushed when crossA50s read it
        filedesc.close()
        runExternalSoft.crossA50s(pdesc, cluc.pAC50, chepg2.pAC50, chek293.pAC50, prclustsub)
    return
def extractChemical(self, pSDFTox21):
    """Extract per-CAS SDF and SMILES files from the Tox21 SDF database.

    For each chemical in self.lchem, looks up its CASRN in the parsed SDF DB
    and writes <cas>.sdf and <cas>.smi (when not already present). CAS numbers
    not found in the DB are logged to <prlog>/<name>-extract.log. Sets
    self.prSMI and self.prSDF.
    """
    prSMI = self.prout + "SMI/"
    pathFolder.createFolder(prSMI)
    prSDF = self.prout + "SDF/"
    pathFolder.createFolder(prSDF)

    # load DB
    db = loadDB.sdfDB(pSDFTox21, "CASRN", self.prout)
    db.parseAll()

    # extract chemical
    lnotfind = []
    for chem in self.lchem:
        cas = chem["CAS"]
        if cas == "":
            continue
        flag = 0  # set to 1 when the CAS is found in the DB
        for cpdDB in db.lc:
            if cpdDB["CASRN"] == cas:
                # sdf
                pfilSDF = prSDF + str(cas) + ".sdf"
                if not path.exists(pfilSDF):
                    filsdf = open(pfilSDF, "w")
                    filsdf.write(cpdDB["sdf"])
                    filsdf.close()
                # Smile (non-empty SMILES only; the second check is redundant)
                pfilSMI = prSMI + str(cas) + ".smi"
                if cpdDB["SMILES"] != "":
                    if not path.exists(pfilSMI) and cpdDB["SMILES"] != "":
                        filSMI = open(pfilSMI, "w")
                        filSMI.write(cpdDB["SMILES"])
                        filSMI.close()
                flag = 1
                break
        if flag == 0 and not cas in lnotfind:
            lnotfind.append(cas)

    # log the CAS numbers missing from the DB
    logfile = open(self.prlog + self.name + "-extract.log", "w")
    logfile.write("\n".join(lnotfind))
    logfile.close()
    self.prSMI = prSMI
    self.prSDF = prSDF
def drawMolecules(self, prpng):
    """Convert every per-compound SDF into a PNG depiction.

    Splits the SDF database first when needed; skips all work when the PNG
    folder already holds one image per SDF.

    :param prpng: output folder for the PNG files (stored as self.prpng)
    """
    # make sure the per-compound SDF files exist
    if not "prsdf" in dir(self):
        self.splitSDF()
    pathFolder.createFolder(prpng)
    self.prpng = prpng
    # already fully converted -> nothing to do
    if len(listdir(self.prsdf)) == len(listdir(self.prpng)):
        return
    for sdf_name in listdir(self.prsdf):
        p_in = self.prsdf + sdf_name
        p_out = self.prpng + sdf_name[:-3] + "png"
        runExternalSoft.molconvert(p_in, p_out)
def clusterizeTopActive(self, top):
    """Cluster the `top` most affine chemicals.

    Builds (or reuses) top<top>/Aff<top>.csv holding the chemicals with the
    highest affinity values, then runs the external clustering on it. Ties at
    the cutoff affinity are all included, so the file may hold more than
    `top` rows.
    """
    prtop = pathFolder.createFolder(self.prout + "top" + str(top) + "/")
    pafftop = prtop + "Aff" + str(top) + ".csv"
    if not path.exists(pafftop):
        # create top descriptor
        daff = toolbox.loadMatrixToDict(self.paff)
        if len(daff.keys()) <= top:
            # fewer chemicals than requested: take them all
            copyfile(self.paff, pafftop)
        else:
            # affinity of the top-th chemical = inclusion threshold
            laff = []
            for chemID in daff.keys():
                laff.append(float(daff[chemID]["Aff"]))
            minAff = sorted(laff, reverse=True)[top - 1]
            lchem = []
            for chemID in daff.keys():
                if float(daff[chemID]["Aff"]) >= minAff:
                    lchem.append(chemID)
            filout = open(pafftop, "w")
            filout.write("CHEMBLID\tAff\tType\n")
            for chem in lchem:
                filout.write("%s\t%s\t%s\n" % (daff[chem]["CHEMBLID"], daff[chem]["Aff"], daff[chem]["Type"]))
            filout.close()
    runExternalSoft.clusterize(self.pdesc, pafftop, self.typeAff, self.cutoff, prtop)
def __init__(self, p_sdf, p_LD50, pr_out):
    """Keep the input file paths, create the TOX_DB folder and open a DB handle.

    :param p_sdf: path to the source SDF file
    :param p_LD50: path to the LD50 data file
    :param pr_out: root output folder; outputs go under <pr_out>/TOX_DB/
    """
    self.p_sdf = p_sdf
    self.p_LD50 = p_LD50
    # dedicated output sub-folder for this dataset
    pr_db = pathFolder.createFolder(pr_out + "TOX_DB/")
    self.pr_out = pr_db
    self.cDB = DBrequest.DBrequest()
def extractOnlyNewChem(self, name_table, field_comparison):
    """List DSSTOX chemicals absent from a DB table; cache the result on disk.

    Sets self.l_chem_toadd. When updateDSSTOX/chem_list.txt already exists it
    is reloaded instead of re-querying the DB.

    :param name_table: DB table to compare against
    :param field_comparison: column of name_table matching the DSSTOX keys
    """
    pr_out = pathFolder.createFolder(self.pr_out + "updateDSSTOX/")
    p_filout = pr_out + "chem_list.txt"
    if path.exists(p_filout):
        # cached result from a previous run
        filout = open(p_filout, "r")
        self.l_chem_toadd = filout.read().split("\n")
        filout.close()
        return

    filout = open(p_filout, "w")
    d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)
    # (removed an unused load of self.p_chem_SMILES that only cost time/memory)

    # remove from the DSSTOX set every chemical already present in the DB
    l_chem_DB = self.cDB.execCMD("SELECT %s FROM %s" % (field_comparison, name_table))
    for chem_DB in l_chem_DB:
        chem = chem_DB[0]
        if chem == None:
            continue
        # was a bare except; only a missing key is expected here
        try:
            del d_dsstox_name[chem]
        except KeyError:
            pass

    for chem in d_dsstox_name.keys():
        filout.write(chem + "\n")
    filout.close()
    self.l_chem_toadd = list(d_dsstox_name.keys())
def updateSMILES(self, name_table="chemicals"):
    """Function use to update the chemical table => check if smiles origin change.

    Compares each DSSTOX chemical's original SMILES against the DB row; when
    it changed, re-cleans the SMILES, regenerates the InChIKey and issues an
    UPDATE when the InChIKey differs from the stored one.
    NOTE(review): indentation reconstructed — the print/update statements are
    placed inside the smiles-changed branch so cmd_sql is always bound;
    confirm against the original layout.
    """
    d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
    # NOTE(review): d_dsstox_name loaded but unused in this body
    d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)
    self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")

    # index the DB rows by DSSTOX id: [row id, smiles_origin, inchikey, smiles_clean]
    cmd_SQL = "SELECT id, dsstox_id, smiles_origin, inchikey, smiles_clean FROM %s " % (name_table)
    l_chem_DB = self.cDB.execCMD(cmd_SQL)
    d_chem_DB = {}
    for chem_DB in l_chem_DB:
        d_chem_DB[chem_DB[1]] = [chem_DB[0], chem_DB[2], chem_DB[3], chem_DB[4]]

    i = 0
    for chem in d_dsstox_SMILES.keys():
        dsstox_id = d_dsstox_SMILES[chem]["dsstox_substance_id"]
        inchkey = d_dsstox_SMILES[chem]["InChI Key_QSARr"]
        smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
        smiles_cleaned = d_dsstox_SMILES[chem]["Canonical_QSARr"]
        try:
            smiles_indb = d_chem_DB[dsstox_id][1]  # case of chemical is not in the DB
        except:
            continue
        inchkey_db = d_chem_DB[dsstox_id][2]
        smiles_cleaned_db = d_chem_DB[dsstox_id][3]
        smiles_db = d_chem_DB[dsstox_id][1]

        if smiles != smiles_db:
            # recompute cleaned SMILES and InChIKey from the new origin SMILES
            c_chem = CompDesc.CompDesc(smiles, self.pr_desc)
            c_chem.prepChem()
            if c_chem.err == 0:
                c_chem.generateInchiKey()
            else:
                c_chem.smi = None
            if c_chem.err == 0:
                inchikey = c_chem.inchikey
            else:
                inchikey = None
            # only update when the recomputed InChIKey differs from the DB
            if d_chem_DB[dsstox_id][2] != inchikey:
                cmd_sql = "UPDATE %s SET smiles_origin = '%s', smiles_clean = '%s', inchikey='%s' WHERE id='%s';" % (
                    name_table, smiles, c_chem.smi, inchikey, d_chem_DB[dsstox_id][0])
            else:
                continue
            print(i)
            i = i + 1
            self.cDB.updateTable(cmd_sql)
    return
def rankCompounds(ptableCluster, prcluster, pMIC_molar, prrank):
    """Rank chemicals per organism by ascending MIC and copy their images.

    For each organism column of the MIC matrix (excluding CMPD_CHEMBLID),
    sorts the MIC values and copies each chemical's cluster JPEG into
    <prrank>/<organism>/ named <rank>_<chem>_<cluster>.jpeg.
    NOTE(review): Python 2 code (dict.keys() indexing, print statements);
    ties on MIC are resolved by dict iteration order.
    """
    drank = {}
    dMIC = toolbox.loadMatrix(pMIC_molar)
    dcluster = toolbox.loadMatrix(ptableCluster, ",")
    # organism list = columns of any MIC row minus the chemical-id column
    lorga = dMIC[dMIC.keys()[0]].keys()
    del lorga[lorga.index("CMPD_CHEMBLID")]

    # collect all MIC values per organism
    for orga in lorga:
        drank[orga] = []
        for chem in dMIC.keys():
            drank[orga].append(float(dMIC[chem][orga]))
    # ascending order = most potent first
    for orga in drank.keys():
        drank[orga] = list(sorted(drank[orga], reverse = False))

    for orga in drank.keys():
        prdata = pathFolder.createFolder(prrank + orga + "/")
        lchem = []  # chemicals already ranked for this organism
        r = 1
        for MIC in drank[orga]:
            for chem in dMIC.keys():
                if not chem in dcluster.keys():
                    continue  # chemical without a cluster assignment
                if float(dMIC[chem][orga]) == float(MIC) and not chem in lchem:
                    print dcluster[chem]
                    print prcluster + "cluster" + str(dcluster[chem]["cluster"]) + "/" + chem + ".jpeg"
                    copyfile(prcluster + "cluster" + str(dcluster[chem]["cluster"]) + "/" + chem + ".jpeg",
                             prdata + str(r) + "_" + chem + "_" + str(dcluster[chem]["cluster"]) + ".jpeg")
                    lchem.append(chem)
                    r = r + 1
def pushNeighbors(self):
    """Push 3-dimension and N-dimension neighbor lists into the DB.

    Reads two precomputed neighbor tables (2_1 = projected 3D space,
    170_207 = full descriptor space), formats each chemical's space-separated
    neighbor list as a Postgres-style array literal, and inserts one row per
    InChIKey into <nameMap>_neighbors. Does nothing when either table file is
    missing.
    NOTE(review): the existing-row DB check is commented out, so out1D2D is
    always [] and every chemical is (re)inserted — confirm this is intended.
    """
    prneighbor = pathFolder.createFolder(self.prout + "Neighbors/")
    ptable3Dim = prneighbor + "Table_DIM1D2D-2_1.csv"
    ptableNDim = prneighbor + "Table_DIM1D2D-170_207.csv"
    if path.exists(ptable3Dim) and path.exists(ptableNDim):
        # inchikey -> list of neighbor inchikeys (space-separated in the table)
        ddist3D = toolbox.loadMatrixToDict(ptable3Dim)
        for chem in ddist3D.keys():
            ddist3D[chem] = ddist3D[chem]["Neighbors"].split(" ")
        ddistND = toolbox.loadMatrixToDict(ptableNDim)
        for chem in ddistND.keys():
            ddistND[chem] = ddistND[chem]["Neighbors"].split(" ")

        cDB = DBrequest.DBrequest()
        cDB.verbose = 0
        for chem in ddist3D.keys():
            #out1D2D = cDB.getRow("%s_neighbors"%(self.nameMap), "inchikey='%s'" % (chem))
            out1D2D = []
            if out1D2D == []:
                # format as {"ik1","ik2",...} array literals for the DB
                w3D = "{" + ",".join(
                    ["\"%s\"" % (neighbor) for neighbor in ddist3D[chem]]) + "}"
                wND = "{" + ",".join(
                    ["\"%s\"" % (neighbor) for neighbor in ddistND[chem]]) + "}"
                cDB.addElement(
                    "%s_neighbors" % (self.nameMap),
                    ["inchikey", "neighbors_dim3", "neighbors_dimn"],
                    [chem, w3D, wND])
def validationPredictor(self, typeCellChannel, pAC50All):
    """Predict all actives of one cell/channel assay and dump the predictions.

    Chemicals with a non-NA AC50 for typeCellChannel are predicted from their
    cleaned SMILES via self.predictSMI and written to a tab-separated table in
    validation/<typeCellChannel>/.

    :param typeCellChannel: assay key formatted "<cell>_<channel...>"
    :param pAC50All: path to the AC50 matrix for all assays
    :return: 0
    """
    dAC50All = toolbox.loadMatrix(pAC50All)
    dCASact = {}
    dpredict = {}
    dCASact[typeCellChannel] = []
    for CASID in dAC50All.keys():
        # have to change
        if dAC50All[CASID][typeCellChannel] != "NA":
            dCASact[typeCellChannel].append(CASID)
            if not CASID in dpredict.keys():
                # skip chemicals without a cleaned SMILES on disk
                if not path.exists(self.cDB.prSMIclean + CASID + ".smi"):
                    continue
                else:
                    smiles = toolbox.loadSMILES(self.cDB.prSMIclean + CASID + ".smi")
                    dpredict[CASID] = self.predictSMI(CASID, smiles, plot=1)

    prval = pathFolder.createFolder(self.prout + "validation/" + typeCellChannel + "/")
    for typeAssay in dCASact.keys():
        # split "<cell>_<channel...>" back into its parts
        channel = "_".join(typeAssay.split("_")[1:])
        cell = typeAssay.split("_")[0]
        kpred = str(cell) + "_" + str(channel)
        # descriptor names taken from an arbitrary prediction (Python 2 keys()[0])
        ldesc = dpredict[dpredict.keys()[0]][kpred]
        filout = open(prval + typeCellChannel, "w")
        # NOTE(review): "\t".join(typeCellChannel) joins the *characters* of
        # the string — the header looks garbled; probably meant ldesc here
        filout.write("CASID" + "\t".join(typeCellChannel) + "\n")
        for CASID in dpredict.keys():
            filout.write(CASID)
            for desc in ldesc:
                filout.write("\t" + str(dpredict[CASID][kpred][desc]))
            filout.write("\n")
        filout.close()
    return 0
def rankingAC50(self):
    """Write one rank-ordered copy of the AC50 table per AC50 column.

    Loads the cleaned AC50 CSV, ranks every value column with
    toolbox.rankList, and for each column writes <column>_rank.txt under
    ranking/ with all rows sorted by that column's rank.
    """
    prRank = self.prAnalysis + "ranking/"
    pathFolder.createFolder(prRank)
    fchemAC50 = open(self.pAC50clean, "r")
    lchemac50 = fchemAC50.readlines()
    fchemAC50.close()

    # dstock: "name" -> chemical names; column index -> list of values
    dstock = {}
    lheader = lchemac50[0].strip().split(",")
    i = 1
    dstock["name"] = []
    while i < len(lheader):
        dstock[i] = []
        i += 1
    for chemAC50 in lchemac50[1:]:
        lelem = chemAC50.strip().split(",")
        name = lelem[0]
        dstock["name"].append(name)
        i = 1
        while i < len(lelem):
            dstock[i].append(lelem[i])
            i += 1

    # replace each value column by its ranks
    for i in range(1, len(lheader)):
        dstock[i] = toolbox.rankList(dstock[i])

    # generate 3 files ordered differently
    for i in range(1, len(lheader)):
        prank = prRank + str(lheader[i].replace("\"", "")) + "_rank.txt"
        frank = open(prank, "w")
        frank.write(lchemac50[0])
        # row order sorted by the rank of column i
        lisorted = sorted(range(len(dstock[i])), key=lambda k: dstock[i][k])
        for isorted in lisorted:
            j = 1
            frank.write(str(dstock["name"][isorted]))
            while j < len(lheader):
                frank.write("," + str(dstock[j][isorted]))
                j += 1
            frank.write("\n")
        frank.close()
def splitSDF(self):
    """Write one SDF file per compound into <prout>/cpdsdf/.

    Parses the database first when needed; skips writing when the folder
    already holds one file per parsed compound. Sets self.prsdf.
    """
    # make sure the compound list is parsed
    if not "lc" in dir(self):
        self.parseAll()
    pr_split = self.prout + "cpdsdf/"
    pathFolder.createFolder(pr_split)
    self.prsdf = pr_split
    # one file per compound already present -> nothing to do
    if len(self.lc) == len(listdir(pr_split)):
        return
    for compound in self.lc:
        p_out = self.prsdf + compound[self.name] + ".sdf"
        f_out = open(p_out, "w")
        f_out.write(compound["sdf"])
        f_out.close()
def __init__(self, name_update, pr_OPERA_preproc, pr_out):
    """Register update paths, index the OPERA files and open a DB connection.

    :param name_update: label of this update run
    :param pr_OPERA_preproc: folder with preprocessed OPERA files
    :param pr_out: root output folder; DESC/ is created under it
    """
    # raw inputs
    self.name_update = name_update
    self.pr_OPERA_preproc = pr_OPERA_preproc
    self.pr_out = pr_out
    # index the preprocessed OPERA files on the instance
    self.loadOPERAFileInClass()
    # quiet DB handle and folder for computed descriptors
    self.cDB = DBrequest.DBrequest(verbose=0)
    self.pr_desc = pathFolder.createFolder(pr_out + "DESC/")
def __init__(self, pfilin, curvecutoff, effcutoff, curvePositive, curveNegative, prcytox, prout, prlog):
    """Load one assay file and store its cutoffs and output folders.

    :param pfilin: path to the assay input file (basename = assay name)
    :param curvecutoff: curve-class cutoff
    :param effcutoff: efficiency cutoff
    :param curvePositive: curve classes counted as positive
    :param curveNegative: curve classes counted as negative
    :param prcytox: cytotoxicity folder
    :param prout: root output folder; a per-assay sub-folder is created
    :param prlog: log folder
    """
    self.pfilin = pfilin
    self.prcytox = prcytox
    # assay name = input file basename without extension
    assay_name = pfilin.split("/")[-1].split(".")[0]
    self.name = assay_name
    self.loadAssay()
    # dedicated sub-folder for this assay's outputs
    pr_assay = prout + assay_name + "/"
    pathFolder.createFolder(pr_assay)
    self.proutSP = pr_assay
    self.prout = prout
    self.prlog = prlog
    self.curveCutoff = curvecutoff
    self.curvePositive = curvePositive
    self.curveNegative = curveNegative
    self.effcutoff = effcutoff