def formatOPERAForToolChem(self):
    """Write a tab-separated 2D-descriptor table for the tool-chemical list.

    Scans ``self.pr_forToolChem`` and, for the file named
    ``chemicals_listNew.csv`` only, writes ``<name>_desc2D.csv`` next to it.
    The output has one header row (``inchikey`` followed by the 2D descriptor
    names) and one row per chemical whose preparation and 2D descriptor
    computation both leave ``err`` at 0.
    """
    # NOTE: flagged "to change" by the original author.
    # Throwaway instance, only used to obtain the ordered list of 2D
    # descriptor names.
    c_chem = CompDesc.CompDesc("", "")
    l_desc2D = c_chem.getLdesc("2D")

    pr_desc = self.pr_out + "DESC/"

    for file_chem in listdir(self.pr_forToolChem):
        # Only the new chemical list is processed; every other file is skipped.
        if file_chem != "chemicals_listNew.csv":
            continue

        p_filout = self.pr_forToolChem + file_chem[:-4] + "_desc2D.csv"
        # Context manager guarantees the file is closed even if a chemical
        # raises during preparation.
        with open(p_filout, "w") as filout:
            # The header must end with a newline, otherwise the first data
            # row is appended to the header line.
            filout.write("inchikey\t" + "\t".join(l_desc2D) + "\n")

            l_chem = toolbox.loadMatrixToList(self.pr_forToolChem + file_chem)
            for d_chem in l_chem:
                c_chem = CompDesc.CompDesc(d_chem["smiles_origin"], pr_desc)
                c_chem.prepChem()
                if c_chem.err != 0:
                    continue
                c_chem.computeAll2D()
                if c_chem.err != 0:
                    continue
                l_val = [str(c_chem.all2D[desc]) for desc in l_desc2D]
                filout.write("%s\t%s\n" % (c_chem.inchikey, "\t".join(l_val)))
def test_generate3D(self):
    """Preparing a SMILES and generating its 3D structure must leave ``err`` at 0."""
    chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
    chem.prepChem()
    chem.set3DChemical()

    self.assertEqual(chem.err, 0)

    # Clean up the working folders under ./tests/.
    for pr_work in ("./tests/MOLCLEAN", "./tests/SDF3D"):
        rmtree(pr_work)
def computeMissingPNG(self):
    """Generate the PNG of every DB chemical that has none in the organized tree.

    Chemicals with a non-null ``inchikey`` and ``smiles_clean`` are read from
    the ``chemicals`` table and visited in random order. A PNG is expected at
    ``PNGorganized/<key[:2]>/<key[2:4]>/<key>.png``; when it is absent the
    image is computed in ``temp_PNG/`` and, if a non-empty file was produced,
    copied to the expected location. Progress (index, number of images
    copied) is printed every 100 chemicals.
    """
    pr_organisedPNG = pathFolder.createFolder(self.pr_out + "PNGorganized/")
    pr_temp = pathFolder.createFolder(self.pr_out + "temp_PNG/")

    # Load (smiles_clean, inchikey) pairs from the table.
    l_rows = self.cDB.execCMD(
        "SELECT DISTINCT smiles_clean, inchikey FROM chemicals WHERE inchikey is not null AND smiles_clean is not NULL"
    )
    shuffle(l_rows)

    nb_computed = 0
    for idx, row in enumerate(l_rows):
        if idx % 100 == 0:
            print(idx, nb_computed)

        smiles_clean = row[0]
        inchikey = row[1]
        p_png = pr_organisedPNG + inchikey[:2] + "/" + inchikey[2:4] + "/" + inchikey + ".png"

        # Already rendered: nothing to do for this chemical.
        if path.exists(p_png):
            continue

        cChem = CompDesc.CompDesc(smiles_clean, pr_temp)
        # Reuse the values stored in the DB instead of re-preparing the chemical.
        cChem.inchikey = inchikey
        cChem.smi = smiles_clean
        cChem.computePNG(bg="none")

        p_png_temp = pr_temp + "PNG/" + inchikey + ".png"
        # Only keep the image when the rendering produced a non-empty file.
        if path.exists(p_png_temp) and path.getsize(p_png_temp) > 0:
            pathFolder.createFolder(pr_organisedPNG + inchikey[:2] + "/" + inchikey[2:4] + "/")
            copyfile(p_png_temp, p_png)
            nb_computed = nb_computed + 1
def formatPrepChemForToolChem(self):
    """Write the prepared-chemical table for the tool-chemical list.

    Scans ``self.pr_forToolChem`` and, for the file named
    ``chemicals_listNew.csv`` only, writes ``<name>_chemPrep.csv`` next to it.
    One tab-separated row is written per chemical whose preparation and 2D
    descriptor computation both leave ``err`` at 0; the ``drugbank_id``
    column is always filled with ``NA``.
    """
    pr_desc = self.pr_out + "DESC/"

    for file_chem in listdir(self.pr_forToolChem):
        # Only the new chemical list is processed; every other file is skipped.
        if file_chem != "chemicals_listNew.csv":
            continue

        p_filout = self.pr_forToolChem + file_chem[:-4] + "_chemPrep.csv"
        # Context manager guarantees the file is closed even if a chemical
        # raises during preparation.
        with open(p_filout, "w") as filout:
            filout.write(
                "dsstox_id\tsmiles_origin\tsmiles_cleaned\tinchikey\tdrugbank_id\tcasn\tname\n"
            )

            l_chem = toolbox.loadMatrixToList(self.pr_forToolChem + file_chem)
            for d_chem in l_chem:
                c_chem = CompDesc.CompDesc(d_chem["smiles_origin"], pr_desc)
                c_chem.prepChem()
                if c_chem.err != 0:
                    continue
                # Descriptors are not written here; the call is kept so that
                # only chemicals for which they can be computed are listed.
                c_chem.computeAll2D()
                if c_chem.err != 0:
                    continue
                filout.write(
                    "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %
                    (d_chem["dsstox_id"], d_chem["smiles_origin"], c_chem.smi,
                     c_chem.inchikey, "NA", d_chem["casn"], d_chem["name"]))
def updateSMILES(self, name_table="chemicals"):
    """Update the chemical table for entries whose original SMILES changed.

    For every record of the DSSTox SMILES file (``self.p_chem_SMILES``) that
    is already present in *name_table*, the file's ``Original_SMILES`` is
    compared with the stored ``smiles_origin``. When they differ, the
    chemical is re-prepared and, if the resulting InChIKey differs from the
    stored one, ``smiles_origin``, ``smiles_clean`` and ``inchikey`` are
    updated. A running count of updates is printed.

    Args:
        name_table: Name of the chemical table to read and update.
    """

    def _sql_literal(value):
        # Render a value as a SQL literal. None must become NULL rather than
        # the quoted string 'None'; embedded single quotes are doubled.
        if value is None:
            return "NULL"
        return "'%s'" % str(value).replace("'", "''")

    d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
    self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")

    # Index the table content by dsstox_id:
    # dsstox_id -> [id, smiles_origin, inchikey, smiles_clean]
    cmd_SQL = "SELECT id, dsstox_id, smiles_origin, inchikey, smiles_clean FROM %s " % (
        name_table)
    d_chem_DB = {}
    for chem_DB in self.cDB.execCMD(cmd_SQL):
        d_chem_DB[chem_DB[1]] = [chem_DB[0], chem_DB[2], chem_DB[3], chem_DB[4]]

    nb_update = 0
    for d_record in d_dsstox_SMILES.values():
        dsstox_id = d_record["dsstox_substance_id"]
        smiles = d_record["Original_SMILES"]

        try:
            id_db, smiles_db, inchikey_db, _ = d_chem_DB[dsstox_id]
        except KeyError:
            # Chemical is not in the DB: nothing to update.
            continue

        if smiles == smiles_db:
            continue

        # Original SMILES changed: recompute the cleaned SMILES and InChIKey.
        c_chem = CompDesc.CompDesc(smiles, self.pr_desc)
        c_chem.prepChem()
        if c_chem.err == 0:
            c_chem.generateInchiKey()
        else:
            c_chem.smi = None
        # err is re-checked because generateInchiKey may also have failed.
        inchikey = c_chem.inchikey if c_chem.err == 0 else None

        # Rows whose InChIKey is unchanged are deliberately left untouched.
        if inchikey_db == inchikey:
            continue

        cmd_sql = "UPDATE %s SET smiles_origin = %s, smiles_clean = %s, inchikey=%s WHERE id='%s';" % (
            name_table, _sql_literal(smiles), _sql_literal(c_chem.smi),
            _sql_literal(inchikey), id_db)

        print(nb_update)
        nb_update = nb_update + 1
        self.cDB.updateTable(cmd_sql)
    return
def test_FP(self):
    """Compute all fingerprints for two chemicals and print every similarity.

    Each fingerprint type is combined with each distance name; the test
    passes when ``err`` of the first chemical is still 0 afterwards.
    """
    chem_ref = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
    chem_ref.prepChem()
    chem_ref.computeFP("All")

    # Second chemical used for the comparison.
    chem_cmp = CompDesc.CompDesc("CCCO", "./tests/")
    chem_cmp.prepChem()
    chem_cmp.computeFP("All")

    fingerprints = ('Mol', 'pairs', 'MACCS', 'Torsion', 'Morgan')
    distances = (
        "Tanimoto", "Dice", "Cosine", "Sokal", "Russel", "RogotGoldberg",
        "Kulczynski", "McConnaughey", "Asymmetric", "BraunBlanquet", "AllBit",
    )

    # Exercise every fingerprint/distance combination.
    for fp in fingerprints:
        for dist in distances:
            similarity = chem_ref.computeSimilarityFP(chem_cmp, fp, dist)
            print(fp, dist, similarity)

    self.assertEqual(chem_ref.err, 0)
def test_computeOPERAServer(self):
    """Run PaDEL/CDK then OPERA descriptor computation; ``err`` must stay 0."""
    chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
    chem.prepChem()
    chem.computePADEL2DFPandCDK()
    chem.computeOperaDesc()

    self.assertEqual(chem.err, 0)

    # Clean up the files and folders left under ./tests/.
    for cmd_clean in ("rm -rf ./tests/PADEL*", "rm -rf ./tests/cdk_desc*"):
        system(cmd_clean)
    rmtree("./tests/OPERA")
def test_computeOPERA(self):
    """Run ``computeOPERAFromChem`` with ``update=1``; ``err`` must stay 0."""
    chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
    chem.prepChem()
    chem.computeOPERAFromChem(update=1)

    self.assertEqual(chem.err, 0)

    # Clean up the files and folders left under ./tests/.
    l_cmd_clean = (
        "rm -rf ./tests/PADEL*",
        "rm -rf ./tests/CDK*",
        "rm -rf ./tests/cdk_desc*",
    )
    for cmd_clean in l_cmd_clean:
        system(cmd_clean)
    rmtree("./tests/OPERA")
def test_compute3Ddesc(self):
    """Generate a 3D structure for a long SMILES and compute all 3D descriptors.

    The test passes when ``err`` is still 0 after the whole pipeline.
    """
    smiles = "CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C@H](CCC(=O)O)NC(=O)[C@@H](N)CCCCN)C(=O)N[C@@H](Cc1cnc[nH]1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CO)C(=O)N[C@@H](CCCCN)C(=O)N[C@H](C(=O)N[C@@H](CC(=O)O)C(=O)N[C@@H](CC(C)C)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CCCNC(=N)N)C(=O)O)C(C)C"
    chem = CompDesc.CompDesc(smiles, "./tests/")
    chem.prepChem()
    chem.set3DChemical()
    chem.computeAll3D()

    self.assertEqual(chem.err, 0)

    # Clean up the working folders under ./tests/.
    for pr_work in ("./tests/MOLCLEAN", "./tests/SDF3D", "./tests/3D"):
        rmtree(pr_work)
def test_knimeConvert(self):
    """After ``convertDesc2DtoKnimeDesc()``, ``all2D`` must expose an ``AMW`` entry."""
    cChem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
    cChem.prepChem()
    cChem.computeAll2D()
    cChem.convertDesc2DtoKnimeDesc()

    # Only lookup failures count as "descriptor missing"; a bare except would
    # also swallow KeyboardInterrupt/SystemExit.
    try:
        cChem.all2D["AMW"]
    except (AttributeError, KeyError, TypeError):
        err = 1
    else:
        err = 0

    self.assertEqual(err, 0)
def computeDescNewChem(self):
    """Compute and write 2D and 3D descriptors for the chemicals to add.

    The list of chemicals (``self.l_chem_toadd``) is built with
    ``extractOnlyNewChem("chemicals", "dsstox_id")`` when it does not exist
    yet, then shuffled in place. For each chemical the original SMILES is
    looked up in the DSSTox SMILES file; the chemical is prepared, its
    InChIKey generated, and the 2D then 3D descriptor matrices are written
    under ``<pr_out>/DESC/``. Failures are reported on stdout and the
    chemical is skipped.
    """
    if "l_chem_toadd" not in self.__dict__:
        self.extractOnlyNewChem("chemicals", "dsstox_id")

    self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")
    d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")

    l_chem_add = self.l_chem_toadd
    shuffle(l_chem_add)
    print(len(l_chem_add))

    for i, chem in enumerate(l_chem_add):
        if i % 1000 == 0:
            print(i)

        # Some chemicals have no entry in the SMILES file.
        try:
            smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
        except KeyError:
            print(i, ": ERROR in SMILES - ", chem)
            continue

        cChem = CompDesc.CompDesc(smiles, self.pr_desc)
        cChem.prepChem()
        if cChem.err != 0:
            print("Error prep: %s -- %s" % (chem, i))
            continue

        cChem.generateInchiKey()
        if cChem.err == 1:
            print("Error inch: %s" % (chem))
            continue

        # 2D descriptors
        cChem.computeAll2D()
        cChem.writeMatrix("2D")

        # 3D descriptors
        cChem.set3DChemical()
        if cChem.err != 0:
            print("Error 3D generation: %s -- %s" % (chem, i))
            continue

        cChem.computeAll3D()
        if cChem.err == 1:
            print("Error 3D desc: %s -- %s" % (chem, i))
        else:
            cChem.writeMatrix("3D")
def test_PrepChem(self):
    """Preparing this mercury-containing SMILES must yield the same SMILES in ``smi``."""
    chem = CompDesc.CompDesc("O=S(=O)(O)c1ccc([Hg])cc1", "./tests/")
    chem.prepChem()

    self.assertEqual(chem.smi, "O=S(=O)(O)c1ccc([Hg])cc1")
def test_compute1D2Ddesc(self):
    """Preparing a SMILES and computing all 2D descriptors must leave ``err`` at 0."""
    chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
    chem.prepChem()
    chem.computeAll2D()

    self.assertEqual(chem.err, 0)
def pushNewChemInDB(self, name_table="chemicals"):
    """Insert the chemicals of ``self.l_chem_toadd`` into *name_table*.

    For every chemical an INSERT statement is built with as many columns as
    could be determined: always ``id``, ``dsstox_id``, ``casn`` and ``name``;
    ``smiles_origin`` when a SMILES exists; ``smiles_clean`` when preparation
    succeeded; ``inchikey`` when InChIKey generation succeeded. New ids
    continue from the current ``MAX(id)``. All statements are sent in a
    single ``runCMDaddElement`` call.

    Args:
        name_table: Name of the table receiving the new chemicals.
    """
    if "l_chem_toadd" not in self.__dict__:
        # The original referenced an undefined name (`field_comparison`)
        # here; "dsstox_id" is the field computeDescNewChem uses for the
        # same call.
        self.extractOnlyNewChem(name_table, "dsstox_id")

    d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)
    d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
    self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")

    # MAX(id) is NULL (None) on an empty table: start numbering from 0.
    id_chem = self.cDB.execCMD("SELECT MAX(id) FROM %s" % (name_table))[0][0] or 0

    print(len(self.l_chem_toadd))

    # Statements are collected and joined once to avoid quadratic string growth.
    l_cmd_sql = []
    for i, chem in enumerate(self.l_chem_toadd):
        if i % 1000 == 0:
            print(i)

        # Case of ~50K chemicals without SMILES.
        try:
            smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
        except KeyError:
            smiles = ""

        # Single quotes are doubled so the name is a valid SQL string literal.
        name = d_dsstox_name[chem]["preferred_name"].replace("'", "''")
        casrn = d_dsstox_name[chem]["casrn"]

        cChem = CompDesc.CompDesc(smiles, self.pr_desc)
        if smiles != "":
            cChem.prepChem()
        else:
            cChem.err = 1

        if cChem.err == 0:
            smi_cleaned = cChem.smi
            inch = cChem.generateInchiKey()
            # The failure check has to come after the call (it was
            # unreachable before it); presumably generateInchiKey reports
            # failure via err, as other callers in this file check.
            if cChem.err == 1:
                inch = ""
        else:
            smi_cleaned = ""
            inch = ""

        id_chem = id_chem + 1
        if smiles == "":
            cmd_sql = "INSERT INTO %s (id, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s');\n" % (
                name_table, id_chem, chem, casrn, name)
        elif smi_cleaned == "":
            cmd_sql = "INSERT INTO %s (id, smiles_origin, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s', '%s');\n" % (
                name_table, id_chem, smiles, chem, casrn, name)
        elif inch == "":
            cmd_sql = "INSERT INTO %s (id, smiles_origin, smiles_clean, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s', '%s', '%s');\n" % (
                name_table, id_chem, smiles, smi_cleaned, chem, casrn, name)
        else:
            cmd_sql = "INSERT INTO %s (id, smiles_origin, smiles_clean, inchikey, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s', '%s', '%s', '%s');\n" % (
                name_table, id_chem, smiles, smi_cleaned, inch, chem, casrn, name)
        l_cmd_sql.append(cmd_sql)

    self.cDB.runCMDaddElement("".join(l_cmd_sql))