def formatOPERAForToolChem(self):
        """Write a tab-separated 2D-descriptor table for the tool chemical list.

        Scans ``self.pr_forToolChem`` and, for the file named
        ``chemicals_listNew.csv`` only, writes ``chemicals_listNew_desc2D.csv``
        in the same folder. The first column is the InChIKey, followed by one
        column per name returned by ``getLdesc("2D")``. A chemical is written
        only when both ``prepChem()`` and ``computeAll2D()`` leave ``err`` at 0.
        """
        # Throwaway instance, used only to obtain the ordered descriptor names.
        l_desc2D = CompDesc.CompDesc("", "").getLdesc("2D")

        pr_desc = self.pr_out + "DESC/"

        for file_chem in listdir(self.pr_forToolChem):
            # Every other file of the folder is deliberately skipped.
            if file_chem != "chemicals_listNew.csv":
                continue

            p_filout = self.pr_forToolChem + file_chem[:-4] + "_desc2D.csv"
            # The context manager closes the file even when a chemical raises.
            with open(p_filout, "w") as filout:
                # The header needs its own line terminator, otherwise the first
                # data row ends up glued to the end of the header.
                filout.write("inchikey\t" + "\t".join(l_desc2D) + "\n")

                l_chem = toolbox.loadMatrixToList(self.pr_forToolChem + file_chem)
                for d_chem in l_chem:
                    c_chem = CompDesc.CompDesc(d_chem["smiles_origin"], pr_desc)
                    c_chem.prepChem()
                    if c_chem.err != 0:
                        continue
                    c_chem.computeAll2D()
                    if c_chem.err != 0:
                        continue
                    l_val = [str(c_chem.all2D[desc]) for desc in l_desc2D]
                    filout.write("%s\t%s\n" % (c_chem.inchikey, "\t".join(l_val)))
Exemple #2
0
 def test_generate3D(self):
     """3D generation on a small chiral SMILES must leave ``err`` at 0."""
     chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
     chem.prepChem()
     chem.set3DChemical()
     self.assertEqual(chem.err, 0)
     # Clean-up of the working folders (presumably created by the calls above).
     for pr_work in ("./tests/MOLCLEAN", "./tests/SDF3D"):
         rmtree(pr_work)
    def computeMissingPNG(self):
        """Generate the PNG of every chemical that has none in ``PNGorganized/``.

        Chemicals are read from the ``chemicals`` table (rows with both a clean
        SMILES and an InChIKey) and visited in random order. The expected image
        is ``PNGorganized/<key[:2]>/<key[2:4]>/<key>.png``. When it is missing,
        the image is computed in ``temp_PNG/`` and copied to that location, but
        only if the temporary file exists and is not empty. Progress
        (index, number of images produced) is printed every 100 chemicals.
        """
        pr_png_root = pathFolder.createFolder(self.pr_out +
                                              "PNGorganized/")
        pr_work = pathFolder.createFolder(self.pr_out + "temp_PNG/")

        # one (smiles_clean, inchikey) row per distinct chemical
        rows = self.cDB.execCMD(
            "SELECT DISTINCT smiles_clean, inchikey FROM chemicals WHERE inchikey is not null AND smiles_clean is not NULL"
        )
        shuffle(rows)

        n_done = 0
        for idx, row in enumerate(rows):
            if idx % 100 == 0:
                print(idx, n_done)

            smi, ikey = row[0], row[1]
            pr_bucket = pr_png_root + ikey[:2] + "/" + ikey[2:4] + "/"
            p_target = pr_bucket + ikey + ".png"
            if path.exists(p_target):
                continue

            chem = CompDesc.CompDesc(smi, pr_work)
            chem.inchikey = ikey
            chem.smi = smi
            chem.computePNG(bg="none")

            p_draft = pr_work + "PNG/" + ikey + ".png"
            if not path.exists(p_draft):
                continue
            if path.getsize(p_draft) > 0:
                pathFolder.createFolder(pr_bucket)
                copyfile(p_draft, p_target)
                n_done += 1
    def formatPrepChemForToolChem(self):
        """Write the prepared-chemical table for the tool chemical list.

        Scans ``self.pr_forToolChem`` and, for the file named
        ``chemicals_listNew.csv`` only, writes ``chemicals_listNew_chemPrep.csv``
        in the same folder with the tab-separated columns dsstox_id,
        smiles_origin, smiles_cleaned, inchikey, drugbank_id, casn and name.
        ``drugbank_id`` is always written as ``NA``. A chemical is written only
        when both ``prepChem()`` and ``computeAll2D()`` leave ``err`` at 0.
        """
        pr_desc = self.pr_out + "DESC/"

        for file_chem in listdir(self.pr_forToolChem):
            # Every other file of the folder is deliberately skipped.
            if file_chem != "chemicals_listNew.csv":
                continue

            p_filout = self.pr_forToolChem + file_chem[:-4] + "_chemPrep.csv"
            # The context manager closes the file even when a chemical raises.
            with open(p_filout, "w") as filout:
                filout.write(
                    "dsstox_id\tsmiles_origin\tsmiles_cleaned\tinchikey\tdrugbank_id\tcasn\tname\n"
                )
                l_chem = toolbox.loadMatrixToList(self.pr_forToolChem + file_chem)
                for d_chem in l_chem:
                    c_chem = CompDesc.CompDesc(d_chem["smiles_origin"], pr_desc)
                    c_chem.prepChem()
                    if c_chem.err != 0:
                        continue
                    # Kept as an additional filter: chemicals whose 2D
                    # descriptors fail are left out of the table.
                    c_chem.computeAll2D()
                    if c_chem.err != 0:
                        continue
                    filout.write(
                        "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" %
                        (d_chem["dsstox_id"], d_chem["smiles_origin"],
                         c_chem.smi, c_chem.inchikey, "NA", d_chem["casn"],
                         d_chem["name"]))
    def updateSMILES(self, name_table="chemicals"):
        """Update rows of ``name_table`` whose original SMILES has changed.

        For each entry of the SMILES file (``self.p_chem_SMILES``) that is
        already in the table (matched on ``dsstox_id``) and whose
        ``Original_SMILES`` differs from the stored ``smiles_origin``, the
        chemical is prepared again and its InChIKey regenerated. The row is
        updated only when the new InChIKey differs from the stored one.

        Failed preparation or InChIKey generation is stored as SQL ``NULL``
        (previously the literal string ``'None'`` was written).

        Args:
            name_table: name of the chemical table to read and update.
        """

        def sql_text(value):
            # Render a value as a SQL literal: None -> NULL, otherwise a quoted
            # string with embedded single quotes doubled.
            if value is None:
                return "NULL"
            return "'%s'" % str(value).replace("'", "''")

        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
        self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")

        cmd_SQL = "SELECT id, dsstox_id, smiles_origin, inchikey, smiles_clean FROM %s " % (
            name_table)
        l_chem_DB = self.cDB.execCMD(cmd_SQL)

        # dsstox_id -> [id, smiles_origin, inchikey, smiles_clean]
        d_chem_DB = {
            chem_DB[1]: [chem_DB[0], chem_DB[2], chem_DB[3], chem_DB[4]]
            for chem_DB in l_chem_DB
        }

        i = 0  # number of rows updated so far
        for d_row in d_dsstox_SMILES.values():
            dsstox_id = d_row["dsstox_substance_id"]
            smiles = d_row["Original_SMILES"]

            # chemical not in the DB yet: nothing to update
            if dsstox_id not in d_chem_DB:
                continue

            id_db, smiles_db, inchikey_db = d_chem_DB[dsstox_id][:3]
            if smiles == smiles_db:
                continue

            # recompute cleaned SMILES and InChIKey from the new SMILES
            c_chem = CompDesc.CompDesc(smiles, self.pr_desc)
            c_chem.prepChem()
            if c_chem.err == 0:
                c_chem.generateInchiKey()
            else:
                c_chem.smi = None
            inchikey = c_chem.inchikey if c_chem.err == 0 else None

            # same InChIKey as stored: the row is left untouched
            if inchikey_db == inchikey:
                continue

            cmd_sql = "UPDATE %s SET smiles_origin = %s, smiles_clean = %s, inchikey=%s WHERE id='%s';" % (
                name_table, sql_text(smiles), sql_text(c_chem.smi),
                sql_text(inchikey), id_db)

            print(i)
            i = i + 1
            self.cDB.updateTable(cmd_sql)

        return
Exemple #6
0
    def test_FP(self):
        """Compute all fingerprints for two chemicals and print every similarity.

        Each (fingerprint, distance) combination is evaluated between the two
        chemicals; the test only asserts that the first chemical ends with
        ``err`` equal to 0.
        """
        chem_ref = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
        chem_ref.prepChem()
        chem_ref.computeFP("All")

        # second chemical, used as comparison partner
        chem_other = CompDesc.CompDesc("CCCO", "./tests/")
        chem_other.prepChem()
        chem_other.computeFP("All")

        metrics = ["Tanimoto", "Dice", "Cosine", "Sokal", "Russel", "RogotGoldberg", "Kulczynski", "McConnaughey", "Asymmetric", "BraunBlanquet", "AllBit"]
        fingerprints = ['Mol', 'pairs', 'MACCS', 'Torsion', 'Morgan']

        # every combination, fingerprint-major order
        combos = [(fp, dist) for fp in fingerprints for dist in metrics]
        for fp, dist in combos:
            score = chem_ref.computeSimilarityFP(chem_other, fp, dist)
            print(fp, dist, score)

        self.assertEqual(chem_ref.err, 0)
Exemple #7
0
 def test_computeOPERAServer(self):
     """PaDEL/CDK descriptors followed by OPERA descriptors must leave ``err`` at 0."""
     chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
     chem.prepChem()
     chem.computePADEL2DFPandCDK()
     chem.computeOperaDesc()
     self.assertEqual(chem.err, 0)
     # Clean-up of the files left under ./tests/ by the run.
     for cmd_clean in ("rm -rf ./tests/PADEL*", "rm -rf ./tests/cdk_desc*"):
         system(cmd_clean)
     rmtree("./tests/OPERA")
Exemple #8
0
 def test_computeOPERA(self):
     """``computeOPERAFromChem(update=1)`` must leave ``err`` at 0."""
     chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
     chem.prepChem()
     chem.computeOPERAFromChem(update=1)
     self.assertEqual(chem.err, 0)
     # Clean-up of the files left under ./tests/ by the run.
     for pattern in ("PADEL*", "CDK*", "cdk_desc*"):
         system("rm -rf ./tests/" + pattern)
     rmtree("./tests/OPERA")
Exemple #9
0
 def test_compute3Ddesc(self):
     """3D generation plus 3D descriptors on a large peptide-like SMILES must leave ``err`` at 0."""
     smiles_in = "CC(C)C[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C@H](CCC(=O)O)NC(=O)[C@@H](N)CCCCN)C(=O)N[C@@H](Cc1cnc[nH]1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CO)C(=O)N[C@@H](CCCCN)C(=O)N[C@H](C(=O)N[C@@H](CC(=O)O)C(=O)N[C@@H](CC(C)C)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CCCNC(=N)N)C(=O)O)C(C)C"
     chem = CompDesc.CompDesc(smiles_in, "./tests/")
     chem.prepChem()
     chem.set3DChemical()
     chem.computeAll3D()
     self.assertEqual(chem.err, 0)
     # Clean-up of the working folders left under ./tests/ by the run.
     for pr_work in ("./tests/MOLCLEAN", "./tests/SDF3D", "./tests/3D"):
         rmtree(pr_work)
Exemple #10
0
 def test_knimeConvert(self):
     """After the KNIME conversion, ``all2D`` must expose the ``AMW`` descriptor."""
     cChem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
     cChem.prepChem()
     cChem.computeAll2D()
     cChem.convertDesc2DtoKnimeDesc()
     # Any failure of the lookup is reported as err = 1. ``Exception`` is used
     # instead of a bare ``except`` so that KeyboardInterrupt / SystemExit
     # still propagate.
     try:
         cChem.all2D["AMW"]
     except Exception:
         err = 1
     else:
         err = 0
     self.assertEqual(err, 0)
    def computeDescNewChem(self):
        """Compute and write 2D and 3D descriptors for the chemicals to add.

        The list ``self.l_chem_toadd`` is built with ``extractOnlyNewChem`` when
        it is not set yet, then shuffled in place. For each chemical the
        original SMILES is read from the SMILES file, the chemical is prepared,
        its InChIKey generated, and the 2D then 3D descriptor matrices are
        written. Every failure is printed and the chemical is skipped; nothing
        is raised for a chemical-level error.
        """
        if "l_chem_toadd" not in self.__dict__:
            self.extractOnlyNewChem("chemicals", "dsstox_id")

        self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")
        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")

        l_chem_add = self.l_chem_toadd
        shuffle(l_chem_add)

        print(len(l_chem_add))
        for i, chem in enumerate(l_chem_add):
            if i % 1000 == 0:
                print(i)

            # Chemicals missing from the SMILES file, or without an original
            # SMILES column, are reported and skipped.
            try:
                smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
            except KeyError:
                print(i, ": ERROR in SMILES - ", chem)
                continue

            cChem = CompDesc.CompDesc(smiles, self.pr_desc)
            cChem.prepChem()
            if cChem.err != 0:
                print("Error prep: %s -- %s" % (chem, i))
                continue

            cChem.generateInchiKey()
            if cChem.err == 1:
                print("Error inch: %s" % (chem))
                continue

            # 2D desc
            cChem.computeAll2D()
            cChem.writeMatrix("2D")

            # 3D desc
            cChem.set3DChemical()
            if cChem.err != 0:
                print("Error 3D generation: %s -- %s" % (chem, i))
                continue

            cChem.computeAll3D()
            if cChem.err == 1:
                print("Error 3D desc: %s -- %s" % (chem, i))
            else:
                cChem.writeMatrix("3D")
Exemple #12
0
 def test_PrepChem(self):
     """Preparing this organomercury SMILES must return it unchanged."""
     smiles_in = "O=S(=O)(O)c1ccc([Hg])cc1"
     chem = CompDesc.CompDesc(smiles_in, "./tests/")
     chem.prepChem()
     self.assertEqual(chem.smi, smiles_in)
Exemple #13
0
 def test_compute1D2Ddesc(self):
     """Computing all 2D descriptors on a small SMILES must leave ``err`` at 0."""
     chem = CompDesc.CompDesc("N=C(O)[C@@H](N)CS", "./tests/")
     chem.prepChem()
     chem.computeAll2D()
     status = chem.err
     self.assertEqual(status, 0)
    def pushNewChemInDB(self, name_table="chemicals"):
        """Insert the chemicals of ``self.l_chem_toadd`` into ``name_table``.

        ``self.l_chem_toadd`` is built with ``extractOnlyNewChem`` when it is
        not set yet. Each chemical gets the next free ``id`` (continuing from
        ``MAX(id)``) and one INSERT statement whose columns depend on what could
        be computed: no SMILES, original SMILES only, original + cleaned SMILES,
        or all of them plus the InChIKey. All statements are sent in a single
        call to ``self.cDB.runCMDaddElement``.

        Args:
            name_table: name of the chemical table to fill.
        """
        if "l_chem_toadd" not in self.__dict__:
            # The comparison field used to be an undefined name (NameError);
            # "dsstox_id" matches the field used by computeDescNewChem.
            self.extractOnlyNewChem(name_table, "dsstox_id")

        d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)
        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")

        self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")
        id_chem = self.cDB.execCMD("SELECT MAX(id) FROM %s" %
                                   (name_table))[0][0]
        if id_chem is None:
            # presumably an empty table (MAX over no rows) -- start from 0
            id_chem = 0

        print(len(self.l_chem_toadd))
        l_cmd = []  # statements are joined once at the end

        for i, chem in enumerate(self.l_chem_toadd):
            if i % 1000 == 0:
                print(i)

            # case of 50K chemicals without SMILES
            try:
                smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
            except KeyError:
                smiles = ""

            # single quotes are doubled so the name is a valid SQL literal
            name = d_dsstox_name[chem]["preferred_name"].replace("'", "''")
            casrn = d_dsstox_name[chem]["casrn"]

            cChem = CompDesc.CompDesc(smiles, self.pr_desc)
            smi_cleaned = ""
            inch = ""
            if smiles != "":
                cChem.prepChem()
                if cChem.err == 0:
                    smi_cleaned = cChem.smi
                    # err is checked after the InChIKey generation, so a failed
                    # generation leaves the inchikey column out of the INSERT.
                    cChem.generateInchiKey()
                    if cChem.err == 0:
                        inch = cChem.inchikey or ""

            id_chem = id_chem + 1
            if smiles == "":
                cmd_sql = "INSERT INTO %s (id, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s');\n" % (
                    name_table, id_chem, chem, casrn, name)
            elif smi_cleaned == "":
                cmd_sql = "INSERT INTO %s (id, smiles_origin, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s', '%s');\n" % (
                    name_table, id_chem, smiles, chem, casrn, name)
            elif inch == "":
                cmd_sql = "INSERT INTO %s (id, smiles_origin, smiles_clean, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s', '%s', '%s');\n" % (
                    name_table, id_chem, smiles, smi_cleaned, chem, casrn,
                    name)
            else:
                cmd_sql = "INSERT INTO %s (id, smiles_origin, smiles_clean, inchikey, dsstox_id, casn, name) VALUES (%s, '%s', '%s', '%s', '%s', '%s', '%s');\n" % (
                    name_table, id_chem, smiles, smi_cleaned, inch, chem,
                    casrn, name)

            l_cmd.append(cmd_sql)

        self.cDB.runCMDaddElement("".join(l_cmd))