def updateSMILES(self, name_table="chemicals"):
        """Update the chemical table where the DSSTox original SMILES changed.

        Each record of the DSSTox SMILES file is matched by its
        ``dsstox_substance_id`` against the rows of ``name_table``. When the
        file's ``Original_SMILES`` differs from the stored ``smiles_origin``,
        the structure is prepared again with CompDesc and, only if the
        recomputed InChIKey differs from the stored one, ``smiles_origin``,
        ``smiles_clean`` and ``inchikey`` are rewritten for that row.

        Args:
            name_table: name of the table to read and update.
        """

        def _sql_value(value):
            # A failed preparation yields None: write SQL NULL rather than
            # the literal string 'None'.
            if value is None:
                return "NULL"
            return "'%s'" % (value)

        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
        self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")

        cmd_SQL = "SELECT id, dsstox_id, smiles_origin, inchikey, smiles_clean FROM %s " % (
            name_table)
        l_chem_DB = self.cDB.execCMD(cmd_SQL)

        # dsstox_id -> [id, smiles_origin, inchikey, smiles_clean]
        d_chem_DB = {}
        for chem_DB in l_chem_DB:
            d_chem_DB[chem_DB[1]] = [
                chem_DB[0], chem_DB[2], chem_DB[3], chem_DB[4]
            ]

        i = 0
        for chem in d_dsstox_SMILES.keys():
            dsstox_id = d_dsstox_SMILES[chem]["dsstox_substance_id"]
            smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
            try:
                l_in_db = d_chem_DB[dsstox_id]
            except KeyError:
                continue  # chemical is not in the DB

            id_db = l_in_db[0]
            smiles_db = l_in_db[1]
            inchikey_db = l_in_db[2]
            if smiles == smiles_db:
                continue

            # recompute cleaned SMILES and InChIKey
            c_chem = CompDesc.CompDesc(smiles, self.pr_desc)
            c_chem.prepChem()
            if c_chem.err == 0:
                c_chem.generateInchiKey()
            else:
                c_chem.smi = None
            # err is checked again because generateInchiKey may set it
            inchikey = c_chem.inchikey if c_chem.err == 0 else None

            if inchikey_db == inchikey:
                continue  # same structure, the row is left untouched

            cmd_sql = "UPDATE %s SET smiles_origin = '%s', smiles_clean = %s, inchikey=%s WHERE id='%s';" % (
                name_table, smiles, _sql_value(c_chem.smi),
                _sql_value(inchikey), id_db)

            print(i)
            i = i + 1
            self.cDB.updateTable(cmd_sql)

        return
Exemple #2
0
    def pushNeighbors(self):
        """Insert the neighbor tables into the "<nameMap>_neighbors" table.

        Nothing is done unless both neighbor files exist in the "Neighbors/"
        folder. For every chemical of the first table one row is added with
        its two neighbor lists, each written as a brace-delimited,
        comma-separated list of double-quoted identifiers.
        """

        def _as_array_literal(l_items):
            # ["a", "b"] -> {"a","b"}
            return "{" + ",".join(["\"%s\"" % (item) for item in l_items]) + "}"

        pr_neighbors = pathFolder.createFolder(self.prout + "Neighbors/")
        p_dim3 = pr_neighbors + "Table_DIM1D2D-2_1.csv"
        p_dimn = pr_neighbors + "Table_DIM1D2D-170_207.csv"
        if not (path.exists(p_dim3) and path.exists(p_dimn)):
            return

        # chemical -> list of neighbors (space separated in the file)
        d_dim3 = {
            chem: row["Neighbors"].split(" ")
            for chem, row in toolbox.loadMatrixToDict(p_dim3).items()
        }
        d_dimn = {
            chem: row["Neighbors"].split(" ")
            for chem, row in toolbox.loadMatrixToDict(p_dimn).items()
        }

        db = DBrequest.DBrequest()
        db.verbose = 0
        for chem, l_neighbors_dim3 in d_dim3.items():
            w_dim3 = _as_array_literal(l_neighbors_dim3)
            w_dimn = _as_array_literal(d_dimn[chem])
            db.addElement(
                "%s_neighbors" % (self.nameMap),
                ["inchikey", "neighbors_dim3", "neighbors_dimn"],
                [chem, w_dim3, w_dimn])
    def updateMissingDTXSID(self, name_table):
        """Fill in chemicals that have no DTXSID using the DSSTox files.

        Rows of ``name_table`` whose ``dsstox_id`` is null are matched to the
        DSSTox SMILES file on the original SMILES. For each match ``casn``,
        ``name`` and ``dsstox_id`` are written back.

        Args:
            name_table: name of the table to read and update.
        """

        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")
        d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)

        # chemicals without DTXSID, indexed by their original SMILES
        cmd_SQL = "SELECT id, smiles_origin FROM %s WHERE dsstox_id is null" % (
            name_table)
        l_chem_DB = self.cDB.execCMD(cmd_SQL)

        d_chem_DB = {}
        for chem_DB in l_chem_DB:
            print(chem_DB)
            d_chem_DB[chem_DB[1]] = chem_DB[0]

        for chem in d_dsstox_SMILES.keys():
            d_chem = d_dsstox_SMILES[chem]
            smiles = d_chem["Original_SMILES"]
            # Only the lookups are guarded: a missing key means the chemical
            # cannot be matched. Errors from the DB update are not swallowed.
            try:
                id_chem = d_chem_DB[smiles]
                casrn = d_chem["casrn"]
                dsstox_id = d_chem["dsstox_substance_id"]
                name = d_dsstox_name[dsstox_id]["preferred_name"]
            except KeyError:
                continue

            # single quotes are doubled so the name is a valid SQL literal
            cmd_sql = "UPDATE %s SET casn = '%s', name = '%s', dsstox_id='%s' WHERE id='%s';" % (
                name_table, casrn, name.replace("'", "''"), dsstox_id,
                id_chem)
            print(cmd_sql)
            self.cDB.updateTable(cmd_sql)
    def extractOnlyNewChem(self, name_table, field_comparison):
        """Set ``self.l_chem_toadd`` to the DSSTox chemicals absent from the DB.

        The list is cached in "updateDSSTOX/chem_list.txt": when that file
        exists it is read back, otherwise the list is computed by removing
        from the DSSTox name table every value of ``field_comparison`` found
        in ``name_table``, and then written to the cache.

        Args:
            name_table: table queried for already-known chemicals.
            field_comparison: column whose values are compared with the keys
                of the DSSTox name table.
        """

        pr_out = pathFolder.createFolder(self.pr_out + "updateDSSTOX/")
        p_filout = pr_out + "chem_list.txt"

        if path.exists(p_filout):
            with open(p_filout, "r") as filin:
                # the file ends with "\n": drop the empty trailing entry so
                # the cached list equals the freshly computed one
                self.l_chem_toadd = [
                    line for line in filin.read().split("\n") if line != ""
                ]
            return

        d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)

        # remove chemicals already in the DB
        l_chem_DB = self.cDB.execCMD("SELECT %s FROM %s" %
                                     (field_comparison, name_table))
        for chem_DB in l_chem_DB:
            chem = chem_DB[0]
            if chem is None:
                continue
            d_dsstox_name.pop(chem, None)

        l_chem_toadd = list(d_dsstox_name.keys())

        # the cache is written only once the list is complete, so a failure
        # above cannot leave an empty cache file behind
        with open(p_filout, "w") as filout:
            for chem in l_chem_toadd:
                filout.write(chem + "\n")

        self.l_chem_toadd = l_chem_toadd
Exemple #5
0
    def plotDensity(self, paff, ltypeaff, nbframeselect, prMDdesc, prout):
        """Write the master matrix of per-frame ligand descriptors.

        Chemicals of the affinity table whose "Type" is in ``ltypeaff`` are
        selected. For each of them the "Ligbyframe" table found under
        ``prMDdesc`` is sampled every ``nbframe // nbframeselect`` frames and
        the sampled rows are written to
        ``prout + "masterM_<types>_<nbframeselect>"``. Nothing is written
        when that file already exists.

        Args:
            paff: affinity table, loaded only if ``self.paff`` is not set.
            ltypeaff: affinity types to keep.
            nbframeselect: number of frames to sample per chemical.
            prMDdesc: folder holding one "<ID>/Ligbyframe" table per chemical.
            prout: output folder (prefix of the master matrix path).
        """

        # create matrix with conformer
        if not "paff" in self.__dict__:
            self.paff = paff
            self.daff = toolbox.loadMatrixToDict(paff)

        dID = {}
        for IDChEMBL in self.daff.keys():
            if self.daff[IDChEMBL]["Type"] in ltypeaff:
                dID[IDChEMBL] = deepcopy(self.daff[IDChEMBL])

        pMasterMatrix = prout + "masterM_" + "-".join(ltypeaff) + "_" + str(
            nbframeselect)
        if path.exists(pMasterMatrix):
            return

        # header: descriptor names of the first selected chemical. It is
        # computed before the file is opened so that a failure here does not
        # leave an empty master matrix on disk.
        lID = list(dID.keys())
        pdescbyFrameh = prMDdesc + lID[0] + "/Ligbyframe"
        dh = toolbox.loadMatrixToDict(pdescbyFrameh)
        lh = list(dh[list(dh.keys())[0]].keys())
        lh.remove("Frame")

        with open(pMasterMatrix, "w") as fmasterMatrix:
            fmasterMatrix.write("ID\t" + "\t".join(lh) + "\n")

            for ichem, IDChEMBL in enumerate(lID):
                print("%s %s" % (IDChEMBL, ichem))
                pdescbyFrame = prMDdesc + IDChEMBL + "/Ligbyframe"
                dframe = toolbox.loadMatrixToDict(pdescbyFrame)
                nbframe = len(dframe.keys())

                # only tables with exactly 1501 frames are used
                if nbframe != 1501:
                    print("%s Error" % (IDChEMBL))
                    continue

                # at least 1, otherwise the sampling loop would never advance
                step = max(1, int(nbframe // nbframeselect))
                iframe = 0
                while iframe < nbframe:
                    print(iframe)
                    idw = str(IDChEMBL) + "_" + str(iframe)
                    # frame keys look like LGD_00042
                    stframe = "LGD_" + str(iframe).zfill(5)
                    print("%s %s" % (stframe, idw))
                    fmasterMatrix.write(
                        "%s\t%s\n" %
                        (idw, "\t".join([str(dframe[stframe][h])
                                         for h in lh])))
                    iframe = iframe + step
Exemple #6
0
    def computeCoords(self, corVal, distributionVal, insertDB=1):
        """Compute the map coordinate files and optionally push them to the DB.

        The two coordinate files are looked up in
        "map_<corVal>-<distributionVal>/". When one is missing they are
        generated with ``runExternalSoft.RComputeMapFiles``; if they are
        still missing afterwards an error is printed and nothing is pushed.
        ``self.pcoords1D2D`` and ``self.pcoords3D`` are set to the two paths.

        Args:
            corVal: correlation value, used in the folder name and passed to
                the external computation.
            distributionVal: distribution value, used the same way.
            insertDB: when 1, each chemical's coordinates are added to the
                "drugbank_coords" table.
        """

        # NOTE(review): descriptors are recomputed only when BOTH attributes
        # are missing -- confirm that `or` was not intended.
        if not "p1D2D" in self.__dict__ and not "p3D" in self.__dict__:
            self.computeDesc(insertDB=0)

        err = 0
        # create coords
        prmap = pathFolder.createFolder(self.prout + "map_" + str(corVal) +
                                        "-" + str(distributionVal) + "/")
        pcoordDim1Dim2 = prmap + "coord1D2D.csv"
        pcoordDim3D = prmap + "coord3D.csv"

        if not (path.exists(pcoordDim1Dim2) and path.exists(pcoordDim3D)):
            runExternalSoft.RComputeMapFiles(self.p1D2D, self.p3D, prmap,
                                             corVal, distributionVal)
            # check that the external run really produced both files
            if not (path.exists(pcoordDim1Dim2)
                    and path.exists(pcoordDim3D)):
                print("ERROR file map")
                err = 1

        self.pcoords1D2D = pcoordDim1Dim2
        self.pcoords3D = pcoordDim3D

        if insertDB == 1 and err == 0:
            dcoord1D2D = toolbox.loadMatrixToDict(pcoordDim1Dim2, sep=",")
            dcoord3D = toolbox.loadMatrixToDict(pcoordDim3D, sep=",")
            cDB = DBrequest.DBrequest()
            cDB.verbose = 0
            for chem in dcoord1D2D.keys():
                # one column of each row is not a dimension
                nbdim1d2d = len(dcoord1D2D[chem].keys()) - 1
                nbdim3d = len(dcoord3D[chem].keys()) - 1

                w1D2D = "{" + ",".join([
                    "\"%s\"" % (dcoord1D2D[chem]["DIM" + str(i)])
                    for i in range(1, nbdim1d2d + 1)
                ]) + "}"
                w3D = "{" + ",".join([
                    "\"%s\"" % (dcoord3D[chem]["DIM3-" + str(i)])
                    for i in range(1, nbdim3d + 1)
                ]) + "}"
                cDB.addElement(
                    "drugbank_coords",
                    ["inchikey", "dim1d2d", "dim3d", "indrugbank"],
                    [chem, w1D2D, w3D, "True"])
Exemple #7
0
    def pushTablePropAllInDB(self, istart=500000):
        """Add the property table rows that are missing from "dsstox_prop".

        Requires ``self.pTableInAll``; when it is not set a message is
        printed and nothing else happens. Each chemical from position
        ``istart`` onwards is looked up by ``db_id`` and, when absent, added
        with its property values (single quotes removed) as one
        brace-delimited list.

        Args:
            istart: non-negative index of the first chemical to process. The
                default keeps the start position that was previously
                hard-coded.
        """

        if not "pTableInAll" in self.__dict__:
            print("GENERATE TABLE FIRST")
            return

        dtopush = toolbox.loadMatrixToDict(self.pTableInAll)
        cDB = DBrequest.DBrequest()
        cDB.verbose = 0
        lchem = list(dtopush.keys())
        for chem in lchem[istart:]:
            outChem = cDB.getRow("dsstox_prop", "db_id='%s'" % (chem))
            if outChem == []:
                wprop = "{" + ",".join([
                    "\"%s\"" % (dtopush[chem][PROP].replace("'", ""))
                    for PROP in LPROP
                ]) + "}"
                cDB.addElement("dsstox_prop", ["db_id", "prop_value"],
                               [chem, wprop])
        return
    def importDescriptors(self, prDesc = "/home/borrela2/interference/Desc/DESCbyCAS/"):
        """Merge the per-CAS descriptor files of the dyes into one matrix.

        For every entry of ``self.dDye`` whose "<casrn>.txt" file exists in
        ``prDesc`` the descriptors are loaded and its color is recorded in
        ``self.dcolor``. The merged descriptors are written as CSV to
        ``self.prout + "descMat"`` with a constant "Aff" column of 1;
        descriptors missing for a chemical are written as "NA".

        Args:
            prDesc: folder containing one descriptor file per CAS number.

        Returns:
            Path of the written descriptor matrix.
        """

        ddesc = {}
        dcolor = {}
        for chemID in self.dDye.keys():
            chem = self.dDye[chemID]
            CASID = chem["casrn"]
            color = chem["color"]

            pdescin = prDesc + CASID + ".txt"
            if path.exists(pdescin):
                ddesc.update(toolbox.loadMatrixToDict(pdescin))
                dcolor[CASID] = color

        self.dcolor = dcolor

        pdesc = self.prout + "descMat"
        # the header comes from the first loaded chemical; it is computed
        # before the file is opened so a failure leaves no empty file
        ldesc = list(ddesc[list(ddesc.keys())[0]].keys())
        print(ldesc)
        print(CASID)
        ldesc.remove("CAS")

        with open(pdesc, "w") as fildesc:
            fildesc.write("ID," + ",".join(ldesc) + ",Aff\n")
            for ID in ddesc.keys():
                lw = []
                for desc in ldesc:
                    if desc in ddesc[ID]:
                        lw.append(str(ddesc[ID][desc]))
                    else:
                        lw.append("NA")
                fildesc.write("%s,%s,1\n" % (ID, ",".join(lw)))
        return pdesc
Exemple #9
0
    def histRMSD(self, paff, prMDanalysis, prout):
        """Write the ligand/protein RMSD summary table and plot its histogram.

        For every chemical of the affinity table that has a
        "<ID>_2hyy_MD/RMSDs/" folder under ``prMDanalysis``, one line with
        the ligand RMSD, the two protein RMSD values and the affinity type is
        written to ``prout + "RMSDprotlig"``. The file is then passed to
        ``runExternalSoft.histRMSD``.

        Args:
            paff: affinity table, loaded only if ``self.paff`` is not set.
            prMDanalysis: folder containing the MD analysis results.
            prout: output folder (prefix of the summary path).
        """

        if not "paff" in self.__dict__:
            self.paff = paff
            self.daff = toolbox.loadMatrixToDict(paff)
        # always read the table from the instance: it is already loaded when
        # self.paff was set by an earlier call
        daff = self.daff

        #lig
        pRMSD = prout + "RMSDprotlig"
        with open(pRMSD, "w") as fRMSD:
            fRMSD.write("CHEMBLid\tRMSDlig\tRMSDca\tRMSDall\tTypeAff\n")

            for CHEMBLid in daff.keys():
                pRMSDin = prMDanalysis + CHEMBLid + "_2hyy_MD/RMSDs/"
                if not path.exists(pRMSDin):
                    continue

                RMSDChem = RMSD.RMSD(pRMSDin)
                RMSDChem.loadRMSDs(["ligand", "protein"])
                RMSDprot = RMSDChem.MRMSDprot()
                RMSDlig = RMSDChem.MRMSDlig()

                fRMSD.write("%s\t%f\t%f\t%f\t%s\n" %
                            (CHEMBLid, RMSDlig, RMSDprot[0], RMSDprot[1],
                             daff[CHEMBLid]["Type"]))

        runExternalSoft.histRMSD(pRMSD, prout)
Exemple #10
0
    def clusterizeTopActive(self, top):
        """Cluster the ``top`` chemicals with the highest affinity.

        The reduced affinity table "top<top>/Aff<top>.csv" is built when it
        does not exist yet: a plain copy of ``self.paff`` when the table has
        at most ``top`` rows, otherwise every row whose "Aff" is at least the
        ``top``-th highest value. The clustering is then run on that table.

        Args:
            top: number of best chemicals to keep.
        """

        pr_top = pathFolder.createFolder(self.prout + "top" + str(top) + "/")
        p_aff_top = pr_top + "Aff" + str(top) + ".csv"

        if not path.exists(p_aff_top):
            d_aff = toolbox.loadMatrixToDict(self.paff)
            if len(d_aff.keys()) <= top:
                copyfile(self.paff, p_aff_top)
            else:
                # affinity of each chemical as a number
                d_val = {}
                for chem_id in d_aff.keys():
                    d_val[chem_id] = float(d_aff[chem_id]["Aff"])

                # value of the top-th best affinity; ties are all kept
                threshold = sorted(d_val.values(), reverse=True)[top - 1]
                l_selected = [
                    chem_id for chem_id in d_aff.keys()
                    if d_val[chem_id] >= threshold
                ]

                f_top = open(p_aff_top, "w")
                f_top.write("CHEMBLID\tAff\tType\n")
                for chem_id in l_selected:
                    row = d_aff[chem_id]
                    f_top.write("%s\t%s\t%s\n" %
                                (row["CHEMBLID"], row["Aff"], row["Type"]))
                f_top.close()

        runExternalSoft.clusterize(self.pdesc, p_aff_top, self.typeAff,
                                   self.cutoff, pr_top)
    def formatChemForToolChem(self, split_nbChem):
        """Write the DSSTox chemicals as shuffled "chemlist_<n>.csv" files.

        The files are created in "ForToolchem/" (stored in
        ``self.pr_forToolChem``); nothing is done when that folder already
        contains files. Each file holds at most ``split_nbChem`` chemicals
        with the columns smiles_origin, dsstox_id, drugbank_id (always "NA"),
        name and casn. A chemical without SMILES gets an empty string.

        Args:
            split_nbChem: number of chemicals per output file.
        """

        pr_out = pathFolder.createFolder(self.pr_out + "ForToolchem/")
        self.pr_forToolChem = pr_out
        l_filin = listdir(pr_out)
        if len(l_filin) > 0:  # case files are already computed
            return

        d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)
        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")

        l_dsstoxid = list(d_dsstox_name.keys())
        # shuffle works in place and returns None: the result must not be
        # assigned back to the list
        shuffle(l_dsstoxid)

        f_out = None
        n_file = 0  # index of the file being written
        i_file = 0  # number of chemicals written in the current file
        try:
            for dsstox_id in l_dsstoxid:
                if f_out is None or i_file == split_nbChem:
                    if f_out is not None:
                        f_out.close()
                    n_file = n_file + 1
                    i_file = 0
                    if n_file > 1:
                        print(n_file)
                    f_out = open("%schemlist_%i.csv" % (pr_out, n_file), "w")
                    f_out.write(
                        "smiles_origin\tdsstox_id\tdrugbank_id\tname\tcasn\n")

                name = d_dsstox_name[dsstox_id]["preferred_name"]
                casrn = d_dsstox_name[dsstox_id]["casrn"]
                try:
                    smiles_origin = d_dsstox_SMILES[dsstox_id][
                        "Original_SMILES"]
                except KeyError:
                    smiles_origin = ""
                f_out.write("%s\t%s\tNA\t%s\t%s\n" %
                            (smiles_origin, dsstox_id, name, casrn))
                i_file = i_file + 1
        finally:
            # f_out stays None when there is no chemical to write
            if f_out is not None:
                f_out.close()
Exemple #12
0
def prepChemForWebsite(PTOX21CHEMSUM, PTOX21CHEM, prout, indb=0):
    """Build the Tox21 chemical summary and write it to ``prout + "ChemSum"``.

    Every chemical of the summary table is indexed by its CAS number with its
    DTXSID, name and original SMILES. Chemicals whose DTXSID has no SMILES in
    the chemical table keep the value 0 and are neither written to the file
    nor pushed to the database.

    Args:
        PTOX21CHEMSUM: comma-separated Tox21 summary table.
        PTOX21CHEM: tab-separated chemical table holding "smiles_origin".
        prout: output folder (prefix of the "ChemSum" path).
        indb: when 1, written chemicals are also added to the
            "bodymap_chemicals" table.

    Returns:
        dict: CAS -> {"DTXID", "name", "SMILES"}, including the chemicals
        whose SMILES is 0.
    """

    dTox21ChemSum = toolbox.loadMatrixToDict(PTOX21CHEMSUM, sep=",")
    dTox21Chem = toolbox.loadMatrixToDict(PTOX21CHEM, sep='\t')

    cDB = DBrequest.DBrequest()

    dout = {}
    for chem in dTox21ChemSum:
        dchem = dTox21ChemSum[chem]
        CAS = dchem["casn"]
        dtxid = dchem["dsstox_substance_id"]
        name = dchem["chnm"]
        try:
            SMILES = dTox21Chem[dtxid]["smiles_origin"]
        except KeyError:
            SMILES = 0  # marker: no SMILES available for this DTXSID
        dout[CAS] = {"DTXID": dtxid, "name": name, "SMILES": SMILES}

    pfilout = prout + "ChemSum"
    with open(pfilout, "w") as filout:
        filout.write("CAS\tDTXID\tName\tSMILES\n")

        for chem in dout.keys():
            if dout[chem]["SMILES"] == 0:
                continue
            filout.write("%s\t%s\t%s\t%s\n" %
                         (chem, dout[chem]["DTXID"], dout[chem]["name"],
                          dout[chem]["SMILES"]))
            if indb == 1:
                cDB.addElement("bodymap_chemicals",
                               ["casn", "dsstox_id", "name", "smiles"], [
                                   chem, dout[chem]["DTXID"],
                                   dout[chem]["name"], dout[chem]["SMILES"]
                               ])

    return dout
Exemple #13
0
    def generateCentroidFile(self):
        """Write the centroid of every split-map cluster to "centroids.csv".

        Requires ``self.prmaps`` and ``self.psplitMap``; a message is printed
        and nothing is written when one of them is missing. For each split
        map the clusters numbered 1, 2, ... are processed until the first
        number without any chemical. The centroid is computed from the first
        two 1D2D coordinates and the first 3D coordinate of the members.
        """

        if not "prmaps" in self.__dict__:
            print("Generate Maps first")
            return

        if not "psplitMap" in self.__dict__:
            print("Generate the split map first")
            return

        lpfmap = list(self.psplitMap.values())
        pfilout = self.prmaps + "centroids.csv"

        coords1D2D = toolbox.loadMatrixCoords(self.pcoords1D2D, 2)
        coords3D = toolbox.loadMatrixCoords(self.pcoords3D, 2)

        dout = {}
        for pmap in lpfmap:

            print(pmap)
            nameMap = pmap.split("/")[-1].split("_")[0]
            dmap = toolbox.loadMatrixToDict(pmap)
            print(nameMap)

            # group the chemicals by cluster number in a single pass instead
            # of rescanning the whole map for every cluster
            dmembers = {}
            for chem in dmap.keys():
                dmembers.setdefault(int(dmap[chem]["map"]), []).append(chem)

            i = 1
            while i in dmembers:
                lcoords = [[
                    coords1D2D[chem][0], coords1D2D[chem][1],
                    coords3D[chem][0]
                ] for chem in dmembers[i]]
                print(len(lcoords))
                print(lcoords[0])
                dout[nameMap + "_" + str(i)] = calculate.centroid(lcoords)
                i = i + 1

        with open(pfilout, "w") as filout:
            filout.write("map\tx\ty\tz\n")
            for namecluster in dout.keys():
                filout.write("%s\t%s\t%s\t%s\n" %
                             (namecluster, dout[namecluster][0],
                              dout[namecluster][1], dout[namecluster][2]))
Exemple #14
0
def loadOPERADesc(pOPERA, table):
    """Register the column names of an OPERA file in ``table``.

    The columns are read from the first row of the comma-separated file. The
    first column is skipped and every other one is added with an id counted
    from 1.
    """

    d_opera = toolbox.loadMatrixToDict(pOPERA, sep = ",")
    first_chem = list(d_opera.keys())[0]
    l_columns = list(d_opera[first_chem].keys())

    db = DBrequest.DBrequest()
    db.verbose = 1
    for id_prop, name_prop in enumerate(l_columns[1:], start=1):
        db.addElement(table, ["id", "name"], [id_prop, name_prop])

#loadOPERADesc("/home/borrela2/sandbox/VM/ChemMap2Site/temp/949289/OPERA/ZWRUINPWMLAQRD-UHFFFAOYSA-N.csv", "desc_opera_name")
Exemple #15
0
    def get_colnames(self):
        """Return the lower-cased column names of the first tripod file.

        The first entry listed in ``self.pr_tripodFiles`` is loaded and the
        keys of its first row are used. The list is also stored in
        ``self.l_col``.
        """

        first_file = listdir(self.pr_tripodFiles)[0]
        print(first_file)
        d_tripod = toolbox.loadMatrixToDict(self.pr_tripodFiles + "/" + first_file)

        first_row = d_tripod[list(d_tripod.keys())[0]]
        self.l_col = [name.lower() for name in list(first_row.keys())]

        return self.l_col
    def computeDescNewChem(self):
        """Compute the 2D and 3D descriptors of the chemicals to add.

        ``self.l_chem_toadd`` is built first when missing. The chemicals are
        processed in random order: the original SMILES is prepared, the
        InChIKey generated, then the 2D and 3D descriptor matrices are
        written in "DESC/". A failure at any step is printed and the chemical
        is skipped (the 2D matrix is kept when only the 3D step fails).
        """

        if not "l_chem_toadd" in self.__dict__:
            self.extractOnlyNewChem("chemicals", "dsstox_id")

        self.pr_desc = pathFolder.createFolder(self.pr_out + "DESC/")
        d_dsstox_SMILES = toolbox.loadMatrixToDict(self.p_chem_SMILES, sep=",")

        l_chem_add = self.l_chem_toadd
        shuffle(l_chem_add)  # in place: self.l_chem_toadd is reordered too

        print(len(l_chem_add))
        for i, chem in enumerate(l_chem_add):
            if i % 1000 == 0:
                print(i)

            try:
                smiles = d_dsstox_SMILES[chem]["Original_SMILES"]
            except KeyError:
                print(i, ": ERROR in SMILES - ", chem)
                continue

            cChem = CompDesc.CompDesc(smiles, self.pr_desc)
            cChem.prepChem()
            if cChem.err != 0:
                print("Error prep: %s -- %s" % (chem, i))
                continue

            cChem.generateInchiKey()
            if cChem.err == 1:
                print("Error inch: %s" % (chem))
                continue

            # 2D desc
            cChem.computeAll2D()
            cChem.writeMatrix("2D")

            #3D desc
            cChem.set3DChemical()
            if cChem.err != 0:
                print("Error 3D generation: %s -- %s" % (chem, i))
                continue

            cChem.computeAll3D()
            if cChem.err == 1:
                print("Error 3D desc: %s -- %s" % (chem, i))
            else:
                cChem.writeMatrix("3D")
    def updateNameAndCAS(self, name_table):
        """Update the name and CAS number of chemicals that differ from DSSTox.

        Every chemical of the DSSTox name table that is present in
        ``name_table`` (matched on ``dsstox_id``) is compared with the stored
        ``name`` and ``casn``; the row is rewritten when one of them differs.

        Args:
            name_table: name of the table to read and update.
        """

        cmd_SQL = "SELECT id, dsstox_id, casn, name FROM %s " % (name_table)
        l_chem_DB = self.cDB.execCMD(cmd_SQL)

        # dsstox_id -> [id, casn, name]
        d_chem_DB = {}
        for chem_DB in l_chem_DB:
            d_chem_DB[chem_DB[1]] = [chem_DB[0], chem_DB[2], chem_DB[3]]

        d_dsstox_name = toolbox.loadMatrixToDict(self.p_chem_name)

        for i, dsstox_id in enumerate(list(d_dsstox_name.keys())):
            if i % 50000 == 0:
                print(i)
            name = d_dsstox_name[dsstox_id]["preferred_name"]
            casrn = d_dsstox_name[dsstox_id]["casrn"]

            try:
                id_db, cas_db, name_db = d_chem_DB[dsstox_id]
            except KeyError:
                continue  # chemical is not in the DB

            # compare the raw name: comparing the SQL-escaped one would flag
            # every name containing a single quote as changed on each run
            if name_db != name or casrn != cas_db:
                cmd_sql = "UPDATE %s SET casn = '%s', name = '%s' WHERE id='%s';" % (
                    name_table, casrn, name.replace("'", "''"), id_db)
                self.cDB.updateTable(cmd_sql)
Exemple #18
0
    def loadRMSDs(self, lload=["ligand", "protein", "residues"]):
        """Load the requested RMSD tables from ``self.prin``.

        Depending on the entries of ``lload``:
          - "ligand": ``self.lig`` gets the "RMSF" table and "ShaEP" list
          - "protein": ``self.prot`` gets the protein RMSD list
          - "residues": ``self.res`` gets the residue RMSD table
        Any other entry is ignored.
        """

        for kind in lload:
            if kind == "ligand":
                self.lig = {
                    "RMSF": toolbox.loadMatrixToDict(
                        self.prin + kind + "/ligRMSF"),
                    "ShaEP": toolbox.matrixToList(
                        self.prin + kind + "/ligShaEP"),
                }
            elif kind == "protein":
                self.prot = toolbox.matrixToList(
                    self.prin + kind + "/protRMSD")
            elif kind == "residues":
                self.res = toolbox.loadMatrixToDict(
                    self.prin + kind + "/resRMSD")
Exemple #19
0
    def pushDSSTOXNeighbors(self, prin):
        """Add every neighbor file of ``prin`` to the "dsstox_neighbors" table.

        Each file is loaded and the neighbors of its first chemical are
        inserted as one brace-delimited list of double-quoted identifiers. A
        file that cannot be loaded is deleted and skipped.

        Args:
            prin: folder containing one neighbor file per chemical; it is
                concatenated directly with the file names.
        """

        cDB = DBrequest.DBrequest()
        cDB.verbose = 0
        for fileNeighbor in listdir(prin):
            pfileNeighbor = prin + fileNeighbor
            try:
                dneighbor = toolbox.loadMatrixToDict(pfileNeighbor)
            except Exception:
                # unreadable file: drop it. Exception (not a bare except) so
                # that an interruption never deletes a valid file
                remove(pfileNeighbor)
                continue
            inchkey = list(dneighbor.keys())[0]
            lneighbors = dneighbor[inchkey]["Neighbors"].split(" ")
            w3D = "{" + ",".join(
                ["\"%s\"" % (neighbor) for neighbor in lneighbors]) + "}"
            cDB.addElement("dsstox_neighbors", ["inchikey", "neighbors_dim3"],
                           [inchkey, w3D])
        return
Exemple #20
0
    def loadLC50(self):
        """Merge the literature LD50 values of ``self.p_LD50`` into ``self.dout``.

        Entries are indexed by DTXSID. A DTXSID not yet in ``self.dout`` is
        created with the five other fields set to "NA"; the four literature
        and consensus fields are then copied from the file.
        """

        # fields filled with "NA" when the chemical is new in self.dout
        l_na = [
            "very_toxic", "nontoxic", "LD50_mgkg", "EPA_category",
            "GHS_category"
        ]
        # fields copied from the loaded table
        l_copy = [
            "LD50_mgkg_Literature", "log(LD50_Literature)", "consensus_LD50",
            "concordance_LD50"
        ]

        d_LC50 = toolbox.loadMatrixToDict(self.p_LD50)
        for chem in d_LC50.keys():
            dtxsid = d_LC50[chem]["DTXSID"]
            # direct membership test: no key list is rebuilt per chemical
            if dtxsid not in self.dout:
                self.dout[dtxsid] = {}
                for field in l_na:
                    self.dout[dtxsid][field] = "NA"

            for field in l_copy:
                self.dout[dtxsid][field] = d_LC50[chem][field]
Exemple #21
0
    def importDescriptors(self,
                          prDesc="/home/borrela2/interference/Desc/DESCbyCAS/"
                          ):
        """Merge the per-CAS descriptor files of ``self.DB`` into one matrix.

        The CAS number is the second "_"-separated field of "Structure". For
        every entry whose "<CAS>.txt" file exists in ``prDesc`` the
        descriptors are loaded and its "Wavelength" is recorded in
        ``self.dwave``. The merged descriptors are written as CSV to
        ``self.prout + "descMat"`` with a constant "Aff" column of 1;
        descriptors missing for a chemical are written as "NA".

        Args:
            prDesc: folder containing one descriptor file per CAS number.

        Returns:
            Path of the written descriptor matrix.
        """

        ddesc = {}
        dwave = {}
        for chemID in self.DB.keys():
            chem = self.DB[chemID]
            CASID = chem["Structure"].split("_")[1]
            Abs = chem["Wavelength"]

            pdescin = prDesc + CASID + ".txt"
            if path.exists(pdescin):
                ddesc.update(toolbox.loadMatrixToDict(pdescin))
                dwave[CASID] = Abs

        self.dwave = dwave

        pdesc = self.prout + "descMat"
        # the header comes from the first loaded chemical; it is computed
        # before the file is opened so a failure leaves no empty file
        ldesc = list(ddesc[list(ddesc.keys())[0]].keys())
        print(ldesc)
        print(CASID)
        ldesc.remove("CAS")

        with open(pdesc, "w") as fildesc:
            fildesc.write("ID," + ",".join(ldesc) + ",Aff\n")
            for ID in ddesc.keys():
                lw = []
                for desc in ldesc:
                    if desc in ddesc[ID]:
                        lw.append(str(ddesc[ID][desc]))
                    else:
                        lw.append("NA")
                fildesc.write("%s,%s,1\n" % (ID, ",".join(lw)))
        return pdesc
    def updateDescOPERA(self):
        """Fill ``desc_opera`` for the chemicals that have no OPERA descriptors.

        Steps:
          1. load the OPERA descriptor names used by the DB (id -> name)
          2. load the DTXSID -> DTXCID mapping
          3. select the chemicals whose description has no ``desc_opera`` and
             map each InChIKey to the first of its DTXSIDs that has a DTXCID
          4. read the precomputed OPERA files; descriptors that are not found
             keep the value "-9999"
          5. write the values, ordered by descriptor id, into
             ``chemical_description``
        """

        # 1. load opera descriptor used for the DB
        cmd_SQL = "SELECT id, name FROM chem_descriptor_opera_name_new "
        l_opera_desc = self.cDB.execCMD(cmd_SQL)

        d_opera_desc = {}
        for opera_desc in l_opera_desc:
            d_opera_desc[int(opera_desc[0])] = opera_desc[1]

        # 2. load the DTXSID -> DTXCID mapping
        d_dsstox2dsctox = toolbox.loadMatrixToDict(self.p_dtxsid_dtxcid_map,
                                                   sep="\t")

        # 3. load inch and dtx id from chemicals table
        cmd_SQL = "SELECT dsstox_id, inchikey FROM chemicals WHERE inchikey is not null"
        l_chemicalsDB = self.cDB.execCMD(cmd_SQL)

        # 3.2 load inch without desc opera
        cmd_SQL = "SELECT inchikey FROM chemical_description WHERE desc_opera is null"
        l_inchDB = self.cDB.execCMD(cmd_SQL)

        # sorted because it is searched with a binary search below
        l_inch_toupdate = sorted([inch[0] for inch in l_inchDB])

        # inchikey -> list of DTXSID, only for the chemicals to update
        d_chemicalsDB = {}
        for chemicalsDB in l_chemicalsDB:
            inchikey = chemicalsDB[1]
            if toolbox.binary_search(l_inch_toupdate, inchikey) == -1:
                continue
            dsstoxid = chemicalsDB[0]
            if dsstoxid is None:
                continue
            d_chemicalsDB.setdefault(inchikey, []).append(dsstoxid)

        # DTXCID -> {"inchikey", "opera": {descriptor name: value}}
        d_update = {}
        for inchikey in d_chemicalsDB.keys():
            for dsstox_sid in d_chemicalsDB[inchikey]:
                try:
                    dsstox_cid = d_dsstox2dsctox[dsstox_sid][
                        "dsstox_compound_id"]
                except KeyError:
                    continue
                d_update[dsstox_cid] = {
                    "inchikey": inchikey,
                    # default value kept when a descriptor is not found
                    "opera": dict.fromkeys(d_opera_desc.values(), "-9999"),
                }
                break  # the first DTXSID with a DTXCID is enough

        # 4. load OPERA descriptors
        for pr_OPERA_pred in self.l_prOPERA_pred:
            for p_fopera in listdir(pr_OPERA_pred):
                print(pr_OPERA_pred + p_fopera)
                d_temp = toolbox.loadMatrixToDict(pr_OPERA_pred + p_fopera,
                                                  sep=",")

                for dtx_cid in d_update.keys():
                    # skip the chemical at once when the file does not have it
                    try:
                        d_pred = d_temp[dtx_cid]
                    except KeyError:
                        continue
                    d_opera = d_update[dtx_cid]["opera"]
                    for desc in d_opera.keys():
                        try:
                            d_opera[desc] = float(d_pred[desc])
                        except (KeyError, ValueError, TypeError):
                            # descriptor absent from this file or not numeric
                            pass

        # 5. update DB
        l_dtx_cid = list(d_update.keys())
        shuffle(l_dtx_cid)
        nb_desc = len(d_opera_desc)
        for j, dtx_cid in enumerate(l_dtx_cid):
            inchikey = d_update[dtx_cid]["inchikey"]

            # values ordered by descriptor id; the ids are expected to be
            # 1..nb_desc without gap
            l_toadd = [
                d_update[dtx_cid]["opera"][d_opera_desc[i]]
                for i in range(1, nb_desc + 1)
            ]

            wOPERA = "{" + ",".join(
                ["\"%s\"" % (str(desc_val)) for desc_val in l_toadd]) + "}"
            cmd_sql = "UPDATE chemical_description SET desc_opera = '%s' WHERE inchikey='%s';" % (
                wOPERA, inchikey)
            self.cDB.verbose = 0
            self.cDB.updateTable(cmd_sql)

            if j % 1000 == 0:
                print(j)
Exemple #23
0
    def generateNeighborMatrix(self, nbNeighbor, lnDim):
        """Write the nearest neighbors (Euclidean distance) of each chemical.

        Two modes are used depending on ``self.nameMap``:

        * ``"dsstox"``: one file per chemical is written in
          ``<prout>/Neighbors/``. The distance uses the first two 1D2D
          coordinates and the first 3D coordinate. Candidates are the
          chemicals whose split-map index is within +/-1 of the query on at
          least one of the maps listed in ``self.psplitMap``. A chemical
          whose file already exists is skipped.
        * any other map: one table ``Table_DIM1D2D-<n1>_<n2>.csv`` is written
          using the first ``lnDim[0]`` 1D2D dimensions and the first
          ``lnDim[1]`` 3D dimensions. Nothing is done if the table exists.

        Args:
            nbNeighbor: maximum number of neighbors kept per chemical.
            lnDim: ``[n1D2D, n3D]``; ``[]`` means "number of columns minus
                one" of each coordinate table (presumably every dimension
                column, the remaining one being the ID -- TODO confirm).

        Returns:
            1 when ``self.pcoords1D2D`` is not set yet, otherwise None.
        """
        if "pcoords1D2D" not in self.__dict__:
            print("Compute Coord first")
            return 1

        if self.nameMap == "dsstox":
            # no N dimension because too slow
            prNeighbor = pathFolder.createFolder(self.prout + "Neighbors/")

            dDim1D2D = toolbox.loadMatrixCoords(self.pcoords1D2D, 2)
            dDim3D = toolbox.loadMatrixCoords(self.pcoords3D, 2)
            lmap = [
                toolbox.loadMatrixToDict(pmap)
                for pmap in self.psplitMap.values()
            ]

            # from 1D2D coord, visited in random order
            lchem = list(dDim1D2D.keys())
            shuffle(lchem)
            for inch in lchem:
                pfilout = prNeighbor + inch
                if path.exists(pfilout):
                    continue

                # The file is created before the search, exactly as before;
                # the context manager only guarantees it is closed on error.
                with open(pfilout, "w") as filout:
                    filout.write("ID\tNeighbors\n")

                    # define the maps where we inspect: keep each candidate
                    # once (the former test compared the key against the
                    # list of map dictionaries and never filtered anything)
                    linmap = []
                    sinmap = set()
                    for dmap in lmap:
                        mapin = int(dmap[inch]["map"])
                        for chem in dmap.keys():
                            if chem in sinmap:
                                continue
                            if abs(int(dmap[chem]["map"]) - mapin) <= 1:
                                sinmap.add(chem)
                                linmap.append(chem)

                    print(len(linmap))
                    lcoordRef = [
                        dDim1D2D[inch][0], dDim1D2D[inch][1], dDim3D[inch][0]
                    ]
                    ddist = {}
                    for ID in linmap:
                        if ID == inch:
                            continue
                        lcoordID = [
                            dDim1D2D[ID][0], dDim1D2D[ID][1], dDim3D[ID][0]
                        ]
                        ddist[ID] = sqrt(
                            sum([(xi - yi)**2
                                 for xi, yi in zip(lcoordID, lcoordRef)]))

                    lID = [
                        neighbor[0]
                        for neighbor in sorted(ddist.items(),
                                               key=lambda x: x[1])
                    ][:nbNeighbor]
                    filout.write("%s\t%s\n" % (inch, " ".join(lID)))
            return

        # compute all dimensions, opening without restriction
        dDim1D2D = None
        dDim3D = None
        if lnDim == []:
            dDim1D2D = toolbox.loadMatrixToDict(self.pcoords1D2D, sep=",")
            dDim3D = toolbox.loadMatrixToDict(self.pcoords3D, sep=",")
            chem1 = list(dDim1D2D.keys())[0]
            n1D2D = len(list(dDim1D2D[chem1].keys())) - 1
            n3D = len(list(dDim3D[chem1].keys())) - 1
            lnDim = [n1D2D, n3D]

        prNeighbor = pathFolder.createFolder(self.prout + "Neighbors/")
        pfilout = prNeighbor + "Table_DIM1D2D-" + str(
            lnDim[0]) + "_" + str(lnDim[1]) + ".csv"
        if path.exists(pfilout):
            return

        # reuse the tables when they were already loaded to size lnDim
        if dDim1D2D is None:
            dDim1D2D = toolbox.loadMatrixToDict(self.pcoords1D2D, sep=",")
            dDim3D = toolbox.loadMatrixToDict(self.pcoords3D, sep=",")

        # coordinate vector of each chemical: 1D2D dimensions then 3D ones
        dcor = {}
        for inch in dDim1D2D.keys():
            lcoord = [
                float(dDim1D2D[inch]["DIM" + str(idim)])
                for idim in range(1, lnDim[0] + 1)
            ]
            lcoord.extend([
                float(dDim3D[inch]["DIM3-" + str(idim)])
                for idim in range(1, lnDim[1] + 1)
            ])
            dcor[inch] = lcoord

        # all-against-all distances, only the closest IDs are kept
        dneighbor = {}
        for ID in dcor.keys():
            ddist = {}
            for ID2 in dcor.keys():
                if ID != ID2:
                    ddist[ID2] = sqrt(
                        sum([(xi - yi)**2
                             for xi, yi in zip(dcor[ID], dcor[ID2])]))
            dneighbor[ID] = [
                neighbor[0]
                for neighbor in sorted(ddist.items(), key=lambda x: x[1])
            ][:nbNeighbor]

        # write in table only once everything is computed, so that a failure
        # does not leave a partial table that would be taken as complete
        with open(pfilout, "w") as ftable:
            ftable.write("ID\tNeighbors\n")
            for ID in dneighbor.keys():
                ftable.write("%s\t%s\n" % (ID, " ".join(dneighbor[ID])))
Exemple #24
0
    def generateTablePropAllDSSTOX(self,
                                   prDSSTOXPred,
                                   pknownSDF,
                                   pLD50,
                                   pDSSTOXMapOnCID,
                                   insertDB=0):
        """Build the property table ``<prout>/tablePropForDB.csv``.

        For every substance of the identifier map that is also in
        ``self.dchem``, the properties ``LPROP[3:]`` start at "NA" and are
        then filled, in this order (later sources override earlier ones):
        prediction CSV files, SDF entries with the same InChIKey, LD50 rows
        with the same InChIKey.

        Substances missing from ``self.dchem`` and malformed map lines are
        skipped entirely. They used to be left as partial entries without an
        "inchikey" key, which made the later steps fail.

        Args:
            prDSSTOXPred: folder (with trailing separator) of prediction
                files; only names ending in "csv" are read.
            pknownSDF: SDF file parsed with ``parseSDF``.
            pLD50: LD50 table, matched on its "InChI Key_QSARr" column.
            pDSSTOXMapOnCID: comma-separated map with the columns
                "dsstox_substance_id", "DSSTox_Structure_Id" and
                "preferred_name".
            insertDB: when 0 and the table already exists, the existing
                table is reused and nothing is recomputed.

        Returns:
            None. The table path is stored in ``self.pTableInAll``.
        """
        pTableinfo = self.prout + "tablePropForDB.csv"
        if path.exists(pTableinfo) and insertDB == 0:
            self.pTableInAll = pTableinfo
            return

        # properties filled from the data sources; LPROP[:3] are presumably
        # the identifier columns set from the map file -- TODO confirm
        lpropFill = LPROP[3:]
        spropFill = set(lpropFill)

        # initialisation of the full dictionary
        dDSSTOX = {}
        dmapCIDtoSID = {}
        print("LOAD INFO MAP SID to CID")

        with open(pDSSTOXMapOnCID, "r", encoding="utf8",
                  errors="ignore") as filMap:
            llines = filMap.readlines()

        lhead = llines[0].replace("\"", "").strip().split(",")
        iDSSSID = lhead.index("dsstox_substance_id")
        iDSSCID = lhead.index("DSSTox_Structure_Id")
        iname = lhead.index("preferred_name")

        for line in llines[1:]:
            lelem = toolbox.formatLine(line).strip().split(",")
            try:
                DTXSID = lelem[iDSSSID]
                dchemInfo = self.dchem[DTXSID]
                dentry = {
                    "preferred_name": lelem[iname],
                    "SMILES": dchemInfo["smiles_clean"],
                    "inchikey": dchemInfo["inchikey"],
                }
                DTXCID = lelem[iDSSCID]
            except (KeyError, IndexError):
                # short line or substance unknown in self.dchem: skip it
                continue
            # registered only once complete
            dDSSTOX[DTXSID] = dentry
            dmapCIDtoSID[DTXCID] = DTXSID

        print("INIT DICTIONNARY")
        # put in dict out -> initialization to NA
        for chem in dDSSTOX.keys():
            for PROP in lpropFill:
                dDSSTOX[chem][PROP] = "NA"

        print("LOAD PRED")
        # load prediction and update table
        for ppred in listdir(prDSSTOXPred):
            if ppred[-3:] != "csv":
                continue
            print(ppred, "Load file")
            dtemp = toolbox.loadMatrixToDict(prDSSTOXPred + ppred, sep=",")
            for chemIDtemp in dtemp.keys():
                DTXCID = dtemp[chemIDtemp]["MoleculeID"]
                if DTXCID not in dmapCIDtoSID:
                    continue
                DTXSID = dmapCIDtoSID[DTXCID]
                for k in dtemp[chemIDtemp].keys():
                    if k in spropFill:
                        dDSSTOX[DTXSID][k] = dtemp[chemIDtemp][k]

        print("PRED LOAD")

        print("LOAD SDF AND LD50")
        # load sdf
        dsdf = parseSDF.parseSDF(pknownSDF, "InChI Key_QSARr", self.prout)
        dsdf.parseAll()

        # load LD50 file
        dLD50 = toolbox.loadMatrixToDict(pLD50)
        print("SDF and table LD50 loaded")

        # index both sources by InChIKey once instead of scanning them for
        # every chemical; source order is kept inside each list so that the
        # override order is unchanged
        dsdfByInch = {}
        for dchemIDsdf in dsdf.lc:
            dsdfByInch.setdefault(dchemIDsdf["InChI Key_QSARr"],
                                  []).append(dchemIDsdf)
        dLD50ByInch = {}
        for chemIDLD50 in dLD50.keys():
            dLD50ByInch.setdefault(dLD50[chemIDLD50]["InChI Key_QSARr"],
                                   []).append(dLD50[chemIDLD50])

        for chem in dDSSTOX.keys():
            tempinchKey = dDSSTOX[chem]["inchikey"]
            # SDF entries first, then LD50 rows (LD50 overrides SDF)
            lsource = dsdfByInch.get(tempinchKey, []) + dLD50ByInch.get(
                tempinchKey, [])
            for dsource in lsource:
                for ksource in dsource.keys():
                    if ksource in spropFill:
                        dDSSTOX[chem][ksource] = dsource[ksource]

        print("WRITE TABLE")
        with open(pTableinfo, "w") as filout:
            filout.write("ID\t%s\n" % ("\t".join(LPROP)))
            for chem in dDSSTOX.keys():
                filout.write(
                    "%s\t%s\n" %
                    (chem, "\t".join(
                        [str(dDSSTOX[chem][prop]) for prop in LPROP])))

        self.pTableInAll = pTableinfo
    def pushCoords(self):
        """Push map coordinates to ``chemical_description`` rows lacking them.

        The rows considered are those of map ``self.map_name`` whose
        ``dim1d2d`` is null. For each of them, the first ten dimensions of
        ``coord1D2D.csv`` (DIM1..DIM10) and of ``coord3D.csv``
        (DIM3-1..DIM3-10) are written as array literals. ``d3_cube``
        (DIM1, DIM2, DIM3-1) is written only when both sets are available.
        A chemical missing from a file, or lacking one of the ten columns,
        gets no value for that set; a chemical with neither is left alone.

        Returns:
            None.
        """

        def formatDims(d_coords, inchikey, prefix):
            """Return the "{...}" literal of the ten dimensions, or None."""
            try:
                d_chem = d_coords[inchikey]
                return "{" + ",".join([
                    "\"%s\"" % (str(d_chem[prefix + str(i_dim)]))
                    for i_dim in range(1, 11)
                ]) + "}"
            except KeyError:
                # unknown chemical or missing dimension column
                return None

        self.cDB.connOpen()
        d_coords_1D2D = toolbox.loadMatrixToDict(self.pr_coords +
                                                 "coord1D2D.csv",
                                                 sep=",")
        d_coords_3D = toolbox.loadMatrixToDict(self.pr_coords + "coord3D.csv",
                                               sep=",")

        l_inchikey = self.cDB.extractColoumn(
            "chemical_description", "inchikey",
            "WHERE dim1d2d is null AND map_name = '%s';" % (self.map_name))
        l_inchikey = [inch[0] for inch in l_inchikey]
        self.cDB.connClose()

        shuffle(l_inchikey)

        # NOTE(review): statements are built by string interpolation because
        # the parameter support of updateTable is not visible from here.
        for inchikey in l_inchikey:
            wdim1d2d = formatDims(d_coords_1D2D, inchikey, "DIM")
            wdim3d = formatDims(d_coords_3D, inchikey, "DIM3-")

            l_set = []
            if wdim1d2d is not None:
                l_set.append("dim1d2d = '%s'" % (wdim1d2d))
            if wdim3d is not None:
                l_set.append("dim3d = '%s'" % (wdim3d))
            if not l_set:
                continue

            if wdim1d2d is not None and wdim3d is not None:
                wd3_cube = "{\"%s\",\"%s\",\"%s\"}" % (
                    d_coords_1D2D[inchikey]["DIM1"],
                    d_coords_1D2D[inchikey]["DIM2"],
                    d_coords_3D[inchikey]["DIM3-1"])
                l_set.append("d3_cube = '%s'" % (wd3_cube))

            cmd_sql = "UPDATE chemical_description SET %s WHERE inchikey='%s' AND map_name = '%s';" % (
                ", ".join(l_set), inchikey, self.map_name)
            self.cDB.updateTable(cmd_sql)

        return
def _collectInfo(cpd, ddescCpd, lkinfo):
    """Return the values of `lkinfo` for one compound, "NA" when unavailable.

    A key present in `cpd` takes precedence over the descriptor row
    `ddescCpd`, even when its value in `cpd` is the empty string.
    """
    linfo = []
    for kinfo in lkinfo:
        if kinfo in cpd:
            val = cpd[kinfo]
        elif kinfo in ddescCpd:
            val = ddescCpd[kinfo]
        else:
            val = ""
        linfo.append(str(val) if val != "" else "NA")
    return linfo


def formatInfo(db, pdesc, lkinfo, pjs, prout):
    """Export compound information as a JS function and as a table.

    A ``loadInfoDrug()`` function returning ``{name: [values...]}`` is
    appended to `pjs` (created when missing), and the same values are
    written to ``<prout>tableinfo.csv``. Each value of `lkinfo` is read from
    the compound entry first, then from the descriptor table `pdesc`, and is
    "NA" when absent or empty.

    When `pdesc` does not exist nothing is written at all. Previously a
    truncated ``loadInfoDrug`` header was appended to `pjs` before
    returning.

    Args:
        db: object exposing ``lc`` (the compound entries) and ``name`` (the
            key holding the compound name in each entry).
        pdesc: path of the descriptor table.
        lkinfo: ordered list of the information keys to export.
        pjs: path of the JS file to append to.
        prout: output folder, with trailing separator.

    Returns:
        None.
    """
    # load 1D2D desc; checked before touching any output file
    if not path.exists(pdesc):
        return
    ddesc = toolbox.loadMatrixToDict(pdesc)

    # resolve the values once, they feed both outputs
    lrow = []
    for cpd in db.lc:
        namecpd = cpd[db.name]
        ddescCpd = ddesc[namecpd] if namecpd in ddesc else {}
        lrow.append((namecpd, _collectInfo(cpd, ddescCpd, lkinfo)))

    # write JS ("a" appends to an existing file and creates a missing one)
    lw = [
        "\"" + str(namecpd) + "\"" + ":[" +
        ",".join(["\"" + val + "\"" for val in linfo]) + "]"
        for namecpd, linfo in lrow
    ]
    with open(pjs, "a") as js:
        js.write("function loadInfoDrug(){\n")
        js.write("    var infodrug={")
        js.write(",".join(lw) + "};\n")
        js.write("    return(infodrug);\n};\n\n\n")

    # write table
    pinfo = prout + "tableinfo.csv"
    with open(pinfo, "w") as finfo:
        finfo.write("ID\t" + "\t".join(lkinfo) + "\n")
        for namecpd, linfo in lrow:
            finfo.write("%s\t%s\n" % (namecpd, "\t".join(linfo)))
Exemple #27
0
    def loadlistChem(self):
        """Load the prepared chemical table into ``self.dchem``.

        When ``<prout>/forDB/db.csv`` exists it is simply reloaded.
        Otherwise every chemical of ``self.plistChem`` (comma separated for
        the "dsstox" map, tab separated for the others) is prepared with
        ``Chemical.Chemical`` and the resulting table is written to that
        file. A chemical whose preparation fails (``err == 1``) is kept with
        "NA" as cleaned SMILES and InChIKey and ``qsar_ready`` set to 0.

        If a row has neither a "SMILES" nor an "Original_SMILES" column,
        "ERROR" is printed and the method returns without setting
        ``self.dchem``.
        """
        prForDB = pathFolder.createFolder(self.prout + "forDB/")
        pfilout = prForDB + "db.csv"

        # table already generated -> reload it
        if path.exists(pfilout):
            self.dchem = toolbox.loadMatrixToDict(pfilout, sep="\t")
            return

        # rewrite pfas and tox21 with comma
        sepIn = "," if self.nameMap == "dsstox" else "\t"
        dchemIn = toolbox.loadMatrixToDict(self.plistChem, sep=sepIn)

        dprepared = {}
        for drow in dchemIn.values():
            if "SMILES" in drow:
                smilesIn = drow["SMILES"]
                idChem = drow["DTXSID"]
            elif "Original_SMILES" in drow:
                smilesIn = drow["Original_SMILES"]
                idChem = drow["dsstox_substance_id"]
            else:
                print("ERROR")
                return

            # prepare ligand
            cprep = Chemical.Chemical(smilesIn, self.prDesc)
            cprep.prepChem()
            if cprep.err == 1:
                ready, smilesClean, inchi = 0, "NA", "NA"
            else:
                ready = 1
                smilesClean = cprep.smi
                inchi = cprep.generateInchiKey()
                cprep.writeSMIClean()

            dprepared[idChem] = {
                "db_id": idChem,
                "smiles_origin": smilesIn,
                "smiles_clean": smilesClean,
                "inchikey": inchi,
                "qsar_ready": ready,
            }

        # write table for control -> after open and put in the DB
        with open(pfilout, "w", encoding="utf8") as fdb:
            fdb.write(
                "db_id\tsmiles_origin\tsmiles_clean\tinchikey\tqsar_ready\t%s\n"
                % (self.nameMap))
            for idChem, dinfo in dprepared.items():
                fdb.write("%s\t%s\t%s\t%s\t%s\t%s\n" %
                          (idChem, dinfo["smiles_origin"],
                           dinfo["smiles_clean"], dinfo["inchikey"],
                           dinfo["qsar_ready"], 1))

        self.dchem = dprepared
Exemple #28
0
    def splitMap(self, nbsplit, dim, insertDB=0):
        """Split the chemicals into consecutive bins along one map axis.

        The bin of each chemical is written to
        ``<prmap>/split_<nbsplit>/map{x,y,z}_split.csv`` (x for ``dim`` 1,
        y for 2, z otherwise) and the path is stored in
        ``self.psplitMap[dim]``.

        Binning: a threshold sweeps from the minimum to the maximum
        coordinate in steps of 0.10. At each step the chemicals lying under
        the threshold and not yet assigned join the current bin. A new bin
        is started at the beginning of a step once the current one holds
        more than ``int(nbchem / nbsplit)`` chemicals.

        NOTE(review): minimum and maximum both start at 0.0, so the sweep
        always covers 0. This is kept so that bins stay reproducible.

        Args:
            nbsplit: target number of bins (sets the bin size).
            dim: 1 and 2 use the first and second 1D2D coordinates; any
                other value uses the first 3D coordinate.
            insertDB: when 1, the bins are also pushed to the
                ``dsstox_coords`` table. When 0 and the file already
                exists, nothing is done.

        Returns:
            None.
        """
        if "prmap" not in self.__dict__:
            print("Generate the map files first")
            return

        prout = pathFolder.createFolder(self.prmap + "split_" + str(nbsplit) +
                                        "/")
        self.prmaps = prout

        if "psplitMap" not in self.__dict__:
            self.psplitMap = {}

        # generate only one file with chem and map
        if dim == 1:
            mapIn = "mapx"
        elif dim == 2:
            mapIn = "mapy"
        else:
            mapIn = "mapz"
        pfilout = prout + mapIn + "_split.csv"

        self.psplitMap[dim] = pfilout
        if path.exists(pfilout) and insertDB == 0:
            return

        if not path.exists(pfilout):
            from bisect import bisect_right

            if dim == 1 or dim == 2:
                din = toolbox.loadMatrixCoords(self.prmap + "coord1D2D.csv",
                                               2)
            else:
                din = toolbox.loadMatrixCoords(self.prmap + "coord3D.csv", 2)
            # column read in the loaded coordinates; any dim other than 2
            # reads the first one, consistently with the file chosen above
            iaxis = 1 if dim == 2 else 0

            nbchembymap = int(len(din) / nbsplit)

            # calibrate max and min
            print("== Initiate calibration ==")
            maxDim = 0.0
            minDim = 0.0
            for chem in din.keys():
                dimVal = din[chem][iaxis]
                if dimVal > maxDim:
                    maxDim = dimVal
                if dimVal < minDim:
                    minDim = dimVal
            print("== End calibration ==")

            # successive thresholds of the sweep, accumulated exactly as the
            # former in-loop increment did
            lthreshold = []
            dimVal = minDim
            while dimVal < maxDim:
                dimVal = dimVal + 0.10
                lthreshold.append(dimVal)

            # a chemical is picked up at the first threshold strictly above
            # its value; those never under a threshold stay unassigned
            nbsweep = len(lthreshold)
            lsweep = [[] for _ in range(nbsweep)]
            for chem in din.keys():
                isweep = bisect_right(lthreshold, din[chem][iaxis])
                if isweep < nbsweep:
                    lsweep[isweep].append(chem)

            # merge consecutive sweeps into bins of the requested size
            imap = 1
            dmap = {imap: []}
            for lchemSweep in lsweep:
                if len(dmap[imap]) > nbchembymap:
                    imap = imap + 1
                    dmap[imap] = []
                dmap[imap].extend(lchemSweep)

            print("==== Write output ====")
            with open(pfilout, "w") as filout:
                filout.write("inchikey\tmap\n")
                for d in dmap.keys():
                    for chem in dmap[d]:
                        filout.write("%s\t%s\n" % (chem, d))

        if insertDB == 1:
            cDB = DBrequest.DBrequest()

            dmap = toolbox.loadMatrixToDict(pfilout)
            tableIn = "dsstox_coords"
            for chem in dmap.keys():
                inch = chem.replace("\"", "")

                cmdSQL = "UPDATE %s SET %s=%s WHERE inchikey='%s';" % (
                    tableIn, mapIn, dmap[chem]["map"], inch)
                cDB.updateTable(cmdSQL)
Exemple #29
0
    def computeCoords(self, corVal, distributionVal, insertDB=1):
        """Compute the map coordinate files and optionally push them to the DB.

        The files ``coord1D2D.csv`` and ``coord3D.csv`` are expected in
        ``<prout>/map_<corVal>-<distributionVal>/`` and are generated with
        ``runExternalSoft.RComputeMapFiles`` when one of them is missing.
        On success their paths are stored in ``self.pcoords1D2D`` and
        ``self.pcoords3D``. With ``insertDB == 1`` one row per chemical is
        added to the ``<nameMap>_coords`` table.

        Fix: for maps other than "dsstox", the comprehension variable used
        to shadow the row index, so each DIMk value was read from the k-th
        chemical of the list instead of the current one.

        Args:
            corVal: value forwarded to the R script and used in the folder
                name.
            distributionVal: value forwarded to the R script and used in the
                folder name.
            insertDB: 1 to insert the coordinates in the database.

        Returns:
            None ("ERROR file map" is printed when the files cannot be
            produced).
        """

        def toArrayLiteral(lval):
            """Format values as a quoted "{...}" array literal."""
            return "{" + ",".join(["\"%s\"" % (str(val))
                                   for val in lval]) + "}"

        # NOTE(review): descriptors are recomputed only when BOTH paths are
        # unset although both are used below -- confirm "and" is intended.
        if not "p1D2D" in self.__dict__ and not "p3D" in self.__dict__:
            self.computeDesc(insertDB=0, w=1)

        # create coords
        prmap = pathFolder.createFolder(self.prout + "map_" + str(corVal) +
                                        "-" + str(distributionVal) + "/")
        self.prmap = prmap

        pcoordDim1Dim2 = prmap + "coord1D2D.csv"
        pcoordDim3D = prmap + "coord3D.csv"
        if not path.exists(pcoordDim1Dim2) or not path.exists(pcoordDim3D):
            runExternalSoft.RComputeMapFiles(self.p1D2D, self.p3D, prmap,
                                             corVal, distributionVal)

        if not path.exists(pcoordDim1Dim2) or not path.exists(pcoordDim3D):
            print("ERROR file map")
            return

        self.pcoords1D2D = pcoordDim1Dim2
        self.pcoords3D = pcoordDim3D

        if insertDB != 1:
            return

        isDsstox = self.nameMap == "dsstox"
        if isDsstox:
            dcoord1D2D = toolbox.loadMatrixCoords(pcoordDim1Dim2, 10)
            dcoord3D = toolbox.loadMatrixCoords(pcoordDim3D, 10)
        else:
            dcoord1D2D = toolbox.loadMatrixToDict(pcoordDim1Dim2, ",")
            dcoord3D = toolbox.loadMatrixToDict(pcoordDim3D, ",")

        cDB = DBrequest.DBrequest()
        cDB.verbose = 0
        tableIn = "%s_coords" % (self.nameMap)

        # iterate over a snapshot of the keys: each entry is removed from
        # both tables once read, so memory is released as before without
        # the repeated deletion at the head of a list
        for chem in list(dcoord1D2D.keys()):
            coord1D2D = dcoord1D2D.pop(chem)
            coord3D = dcoord3D.pop(chem)

            if isDsstox:
                w1D2D = toArrayLiteral(coord1D2D)
                w3D = toArrayLiteral(coord3D)
            else:
                # every column but one holds a dimension (the remaining one
                # is presumably the ID -- TODO confirm)
                nbdim1d2d = len(coord1D2D.keys()) - 1
                nbdim3d = len(coord3D.keys()) - 1
                w1D2D = toArrayLiteral([
                    coord1D2D["DIM" + str(idim)]
                    for idim in range(1, nbdim1d2d + 1)
                ])
                w3D = toArrayLiteral([
                    coord3D["DIM3-" + str(idim)]
                    for idim in range(1, nbdim3d + 1)
                ])

            cDB.addElement(tableIn,
                           ["inchikey", "dim1d2d", "dim3d", "in_db"],
                           [chem, w1D2D, w3D, "1"])
def UpdateDBChemPropVal(prPred, pInde, psdf, LPROP, prout):
    """Write ``<prout>OPERA_desc_update.csv``, one row of properties per DTXSID.

    Each property of `LPROP` is looked up, in this order, in the prediction
    row, in the identifier row of the same DTXSID, and in the SDF entry
    whose "Original_SMILES" equals the identifier SMILES. A property found
    nowhere is written "NA" and its name is printed; "NaN" is also written
    "NA".

    Args:
        prPred: folder (with trailing separator) of prediction files.
        pInde: comma-separated identifier table with a "DTXSID" column.
        psdf: SDF file parsed with ``parseSDF``.
        LPROP: ordered list of the property names to export.
        prout: output folder, with trailing separator.

    Returns:
        None.
    """
    # load SDF, first entry kept for each original SMILES
    cSDF = parseSDF.parseSDF(psdf, "CASRN", prout)
    cSDF.parseAll()

    dSDF = {}
    for chem in cSDF.lc:
        if chem["Original_SMILES"] not in dSDF:
            dSDF[chem["Original_SMILES"]] = deepcopy(chem)

    # pIdentifier, re-keyed by DTXSID (rows without one are skipped)
    dSMILES = toolbox.loadMatrixToDict(pInde, sep=",")
    dSMILES_out = {}
    for chem in dSMILES.keys():
        if "DTXSID" in dSMILES[chem]:
            dSMILES_out[dSMILES[chem]["DTXSID"]] = deepcopy(dSMILES[chem])

    # load chem prediction
    # NOTE(review): only the first file returned by listdir is read --
    # confirm this is intended and not a debugging leftover.
    lfilePred = listdir(prPred)
    dpred = {}
    for filePred in lfilePred[:1]:
        print(filePred)
        dtemp = toolbox.loadMatrixToDict(prPred + filePred, sep=",")

        # primary key will be DTXSID, first row wins
        for DTXCID in dtemp.keys():
            DTXSID = dtemp[DTXCID]["dsstox_substance_id"]
            if DTXSID not in dpred:
                dpred[DTXSID] = dtemp[DTXCID]

    # write for DB Update
    pfiloutDesc = prout + "OPERA_desc_" + "update.csv"
    with open(pfiloutDesc, "w") as filoutDesc:
        filoutDesc.write("DTXSID\t%s\n" % "\t".join(LPROP))

        for DTXSID in dpred.keys():
            dident = dSMILES_out.get(DTXSID, {})
            SMILES = dident["SMILES"] if "SMILES" in dident else "ERROR"
            dsdfChem = dSDF.get(SMILES, {})

            lval = []
            for PROP in LPROP:
                # sources by decreasing priority
                for dsource in (dpred[DTXSID], dident, dsdfChem):
                    if PROP in dsource:
                        val = str(dsource[PROP])
                        break
                else:
                    val = "NA"
                    print(PROP)

                if val == "NaN":
                    val = "NA"
                lval.append(val)

            filoutDesc.write("%s\t%s\n" % (DTXSID, "\t".join(lval)))