Example #1
0
    def __init__(self, **kwargs):
        """Store workflow settings and load the OE molecule dictionary.

        Keyword Args:
            ccUrlTarget (str): URL or path for the concatenated chemical component dictionary,
                               passed through to OeMoleculeProvider (default: None)
            birdUrlTarget (str): URL or path for the concatenated BIRD dictionary,
                                 passed through to OeMoleculeProvider (default: None)
            licenseFilePath (str): path to the OpenEye license text file (default: "oe_license.txt")
            fileDirPath (str): directory path kept for generated output (default: ".")
            cachePath (str): cache directory, converted to an absolute path (default: ".")
            molBuildType (str): build type handed to OeMoleculeProvider (default: "ideal-xyz")
        """
        self.__licensePath = kwargs.get("licenseFilePath", "oe_license.txt")
        self.__fileDirPath = kwargs.get("fileDirPath", ".")
        self.__molBuildType = kwargs.get("molBuildType", "ideal-xyz")
        #
        # The provider always uses the cache with the fixed "cc-full"/"oe-full" file name prefixes.
        providerArgs = {
            "ccUrlTarget": kwargs.get("ccUrlTarget"),
            "birdUrlTarget": kwargs.get("birdUrlTarget"),
            "ccFileNamePrefix": "cc-full",
            "cachePath": os.path.abspath(kwargs.get("cachePath", ".")),
            "molBuildType": self.__molBuildType,
            "useCache": True,
            "oeFileNamePrefix": "oe-full",
        }
        self.__oemp = OeMoleculeProvider(**providerArgs)
        self.__oeMolD = self.__oemp.getOeMolD()
Example #2
0
    def testSubStructureSearchBase(self):
        """Prefilter and substructure-search the first ten index entries.

        For each entry the candidate list from prefilterIndex() is searched and
        the test asserts that the search succeeds and that
        self.__resultContains(ccId, <matches>) holds.
        """
        searchOpts = self.__myKwargs.get("matchOpts", "sub-struct-graph-relaxed")
        procCount = self.__numProcSearch
        #
        oemp = OeMoleculeProvider(**self.__myKwargs)
        self.assertTrue(oemp.testCache())
        oesU = OeSubStructSearchUtils(oemp)
        #
        ccIdxP = ChemCompIndexProvider(**self.__myKwargs)
        self.assertTrue(ccIdxP.testCache(minCount=self.__minCount))
        ccIdxD = ccIdxP.getIndex()
        #
        # Fetch the first indexed molecule ahead of the timed loop (the result itself is not used).
        firstId = next(iter(ccIdxD))
        oemp.getMol(firstId)
        #
        maxMols = 10
        sampleL = list(ccIdxD.items())[:maxMols]
        for ccId, _ in sampleL:
            tStart = time.time()
            queryMol = oemp.getMol(ccId)
            candidateIdL = oesU.prefilterIndex(queryMol, ccIdxP, matchOpts=searchOpts)
            logger.info("%s search length %d in (%.4f seconds)", ccId, len(candidateIdL), time.time() - tStart)
            #
            searchOk, matchL = oesU.searchSubStructure(queryMol, ccIdList=candidateIdL, matchOpts=searchOpts, numProc=procCount)
            # Elapsed time is cumulative from the start of this entry.
            logger.info("%s result length %d in (%.4f seconds)", ccId, len(matchL), time.time() - tStart)
            self.assertTrue(searchOk)
            self.assertTrue(self.__resultContains(ccId, matchL))
Example #3
0
 def testSubStructureSearchScreened(self):
     """Run screened substructure searches built from 'oe-smiles' descriptors.

     The first `numMols` index entries are used as queries; entries without
     an 'oe-smiles' descriptor are skipped.  Queries for which
     self.__resultContains(ccId, <matches>) is false are collected and
     logged at the end rather than failing the test.
     """
     oeioU = OeIoUtils()
     oemp = OeMoleculeProvider(**self.__myKwargs)
     # Verify the molecule provider cache before it is searched.
     ok = oemp.testCache()
     self.assertTrue(ok)
     ccmP = ChemCompIndexProvider(**self.__myKwargs)
     ccIdxD = ccmP.getIndex()
     ok = ccmP.testCache(minCount=self.__minCount)
     self.assertTrue(ok)
     oesU = OeSearchUtils(oemp,
                          screenType=self.__screenType,
                          numProc=self.__numProc)
     numMols = 20
     missL = []
     for ccId, ccD in list(ccIdxD.items())[:numMols]:
         # ----
         startTime = time.time()
         if "oe-smiles" not in ccD:
             continue
         logger.info("Search %s %r", ccId, ccD["oe-smiles"])
         oeQMol = oeioU.smartsToQmol(ccD["oe-smiles"])
         retStatus, mL = oesU.searchSubStructureScreened(oeQMol,
                                                         maxMatches=100)
         # Timing is only reported for searches that returned a true status.
         if retStatus:
             logger.info("%s (status=%r) match length %d in (%.4f seconds)",
                         ccId, retStatus, len(mL),
                         time.time() - startTime)
         if not self.__resultContains(ccId, mL):
             missL.append(ccId)
         # ----
     logger.info("Missed searches (%d) %r", len(missL), missL)
Example #4
0
    def testSubStructureSearchWithFingerPrint(self):
        """Run fingerprint-prefiltered substructure searches for every configured fingerprint type.

        The first `numMols` index entries are used as queries.  Each search
        must return a true status and self.__resultContains(ccId, <matches>)
        must hold.
        """
        oemp = OeMoleculeProvider(**self.__myKwargs)
        # Verify the molecule provider cache before it is searched.
        ok = oemp.testCache()
        self.assertTrue(ok)
        ccmP = ChemCompIndexProvider(**self.__myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        minFpScore = 0.40
        maxFpResults = 50
        numMols = 20
        matchOpts = "graph-relaxed"
        oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
        # ----
        startTime = time.time()
        for ccId, _ in list(ccIdxD.items())[:numMols]:
            # The query molecule does not depend on the fingerprint type, so fetch it once per entry.
            oeMol = oemp.getMol(ccId)
            for fpType in self.__fpTypeList:
                retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                    oeMol,
                    fpType,
                    minFpScore,
                    maxFpResults,
                    matchOpts=matchOpts)
                self.assertTrue(retStatus)
                self.assertTrue(self.__resultContains(ccId, mL))

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(self.__fpTypeList), numMols,
                    time.time() - startTime)
Example #5
0
 def testFingerPrintSearch(self):
     """Run fingerprint similarity searches for every configured fingerprint type.

     The first `numMols` index entries are used as queries.  Each search
     must return a true status and self.__resultContains(ccId, <matches>)
     must hold.
     """
     oemp = OeMoleculeProvider(**self.__myKwargs)
     # This will reload the oe binary cache.
     oeMol = oemp.getMol("004")
     self.assertGreaterEqual(len(list(oeMol.GetAtoms())), 12)
     #
     # Verify the molecule provider cache before it is searched.
     ok = oemp.testCache()
     self.assertTrue(ok)
     ccmP = ChemCompIndexProvider(**self.__myKwargs)
     ccIdxD = ccmP.getIndex()
     ok = ccmP.testCache(minCount=self.__minCount)
     self.assertTrue(ok)
     minScore = 0.50
     maxResults = 50
     numMols = 50
     oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
     # ----
     startTime = time.time()
     for ccId, _ in list(ccIdxD.items())[:numMols]:
         # The query molecule does not depend on the fingerprint type, so fetch it once per entry.
         oeMol = oemp.getMol(ccId)
         for fpType in self.__fpTypeList:
             retStatus, mL = oesU.searchFingerPrints(
                 oeMol,
                 fpType=fpType,
                 minFpScore=minScore,
                 maxFpResults=maxResults)
             self.assertTrue(retStatus)
             self.assertTrue(self.__resultContains(ccId, mL))
     logger.info("%s fingerprints search on %d in (%.4f seconds)",
                 len(self.__fpTypeList), numMols,
                 time.time() - startTime)
Example #6
0
 def __getCache(self, molBuildType="model-xyz", useCache=True):
     """Create a molecule provider, assert its cache test passes, and return its molecule dictionary.

     Args:
         molBuildType (str): build type passed to OeMoleculeProvider (default: "model-xyz")
         useCache (bool): cache flag passed to OeMoleculeProvider (default: True)

     Returns:
         the value of OeMoleculeProvider.getOeMolD()
     """
     # The provider always uses the fixed "cc-abbrev" dictionary file name prefix.
     providerArgs = {
         "ccUrlTarget": self.__ccUrlTarget,
         "birdUrlTarget": self.__birdUrlTarget,
         "ccFileNamePrefix": "cc-abbrev",
         "cachePath": self.__cachePath,
         "molBuildType": molBuildType,
         "useCache": useCache,
     }
     provider = OeMoleculeProvider(**providerArgs)
     self.assertTrue(provider.testCache())
     return provider.getOeMolD()
Example #7
0
 def testSubStructureSearch(self):
     """Run relaxed substructure searches for the first `numMols` index entries.

     Each search must return a true status and
     self.__resultContains(ccId, <matches>) must hold.
     """
     oemp = OeMoleculeProvider(**self.__myKwargs)
     # Verify the molecule provider cache before it is searched.
     ok = oemp.testCache()
     self.assertTrue(ok)
     ccmP = ChemCompIndexProvider(**self.__myKwargs)
     ccIdxD = ccmP.getIndex()
     ok = ccmP.testCache(minCount=self.__minCount)
     self.assertTrue(ok)
     oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
     numMols = 10
     for ccId, _ in list(ccIdxD.items())[:numMols]:
         # ----
         startTime = time.time()
         oeMol = oemp.getMol(ccId)
         retStatus, mL = oesU.searchSubStructure(oeMol, matchOpts="relaxed")
         logger.info("%s match length %d in (%.4f seconds)", ccId, len(mL),
                     time.time() - startTime)
         self.assertTrue(retStatus)
         self.assertTrue(self.__resultContains(ccId, mL))
Example #8
0
    def testSubStructureSearchScreenedFiltered(self):
        """Run screened substructure searches against the 'cc-filtered'/'oe-filtered' cache files.

        Up to `numMols` index entries are used as queries; entries without an
        'oe-smiles' descriptor are skipped.  Queries for which
        self.__resultContains(ccId, <matches>) is false are collected and
        logged at the end rather than failing the test.
        """
        myKwargs = {
            "cachePath": self.__cachePath,
            "useCache": True,
            "fpTypeList": self.__fpTypeList,
            "ccFileNamePrefix": "cc-filtered",
            "oeFileNamePrefix": "oe-filtered",
            "molBuildType": "oe-iso-smiles",
            "limitPerceptions": False,
        }
        oeioU = OeIoUtils()
        oemp = OeMoleculeProvider(**myKwargs)
        # Verify the molecule provider cache before it is searched.
        ok = oemp.testCache()
        self.assertTrue(ok)
        ccmP = ChemCompIndexProvider(**myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        oesU = OeSearchUtils(oemp,
                             screenType=self.__screenType,
                             numProc=self.__numProc)
        numMols = 5000
        missL = []
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            # ----
            startTime = time.time()
            if "oe-smiles" not in ccD:
                continue
            logger.info("Search %s %r", ccId, ccD["oe-smiles"])
            oeQMol = oeioU.smartsToQmol(ccD["oe-smiles"])
            retStatus, mL = oesU.searchSubStructureScreened(oeQMol,
                                                            maxMatches=100)
            logger.info("%s (status=%r)match length %d in (%.4f seconds)",
                        ccId, retStatus, len(mL),
                        time.time() - startTime)
            if not self.__resultContains(ccId, mL):
                missL.append(ccId)
            # ----
        logger.info("Missed searches (%d) %r", len(missL), missL)
Example #9
0
class ChemCompFileWorkflow(object):
    """Workflow that writes one molecule file per chemical component held by an OeMoleculeProvider."""

    def __init__(self, **kwargs):
        """Store workflow settings and load the OE molecule dictionary.

        Keyword Args:
            ccUrlTarget (str): URL or path for the concatenated chemical component dictionary,
                               passed through to OeMoleculeProvider (default: None)
            birdUrlTarget (str): URL or path for the concatenated BIRD dictionary,
                                 passed through to OeMoleculeProvider (default: None)
            licenseFilePath (str): path to the OpenEye license text file (default: "oe_license.txt")
            fileDirPath (str): top-level directory for the generated file tree (default: ".")
            cachePath (str): cache directory, converted to an absolute path (default: ".")
            molBuildType (str): build type for constructing OE molecules; "ideal-xyz" selects
                                the "_ideal" file name suffix, any other value "_model" (default: "ideal-xyz")
        """
        #
        ccUrlTarget = kwargs.get("ccUrlTarget", None)
        birdUrlTarget = kwargs.get("birdUrlTarget", None)
        self.__licensePath = kwargs.get("licenseFilePath", "oe_license.txt")
        self.__fileDirPath = kwargs.get("fileDirPath", ".")
        self.__molBuildType = kwargs.get("molBuildType", "ideal-xyz")
        cachePath = kwargs.get("cachePath", ".")
        cachePath = os.path.abspath(cachePath)

        #
        self.__oemp = OeMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            ccFileNamePrefix="cc-full",
            cachePath=cachePath,
            molBuildType=self.__molBuildType,
            useCache=True,
            oeFileNamePrefix="oe-full",
        )
        self.__oeMolD = self.__oemp.getOeMolD()

    def __setLicense(self, licensePath):
        """Make sure the OE_LICENSE environment variable points at a readable file.

        A readable file already named by OE_LICENSE takes precedence; otherwise
        OE_LICENSE is set to `licensePath` when that file is readable.

        Args:
            licensePath (str): fallback license file path

        Returns:
            bool: True if a readable license file was found, False otherwise
        """
        ok = False
        try:
            if os.environ.get("OE_LICENSE") and os.access(os.environ["OE_LICENSE"], os.R_OK):
                logger.info("Using license from environment %r", os.environ["OE_LICENSE"])
                ok = True
            elif os.access(licensePath, os.R_OK):
                os.environ["OE_LICENSE"] = licensePath
                logger.info("Setting environmenal variable OE_LICENSE to %r", os.environ["OE_LICENSE"])
                ok = True
        except Exception as e:
            logger.error("Setting license file %r failing %s", licensePath, str(e))
        return ok

    def testCache(self):
        """Return the molecule provider's cache test result, or False when no provider is set."""
        return self.__oemp.testCache() if self.__oemp else False

    def makeFiles(self, fmt="sdf"):
        """Write one file per chemical component in the requested format.

        Each file is written to:
            <fileDirPath>/<fmt>/<first character of ccId>/<ccId>_<ideal|model>.<fmt>

        Args:
            fmt (str): output format, one of "mol", "mol2", "mol2h" or "sdf" (default: "sdf")

        Returns:
            bool: True when every component was processed without an exception;
                  False for an unsupported format, an unusable license, or any exception.
        """
        try:
            if fmt not in ["mol", "mol2", "mol2h", "sdf"]:
                return False
            if not self.__setLicense(self.__licensePath):
                logger.error("Invalid license details - exiting")
                return False
            # The build type is fixed for the whole run, so choose the file name suffix once.
            suffix = "_ideal." if self.__molBuildType == "ideal-xyz" else "_model."
            oeioU = OeIoUtils()
            for ccId, oeMol in self.__oeMolD.items():
                filePath = os.path.join(self.__fileDirPath, fmt, ccId[0], ccId + suffix + fmt)
                oeioU.write(filePath, oeMol, constantMol=True)

            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
Example #10
0
    def testSssWithFingerPrintFromDescriptor(self):
        """Search with query molecules rebuilt from each stored descriptor type.

        For the first `numMols` index entries a query molecule is built from
        every available descriptor (SMILES variants and InChI) and searched
        with each (fpType, minFpScore) pair in self.__fpTypeCuttoffList.
        Every search must return a true status.  Descriptors for which no
        fingerprint type gives a result satisfying
        self.__resultContains(ccId, <matches>) are collected and logged
        rather than failing the test.
        """
        oemp = OeMoleculeProvider(**self.__myKwargs)
        # Verify the molecule provider cache before it is searched.
        ok = oemp.testCache()
        self.assertTrue(ok)
        ccmP = ChemCompIndexProvider(**self.__myKwargs)
        ccIdxD = ccmP.getIndex()
        ok = ccmP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        limitPerceptions = False
        # minFpScore = 0.5
        maxFpResults = 50
        matchOpts = "graph-relaxed"
        numMols = 20
        descriptorTypeL = [
            "oe-iso-smiles", "oe-smiles", "acdlabs-smiles",
            "cactvs-iso-smiles", "cactvs-smiles", "inchi"
        ]
        oeioU = OeIoUtils()
        oesU = OeSearchUtils(oemp, fpTypeList=self.__fpTypeList)
        missTupL = []
        missedD = {}
        missedFpD = {}
        # ----
        startTime = time.time()
        for ccId, ccD in list(ccIdxD.items())[:numMols]:
            for buildType in descriptorTypeL:
                if buildType not in ccD:
                    logger.info("%s missing descriptor %r", ccId, buildType)
                    continue
                logger.debug("Search %s %r", ccId, ccD[buildType])
                # Reset for every descriptor so that a failed build can never
                # fall through to the molecule built for a previous descriptor.
                oeMol = None
                if buildType == "inchi":
                    oemf = OeMoleculeFactory()
                    oemf.setDescriptor(ccD["inchi"], "inchi", ccId)
                    ok = oemf.build(molBuildType="inchi",
                                    limitPerceptions=limitPerceptions)
                    if not ok:
                        logger.info("%s build failed with InChI %r", ccId,
                                    ccD["inchi"])
                    else:
                        oeMol = oemf.getMol()
                        if oemf.getInChI() != ccD["inchi"]:
                            logger.info(
                                "%s regenerated InChI differs\n%r\n%s",
                                ccId, ccD["inchi"], oemf.getInChI())
                else:
                    oeMol = oeioU.smilesToMol(
                        ccD[buildType], limitPerceptions=limitPerceptions)
                if not oeMol:
                    continue
                maxHits = 0
                minHits = maxFpResults
                selfHit = False
                for fpType, minFpScore in self.__fpTypeCuttoffList:
                    retStatus, mL = oesU.searchSubStructureWithFingerPrint(
                        oeMol,
                        fpType,
                        minFpScore,
                        maxFpResults,
                        matchOpts=matchOpts)
                    self.assertTrue(retStatus)
                    logger.debug("%s fpType %r hits %d", ccId, fpType,
                                 len(mL))
                    maxHits = max(maxHits, len(mL))
                    minHits = min(minHits, len(mL))
                    matchedSelf = self.__resultContains(ccId, mL)
                    selfHit = selfHit or matchedSelf
                    if not matchedSelf:
                        missedFpD.setdefault(ccId, []).append(
                            (buildType, fpType, len(mL)))
                if not selfHit:
                    missedD.setdefault(ccId, []).append(buildType)
                    # Record the miss for the optional depiction step below.
                    missTupL.append((ccId, buildType))

                logger.info("%s (%r) buildType %r min hits %d max hits %d",
                            ccId, selfHit, buildType, minHits, maxHits)
        #
        for ccId, missL in missedD.items():
            logger.info("%s missed list %r", ccId, missL)
            if ccId in missedFpD:
                logger.info("%s unmatched for fpTypes %r", ccId,
                            missedFpD[ccId])
        # ----
        # Optional diagnostic depiction of missed descriptors (disabled by default).
        doDepict = False
        if doDepict:
            mD = {}
            for missTup in missTupL:
                mD.setdefault(missTup[0], []).append(missTup[1])

            for ccId, buildTypeL in mD.items():
                idxD = ccIdxD[ccId]
                if "oe-iso-smiles" in idxD:
                    for buildType in buildTypeL:
                        self.__displayAlignedDescriptorPair(
                            ccId,
                            idxD["oe-iso-smiles"],
                            "oe-iso-smiles",
                            idxD[buildType],
                            buildType,
                            title=None,
                            limitPerceptions=True)

        logger.info("%s fingerprints search on %d in (%.4f seconds)",
                    len(self.__fpTypeList), numMols,
                    time.time() - startTime)
class ChemCompImageWorkflow(object):
    """Workflow that writes labeled and unlabeled SVG depictions for each chemical component."""

    def __init__(self, **kwargs):
        """ Module entry point for chemical component definition 2D image generation

        Args:
            ccUrlTarget (str): URL or path for concatenated chemical component dictionary,
                               passed through to OeMoleculeProvider (default: None)
            birdUrlTarget (str): URL or path for concatenated BIRD dictionary,
                                 passed through to OeMoleculeProvider (default: None)
            licenseFilePath (str): path to OpenEye license text file (default: "oe_license.txt")
            imagePath (str): directory containing generated image tree (default: ".")
            cachePath (str): cache directory, converted to an absolute path (default: ".")

        """
        #
        ccUrlTarget = kwargs.get("ccUrlTarget", None)
        birdUrlTarget = kwargs.get("birdUrlTarget", None)
        self.__licensePath = kwargs.get("licenseFilePath", "oe_license.txt")
        self.__imagePath = kwargs.get("imagePath", ".")
        cachePath = kwargs.get("cachePath", ".")
        cachePath = os.path.abspath(cachePath)

        #
        self.__oemp = OeMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            ccFileNamePrefix="cc-full",
            cachePath=cachePath,
            molBuildType="model-xyz",
            useCache=True,
            oeFileNamePrefix="oe-full",
        )
        self.__oeMolD = self.__oemp.getOeMolD()

    def __setLicense(self, licensePath):
        """Make sure the OE_LICENSE environment variable points at a readable file.

        A readable file already named by OE_LICENSE takes precedence; otherwise
        OE_LICENSE is set to `licensePath` when that file is readable.

        Args:
            licensePath (str): fallback license file path

        Returns:
            bool: True if a readable license file was found, False otherwise
        """
        ok = False
        try:
            if os.environ.get("OE_LICENSE") and os.access(
                    os.environ["OE_LICENSE"], os.R_OK):
                logger.info("Using license from environment %r",
                            os.environ["OE_LICENSE"])
                ok = True
            elif os.access(licensePath, os.R_OK):
                os.environ["OE_LICENSE"] = licensePath
                logger.info("Setting environmenal variable OE_LICENSE to %r",
                            os.environ["OE_LICENSE"])
                ok = True
        except Exception as e:
            logger.error("Setting license file %r failing %s", licensePath,
                         str(e))
        return ok

    def testCache(self):
        """Return the molecule provider's cache test result, or False when no provider is set."""
        return self.__oemp.testCache() if self.__oemp else False

    @staticmethod
    def __getBondDisplayWidth(numAtoms):
        """Return the bond display width for a molecule with `numAtoms` atoms (narrower for larger molecules)."""
        if numAtoms > 200:
            return 4.0
        if numAtoms > 100:
            return 6.0
        return 10.0

    def __writeImageSet(self, subDirName, labelAtomName):
        """Write one SVG per component to <imagePath>/<subDirName>/<first character of ccId>/<ccId>.svg.

        Args:
            subDirName (str): sub-directory of the image path receiving this set
            labelAtomName (bool): value passed as the labelAtomName display option
        """
        for ccId, oeMol in self.__oeMolD.items():
            imagePath = os.path.join(self.__imagePath, subDirName, ccId[0],
                                     ccId + ".svg")
            oed = OeDepict()
            title = ""
            oed.setMolTitleList([(ccId, oeMol, title)])
            oed.setDisplayOptions(
                labelAtomName=labelAtomName,
                labelAtomCIPStereo=True,
                labelAtomIndex=False,
                labelBondIndex=False,
                labelBondCIPStereo=True,
                cellBorders=False,
                bondDisplayWidth=self.__getBondDisplayWidth(oeMol.NumAtoms()),
            )
            oed.setGridOptions(rows=1, cols=1, cellBorders=False)
            oed.prepare()
            oed.write(imagePath)

    def makeImages(self):
        """ Create images for all public chemical components with and without atom labels.

        Unlabeled images are written under "image" first, followed by the
        atom-labeled images under "image_labeled".

        Returns:
            bool: True when both image sets were processed without an exception;
                  False for an unusable license or any exception.
        """
        try:
            if not self.__setLicense(self.__licensePath):
                logger.error("Invalid license details - exiting")
                return False
            self.__writeImageSet("image", labelAtomName=False)
            self.__writeImageSet("image_labeled", labelAtomName=True)
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
    def __testBuildMoleculeCacheFiles(self, **kwargs):
        """Test build OE cache files from full component dictionary

        The cache is first rebuilt (useCache=False) and checked, then reloaded
        through a second provider (useCache=True) whose fingerprint databases,
        molecule "004", molecule database and (optionally) substructure search
        database are checked against minimum sizes.

        Keyword Args:
            ccUrlTarget (str): chemical component dictionary locator (default: None)
            birdUrlTarget (str): BIRD dictionary locator (default: None)
            molLimit (int): expected molecule count; 0 selects the fixed minimum of 30000 (default: 0)
            quietFlag (bool): passed to the rebuilding provider (default: True)
            molBuildType (str): molecule build type (default: "ideal-xyz")
            fpTypeList (list): fingerprint types to build and check (default: ["TREE"])
            screenTypeList (list): screen types to build (default: [])
            ccFileNamePrefix (str): dictionary cache file name prefix (default: "cc")
            oeFileNamePrefix (str): OE cache file name prefix (default: "oe")

        Returns:
            bool: True when all assertions pass
        """
        ccUrlTarget = kwargs.get("ccUrlTarget", None)
        birdUrlTarget = kwargs.get("birdUrlTarget", None)
        molLimit = kwargs.get("molLimit", 0)
        quietFlag = kwargs.get("quietFlag", True)
        molBuildType = kwargs.get("molBuildType", "ideal-xyz")
        fpTypeList = kwargs.get("fpTypeList", ["TREE"])
        screenTypeList = kwargs.get("screenTypeList", [])
        ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc")
        oeFileNamePrefix = kwargs.get("oeFileNamePrefix", "oe")
        #
        startTime = time.time()

        oemp = OeMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            cachePath=self.__cachePath,
            ccFileNamePrefix=ccFileNamePrefix,
            oeFileNamePrefix=oeFileNamePrefix,
            molBuildType=molBuildType,
            useCache=False,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            screenTypeList=screenTypeList,
        )
        ok = oemp.testCache()
        self.assertTrue(ok)
        # Reload through a fresh provider so the checks below read the cached files.
        oemp = OeMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            cachePath=self.__cachePath,
            ccFileNamePrefix=ccFileNamePrefix,
            oeFileNamePrefix=oeFileNamePrefix,
            molBuildType=molBuildType,
            useCache=True,
        )
        endTime = time.time()
        logger.info(">> Completed load molBuildType %r molLimit %r (%.4f seconds)", molBuildType, molLimit, endTime - startTime)
        #
        # ---

        # Allow a small shortfall against the requested molecule limit.
        deltaMol = 2
        minMol = minNumFp = molLimit - deltaMol if molLimit else 30000
        for fpType in fpTypeList:
            # Check the database of each requested fingerprint type, not only "TREE".
            fpDb = oemp.getFingerPrintDb(fpType=fpType)
            logger.debug("fpType %r length %d", fpType, fpDb.NumFingerPrints())
            self.assertGreaterEqual(fpDb.NumFingerPrints(), minNumFp)
        #
        ccId = "004"
        oeMol = oemp.getMol(ccId)
        numAtoms = len(list(oeMol.GetAtoms()))
        logger.debug("%s atom count %d", ccId, numAtoms)
        #
        # A lower minimum atom count is expected for the "oe-iso-smiles" build type.
        if molBuildType in ["oe-iso-smiles"]:
            self.assertGreaterEqual(numAtoms, 12)
        else:
            self.assertGreaterEqual(numAtoms, 20)
        #
        oeDb, oeDbIdx = oemp.getOeMolDatabase()
        logger.debug("Type db %r length %d type idx %r length %d", type(oeDb), oeDb.NumMols(), type(oeDbIdx), len(oeDbIdx))
        self.assertGreaterEqual(oeDb.NumMols(), minMol)
        self.assertGreaterEqual(len(oeDbIdx), minMol)
        #
        if molBuildType in ["oe-iso-smiles"] and screenTypeList:
            ssDb = oemp.getSubSearchDb()
            self.assertGreaterEqual(ssDb.NumMolecules(), minMol)
        return True