コード例 #1
0
 def testSubStructureSearchFromIndexSelected(self):
     """Run index-prefiltered substructure searches for selected components.

     For each query component ("BNZ", "ALA") the search index is first
     prefiltered to a candidate id list, the substructure search is run
     over those candidates only, and the test asserts that the search
     succeeds and that the result list contains the query itself.
     """
     searchMode = self.__myKwargs.get("matchOpts", "sub-struct-graph-relaxed")
     workerCount = self.__numProcSearch
     #
     # Both providers must report a usable cache before any search is attempted.
     molProvider = OeSearchMoleculeProvider(**self.__myKwargs)
     self.assertTrue(molProvider.testCache())
     searchUtils = OeSubStructSearchUtils(molProvider)
     #
     indexProvider = ChemCompSearchIndexProvider(**self.__myKwargs)
     self.assertTrue(indexProvider.testCache(minCount=self.__minCount))
     #
     # Fetch the molecule for the first index key; the returned molecule is
     # not used further (each query below fetches its own molecule).
     firstKey = next(iter(indexProvider.getIndex()))
     molProvider.getMol(firstKey)
     #
     for ccId in ("BNZ", "ALA"):
         # A single timer per query: the second log line reports the
         # cumulative time for prefilter plus search.
         t0 = time.time()
         queryMol = molProvider.getMol(ccId)
         #
         candidateIdL = searchUtils.prefilterIndex(queryMol, indexProvider, matchOpts=searchMode)
         logger.info("%s search length %d in (%.4f seconds)", ccId, len(candidateIdL), time.time() - t0)
         #
         status, matchL = searchUtils.searchSubStructure(
             queryMol,
             ccIdList=candidateIdL,
             matchOpts=searchMode,
             numProc=workerCount,
         )
         logger.info("%s status %r result length %d in (%.4f seconds)", ccId, status, len(matchL), time.time() - t0)
         self.assertTrue(status)
         self.assertTrue(self.__resultContains(ccId, matchL))
コード例 #2
0
    def updateSearchIndex(self, useCache=False):
        """Rebuild the search index from source chemical component and BIRD definitions.

        Update the internal state of this index in the current object instance.

        Resource requirements 771 secs 6 proc macbook pro 7GB memory.

        Args:
            useCache (bool): False to rebuild search index and True to reload

        Returns:
            bool: True for success or False otherwise.  False is also returned
                when the configuration has no (or empty) "ccsiKwargs" entry or
                when any step raises an exception.
        """
        ok = False
        try:
            # Work on a private copy so that the stored configuration is not
            # modified by the useCache override below.
            kwargs = copy.deepcopy(self.__configD["ccsiKwargs"]) if "ccsiKwargs" in self.__configD else None
            if kwargs:
                kwargs["useCache"] = useCache
                siIdxP = ChemCompSearchIndexProvider(**kwargs)
                ok = siIdxP.testCache()
                self.__siIdxP = siIdxP if siIdxP else None
                # Only expose the index when the provider's cache test passed.
                self.__siIdx = siIdxP.getIndex() if siIdxP and ok else {}
                logger.info("Search index status %r index len %d", ok, len(self.__siIdx) if self.__siIdx else 0)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # A failure after testCache() succeeded (e.g. while loading the
            # index) must not be reported as success.
            ok = False
        return ok
コード例 #3
0
    def __reload(self, **kwargs):
        """Create the OE molecule cache files for chemical component definitions.

        Each artifact (binary OE molecule cache, indexed OE molecule database,
        and one screened substructure database per requested screen type) is
        regenerated when useCache is False or when its file does not yet exist
        in the directory managed by this object.

        Args:
            useCache (bool, optional): reuse existing cache files. Defaults to True.
            cachePath (str, optional): top cache directory handed to the search index provider. Defaults to '.'.
            numProc (int, optional): number of processors used to build and load screened
                substructure databases. Defaults to 2.
            molLimit (int, optional): passed to the search index provider as molLimit and as the
                minCount of its cache test. Defaults to None.
            fpTypeList (list, optional): fingerprint types. Defaults to
                ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"].
            screenTypeList (list, optional): substructure screen types; no screened database is
                built when this is None or empty. Defaults to None.
            limitPerceptions (bool, optional): forwarded to the OE molecule cache build. Defaults to False.
            suppressHydrogens (bool, optional): forwarded to the OE molecule cache build. Defaults to False.
            quietFlag (bool, optional): forwarded to the OE I/O utilities. Defaults to True.
            logSizes (bool, optional): forwarded to the search index provider cache test. Defaults to False.

            All keyword arguments other than cachePath, useCache and molLimit are
            also forwarded unchanged to the search index provider constructor.

        Returns:
            (bool): True when every step completes without raising, False otherwise

        """
        try:
            useCache = kwargs.get("useCache", True)
            cachePath = kwargs.get("cachePath", ".")
            numProc = kwargs.get("numProc", 2)
            molLimit = kwargs.get("molLimit", None)
            fpTypeList = kwargs.get("fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
            screenTypeList = kwargs.get("screenTypeList", None)
            limitPerceptions = kwargs.get("limitPerceptions", False)
            suppressHydrogens = kwargs.get("suppressHydrogens", False)
            quietFlag = kwargs.get("quietFlag", True)
            logSizes = kwargs.get("logSizes", False)
            # Fixed (non-configurable) settings: with the "STANDARD" value the
            # fast fingerprint database section below is never entered.
            fpDbType = "STANDARD"
            buildScreenedDb = True

            def mustBuild(filePath):
                # Rebuild when caching is disabled or the artifact is missing.
                return not useCache or not self.__mU.exists(filePath)

            # Stays 0 when the molecule cache below is reused rather than rebuilt.
            oeCount = 0
            ioUtils = OeIoUtils(quietFlag=quietFlag)
            #
            # -------- binary OE molecule cache built from the search index
            molCachePath = os.path.join(self.__dirPath, self.__getOeSearchMolFileName())
            if mustBuild(molCachePath):
                reserved = ["cachePath", "useCache", "molLimit"]
                providerKwargs = {key: val for key, val in kwargs.items() if key not in reserved}
                # The index provider is always asked to use its own cache here.
                indexProvider = ChemCompSearchIndexProvider(
                    cachePath=cachePath,
                    useCache=True,
                    molLimit=molLimit,
                    **providerKwargs
                )
                indexOk = indexProvider.testCache(minCount=molLimit, logSizes=logSizes)
                searchIndex = indexProvider.getIndex() if indexOk else {}
                indexSize = len(searchIndex)
                #
                t0 = time.time()
                oeCount, errCount, failIdList = ioUtils.buildOeBinaryMolCacheFromIndex(
                    molCachePath,
                    searchIndex,
                    quietFlag=quietFlag,
                    fpTypeList=fpTypeList,
                    limitPerceptions=limitPerceptions,
                    suppressHydrogens=suppressHydrogens,
                )
                if failIdList:
                    logger.info("failures %r", failIdList)
                logger.info(
                    "Constructed %d/%d cached oeMols  (unconverted %d) (%.4f seconds)",
                    oeCount,
                    indexSize,
                    errCount,
                    time.time() - t0,
                )
            #
            # -------- indexed OE molecule database
            molDbPath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
            if mustBuild(molDbPath):
                t0 = time.time()
                molCount = ioUtils.createOeBinaryDatabaseAndIndex(molCachePath, molDbPath)
                logger.info(
                    "Created and stored %d indexed oeMols in OE database format (%.4f seconds)",
                    molCount,
                    time.time() - t0,
                )
            #
            # -------- fast fingerprint search databases (one per fingerprint type)
            if fpDbType == "FAST":
                for fpType in fpTypeList:
                    t0 = time.time()
                    fpDbPath = os.path.join(self.__dirPath, self.__getFastFpDbFileName(fpType))
                    if not mustBuild(fpDbPath):
                        continue
                    ioUtils.createOeFingerPrintDatabase(molDbPath, fpDbPath, fpType=fpType)
                    logger.info(
                        "Created and stored %s fingerprint database (%.4f seconds)",
                        fpType,
                        time.time() - t0,
                    )
            #
            # -------- screened substructure search databases (one per screen type)
            if buildScreenedDb and screenTypeList:
                for screenType in screenTypeList:
                    t0 = time.time()
                    screenDbPath = os.path.join(self.__dirPath, self.__getSubSearchFileName(screenType))
                    if not mustBuild(screenDbPath):
                        continue
                    ok = ioUtils.createOeSubSearchDatabase(
                        molCachePath,
                        screenDbPath,
                        screenType=screenType,
                        numProc=numProc,
                    )
                    logger.info(
                        "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                        ok,
                        screenType,
                        time.time() - t0,
                    )
                    # Load the new database back and compare its molecule count
                    # with the number of molecules cached above.  The outcome
                    # of the comparison is not acted upon, but a failure while
                    # loading surfaces through the exception handler below.
                    screenDb = ioUtils.loadOeSubSearchDatabase(screenDbPath, screenType=screenType, numProc=numProc)
                    ok = screenDb.NumMolecules() == oeCount
            #
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
コード例 #4
0
    def buildSearchFiles(self, **kwargs):
        """Build cif, sdf (optional), and mol2 (optional) files for components in the chemical component search index.

        Exclude ions or other extraneous molecules lacking bonds.

        For every search index entry a standard CIF definition is exported for
        the parent component (once per component), and a CIF file is written
        for index entries whose identifier differs from the component id.
        The (search id, file path, format) tuples of the sdf/mol2 files
        successfully written by this call are passed to __storePathList().

        Args:
            ccUrlTarget (str): locator for source chemical component dictionary (default: None)
            birdUrlTarget (str): locator for source BIRD dictionary (default: None)
            limitPerceptions (bool): restrict automatic perceptions in OE molecular build operations (default: False)
            numProc (int): number of processors (default: 2)
            useCache (bool): use existing resource file where possible (default: True)
            molLimit (int): limit the number of ingested chemical components (default: None)
            minCount (int): minimum count used in the chemical component provider cache test (default: 0)
            quietFlag (bool): suppress output in OE library operations (default: True)
            fpTypeList (list): fingerprint types passed to the OE molecule provider (default: [])
            screenTypeList (list): screen types passed to the OE molecule provider (default: [])
            useSdf (bool): write an sdf file for each index entry (default: True)
            useMol2 (bool): write a mol2 file for each index entry (default: False)

        Returns:
            (int): number of search index entries processed (entries failing
                the CIF or OE molecule checks are not counted)
        """
        cachePath = self.__cachePath
        ccUrlTarget = kwargs.get("ccUrlTarget", None)
        birdUrlTarget = kwargs.get("birdUrlTarget", None)
        molLimit = kwargs.get("molLimit", None)
        quietFlag = kwargs.get("quietFlag", True)
        fpTypeList = kwargs.get("fpTypeList", [])
        screenTypeList = kwargs.get("screenTypeList", [])
        ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
        oeFileNamePrefix = "oe-%s" % self.__prefix if self.__prefix else "oe-cc-full"
        numProc = kwargs.get("numProc", 2)
        minCount = kwargs.get("minCount", 0)
        useCache = kwargs.get("useCache", True)
        useSdf = kwargs.get("useSdf", True)
        useMol2 = kwargs.get("useMol2", False)
        limitPerceptions = kwargs.get("limitPerceptions", False)
        logSizes = False
        #
        startTime = time.time()
        ccmP = ChemCompMoleculeProvider(
            cachePath=cachePath,
            useCache=useCache,
            ccFileNamePrefix=ccFileNamePrefix,
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            molLimit=molLimit,
        )
        ok = ccmP.testCache(minCount=minCount, logSizes=logSizes)
        logger.info("Completed chemical component provider load %r (%.4f seconds)", ok, time.time() - startTime)
        #
        startTime = time.time()
        oesmp = OeSearchMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            cachePath=cachePath,
            ccFileNamePrefix=ccFileNamePrefix,
            oeFileNamePrefix=oeFileNamePrefix,
            useCache=useCache,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            screenTypeList=screenTypeList,
            numProc=numProc,
            molLimit=molLimit,
            limitPerceptions=limitPerceptions,
        )
        ok = oesmp.testCache()
        logger.info("Completed OE molecule provider load %r (%.4f seconds)", ok, time.time() - startTime)
        #
        startTime = time.time()
        ccSIdxP = ChemCompSearchIndexProvider(
            cachePath=cachePath,
            useCache=useCache,
            ccFileNamePrefix=ccFileNamePrefix,
            limitPerceptions=limitPerceptions,
            numProc=numProc,
        )
        ok = ccSIdxP.testCache()
        logger.info("Completed chemical component search index load %r (%.4f seconds)", ok, time.time() - startTime)
        #
        # An index that failed its cache test is treated as empty.
        ccSIdx = ccSIdxP.getIndex() if ccSIdxP and ok else {}
        logger.info("Search index status %r index length %d", ok, len(ccSIdx))
        #
        # Component ids whose standard CIF file is known to be in place, so the
        # export is attempted once per component rather than once per index entry.
        ccIdDone = set()
        mU = MarshalUtil()
        oeU = OeIoUtils(dirPath=cachePath)
        numMols = 0
        searchFileDirPath = self.getSearchDirFilePath()
        pathTupList = []
        for sId in ccSIdx:
            # Index identifiers have the form <ccId>[|<qualifier>...]
            ccId = sId.split("|")[0]
            ccDirPath = os.path.join(searchFileDirPath, ccId[0], ccId)
            # standard CIF definition
            if ccId not in ccIdDone:
                cifPath = os.path.join(ccDirPath, ccId + ".cif")
                if useCache and mU.exists(cifPath):
                    ccIdDone.add(ccId)
                else:
                    ccMol = ccmP.getMol(ccId)
                    if not self.__checkCif(ccMol):
                        continue
                    # Only remember the component when the export succeeded so
                    # a failed export is retried for the next related entry.
                    if mU.doExport(cifPath, [ccMol], fmt="mmcif"):
                        ccIdDone.add(ccId)
            #
            # Sanity checks on the generated OE molecule
            #
            oeMol = oesmp.getMol(sId)
            if not self.__checkOeMol(oeMol):
                continue
            #
            # CIF file for index entries that are distinct from the parent component
            cifPath = os.path.join(ccDirPath, sId + ".cif")
            if sId != ccId and not (useCache and mU.exists(cifPath)):
                oeccU = OeChemCompUtils()
                ok = oeccU.addOeMol(sId, oeMol, missingModelXyz=True, writeIdealXyz=False)
                if ok:
                    oeccU.write(cifPath)

            if useSdf:
                molFilePath = os.path.join(ccDirPath, sId + ".sdf")
                if not (useCache and mU.exists(molFilePath)):
                    ok = oeU.write(molFilePath, oeMol, constantMol=False, addSdTags=True)
                    if ok:
                        pathTupList.append((sId, molFilePath, "sdf"))
            #
            if useMol2:
                mol2FilePath = os.path.join(ccDirPath, sId + ".mol2")
                if not (useCache and mU.exists(mol2FilePath)):
                    # Capture the status of this write; previously the result
                    # was dropped and a stale status from an earlier step
                    # decided whether the mol2 path was recorded.
                    ok = oeU.write(mol2FilePath, oeMol, constantMol=False, addSdTags=True)
                    if ok:
                        pathTupList.append((sId, mol2FilePath, "mol2"))
            numMols += 1
        #
        self.__storePathList(pathTupList)
        return numMols