Python ChemCompSearchIndexProvider.testCacheの例

プログラミング言語: Python

名前空間/パッケージ名: rcsb.utils.chem.ChemCompSearchIndexProvider

メソッド/関数: testCache

hotexamples.comのコード掲載数: 9

Python ChemCompSearchIndexProvider.testCache - 9件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのrcsb.utils.chem.ChemCompSearchIndexProvider.ChemCompSearchIndexProvider.testCacheの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

ChemCompSearchIndexProvider(9)

testCache(9)

getIndex(4)

コード例 #1

ファイルを表示

ファイル: testChemCompSearchIndexProvider.py プロジェクト: rcsb/py-rcsb_utils_chem

 def __testBuildSearchIndexCacheFiles(self, **kwargs):
     """Build the chemical component search index cache files and assert the cache is usable.

     All options are read from ``kwargs``; an absent key falls back to the default shown.

     Args:
         molLimit (int, optional): forwarded to the provider as ``molLimit`` and to
             ``testCache()`` as ``minCount``. Defaults to None.
         useCache (bool, optional): forwarded as ``useCache``. Defaults to False.
         logSizes (bool, optional): forwarded to ``testCache()``. Defaults to False.
         limitPerceptions (bool, optional): forwarded as ``limitPerceptions``. Defaults to False.
         numProc (int, optional): forwarded as ``numProc``. Defaults to 1.
         maxChunkSize (int, optional): forwarded as ``maxChunkSize``. Defaults to 5.
         ccFileNamePrefix (str, optional): forwarded as ``ccFileNamePrefix``. Defaults to "cc".
         quietFlag (bool, optional): forwarded as ``quietFlag``. Defaults to True.
         ccUrlTarget (str, optional): forwarded as ``ccUrlTarget``. Defaults to None.
         birdUrlTarget (str, optional): forwarded as ``birdUrlTarget``. Defaults to None.

     Returns:
         ChemCompSearchIndexProvider: the provider instance whose cache test passed.
     """
     # molLimit is used twice below: as the provider build limit and as the
     # minimum count required by the cache test.
     molLimit = kwargs.get("molLimit", None)
     logSizes = kwargs.get("logSizes", False)
     #
     ccsiP = ChemCompSearchIndexProvider(
         ccUrlTarget=kwargs.get("ccUrlTarget", None),
         birdUrlTarget=kwargs.get("birdUrlTarget", None),
         cachePath=self.__cachePath,
         useCache=kwargs.get("useCache", False),
         molLimit=molLimit,
         ccFileNamePrefix=kwargs.get("ccFileNamePrefix", "cc"),
         limitPerceptions=kwargs.get("limitPerceptions", False),
         numProc=kwargs.get("numProc", 1),
         maxChunkSize=kwargs.get("maxChunkSize", 5),
         quietFlag=kwargs.get("quietFlag", True),
     )
     ok = ccsiP.testCache(minCount=molLimit, logSizes=logSizes)
     self.assertTrue(ok)
     logger.info(" ******* Completed operation ******** ")
     #
     return ccsiP

コード例 #2

ファイルを表示

 def testSubStructureSearchFromIndexSelected(self):
     """Run index-prefiltered substructure searches for the components BNZ and ALA.

     For each query the search index is prefiltered, the substructure search is run on
     the surviving candidates, and the test asserts that the search succeeded and that
     the query component appears in its own result list.
     """
     searchOpts = self.__myKwargs.get("matchOpts", "sub-struct-graph-relaxed")
     procCount = self.__numProcSearch
     #
     molProvider = OeSearchMoleculeProvider(**self.__myKwargs)
     self.assertTrue(molProvider.testCache())
     searchUtils = OeSubStructSearchUtils(molProvider)
     #
     indexProvider = ChemCompSearchIndexProvider(**self.__myKwargs)
     self.assertTrue(indexProvider.testCache(minCount=self.__minCount))
     # Fetch the molecule for the first index key; the result is not used further.
     firstKey = next(iter(indexProvider.getIndex()))
     molProvider.getMol(firstKey)
     #
     for ccId in ("BNZ", "ALA"):
         t0 = time.time()
         queryMol = molProvider.getMol(ccId)
         #
         candidateIdL = searchUtils.prefilterIndex(queryMol, indexProvider, matchOpts=searchOpts)
         logger.info("%s search length %d in (%.4f seconds)", ccId, len(candidateIdL), time.time() - t0)
         #
         status, matchL = searchUtils.searchSubStructure(
             queryMol,
             ccIdList=candidateIdL,
             matchOpts=searchOpts,
             numProc=procCount,
         )
         logger.info("%s status %r result length %d in (%.4f seconds)", ccId, status, len(matchL), time.time() - t0)
         self.assertTrue(status)
         self.assertTrue(self.__resultContains(ccId, matchL))

コード例 #3

ファイルを表示

ファイル: PubChemIndexCacheProvider.py プロジェクト: rcsb/py-rcsb_exdb

 def __buildChemCompSearchIndex(self, numProc, **kwargs):
     """Build (or reload) the chemical component search index and check its cache.

     Args:
         numProc (int): forwarded to the provider as ``numProc``.
         cachePath (str, optional): forwarded as ``cachePath``. Defaults to the instance cache path.
         molLimit (int, optional): forwarded to the provider as ``molLimit`` and to
             ``testCache()`` as ``minCount``. Defaults to None.
         rebuildChemIndices (bool, optional): when True the provider is created with
             ``useCache=False``. Defaults to False.
         logSizes (bool, optional): forwarded to ``testCache()``. Defaults to False.
         limitPerceptions (bool, optional): forwarded as ``limitPerceptions``. Defaults to False.
         chunkSize (int, optional): forwarded as ``maxChunkSize``. Defaults to 5.
         ccFileNamePrefix (str, optional): forwarded as ``ccFileNamePrefix``. Defaults to "cc-full".
         quietFlag (bool, optional): forwarded as ``quietFlag``. Defaults to True.
         ccUrlTarget (str, optional): forwarded as ``ccUrlTarget``. Defaults to None.
         birdUrlTarget (str, optional): forwarded as ``birdUrlTarget``. Defaults to None.

     Returns:
         tuple: (status, provider) where provider is the ChemCompSearchIndexProvider
             instance when the cache test status is truthy and None otherwise.
             (False, None) is returned if an exception is raised.
     """
     try:
         # molLimit serves both as the build limit and as the minimum count for the cache test.
         molLimit = kwargs.get("molLimit", None)
         logSizes = kwargs.get("logSizes", False)
         #
         # numProc is taken from the explicit argument, not from kwargs.
         ccsiP = ChemCompSearchIndexProvider(
             ccUrlTarget=kwargs.get("ccUrlTarget", None),
             birdUrlTarget=kwargs.get("birdUrlTarget", None),
             cachePath=kwargs.get("cachePath", self.__cachePath),
             useCache=not kwargs.get("rebuildChemIndices", False),
             molLimit=molLimit,
             ccFileNamePrefix=kwargs.get("ccFileNamePrefix", "cc-full"),
             limitPerceptions=kwargs.get("limitPerceptions", False),
             numProc=numProc,
             maxChunkSize=kwargs.get("chunkSize", 5),
             quietFlag=kwargs.get("quietFlag", True),
         )
         ok = ccsiP.testCache(minCount=molLimit, logSizes=logSizes)
         return ok, (ccsiP if ok else None)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return False, None

コード例 #4

ファイルを表示

ファイル: ChemCompSearchWrapper.py プロジェクト: rcsb/py-rcsb_utils_chem

    def updateSearchIndex(self, useCache=False):
        """Rebuild or reload the search index and refresh the copy held by this instance.

        The provider is configured from the "ccsiKwargs" entry of the instance
        configuration; nothing is done when that entry is missing or empty.

        Note from the original author: resource requirements 771 secs 6 proc
        macbook pro 7GB memory.

        Args:
            useCache (bool): False to rebuild search index and True to reload

        Returns:
            bool: True for success or false otherwise
        """
        ok = False
        try:
            if "ccsiKwargs" not in self.__configD:
                return ok
            providerKwargs = copy.deepcopy(self.__configD["ccsiKwargs"])
            if not providerKwargs:
                return ok
            #
            providerKwargs["useCache"] = useCache
            siIdxP = ChemCompSearchIndexProvider(**providerKwargs)
            ok = siIdxP.testCache()
            self.__siIdxP = siIdxP or None
            # Only expose an index when the provider is usable and its cache test passed.
            if siIdxP and ok:
                self.__siIdx = siIdxP.getIndex()
            else:
                self.__siIdx = {}
            indexLen = len(self.__siIdx) if self.__siIdx else 0
            logger.info("Search index status %r index len %d", ok, indexLen)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return ok

コード例 #5

ファイルを表示

ファイル: testOeSearchUtilsCompare.py プロジェクト: rcsb/py-rcsb_utils_chem

class OeSubStructSearchCompareTests(unittest.TestCase):
    """Exercise substructure and graph/fingerprint match searches on chemical components.

    ``setUp()`` builds the molecule, search index and search utility providers.  Every
    test method is skipped unless the class attribute ``useFull`` is True.
    """

    # True selects the "cc-full"/"oe-full" cache files; False selects the abbreviated
    # dictionaries under the local test-data directory.
    useFull = False

    def setUp(self):
        """Create the providers and search utilities shared by the test methods."""
        self.__workPath = os.path.join(HERE, "test-output")
        self.__dataPath = os.path.join(HERE, "test-data")
        self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
        self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
        self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
        self.__doDisplay = True
        self.__numProcPrep = 6
        self.__numProcSearch = 6
        self.__minCount = None
        self.__startTime = time.time()
        #
        useFull = OeSubStructSearchCompareTests.useFull
        prefixTag = "full" if useFull else "abbrev"
        self.__myKwargs = {
            "cachePath": self.__cachePath,
            "useCache": True,
            "ccFileNamePrefix": "cc-" + prefixTag,
            "oeFileNamePrefix": "oe-" + prefixTag,
            "molBuildType": "model-xyz",
            "limitPerceptions": False,
            "screenTypeList": None,
            "numProc": self.__numProcPrep,
            "suppressHydrogens": True,
            "matchOpts": "sub-struct-graph-relaxed",
            "fpTypeCuttoffD": {
                "TREE": 0.6,
                "MACCS": 0.9,
            },
            "maxFpResults": 50,
        }
        if not useFull:
            # The abbreviated configuration names its source dictionaries explicitly.
            self.__myKwargs["ccUrlTarget"] = self.__ccUrlTarget
            self.__myKwargs["birdUrlTarget"] = self.__birdUrlTarget
        #
        self.__oesmP = OeSearchMoleculeProvider(**self.__myKwargs)
        ok = self.__oesmP.testCache()
        self.assertTrue(ok)
        #
        # NOTE(review): the status of this cache test is not asserted in the original.
        self.__ccmP = ChemCompMoleculeProvider(**self.__myKwargs)
        self.__ccmP.testCache()
        #
        self.__ccsidxP = ChemCompSearchIndexProvider(**self.__myKwargs)
        ok = self.__ccsidxP.testCache(minCount=self.__minCount)
        self.assertTrue(ok)
        self.__oessU = OeSubStructSearchUtils(self.__oesmP)
        ok = self.__oessU.testCache()
        self.assertTrue(ok)
        #
        fpTypeCuttoffD = self.__myKwargs.get("fpTypeCuttoffD", {})
        fpTypeList = list(fpTypeCuttoffD)
        self.__oesU = OeSearchUtils(self.__oesmP, fpTypeList=fpTypeList)
        ok = self.__oesU.testCache()
        self.assertTrue(ok)
        #
        logger.debug("Running tests on version %s", __version__)
        logger.info("Starting %s at %s", self.id(),
                    time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the completion time and the elapsed time since ``setUp()``."""
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(),
                    time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                    endTime - self.__startTime)

    @unittest.skipIf(not useFull, "Requires full data set")
    def testSubStructSearchDescriptor(self):
        """Substructure search using a SMILES descriptor query (strict matching only)."""
        self.__runQueries("n1ccccc1", "oe-iso-smiles", "query-smiles",
                          ["sub-struct-graph-strict"])

    @unittest.skipIf(not useFull, "Requires full data set")
    def testSubStructSearchSelected(self):
        """Substructure search for component STI under each substructure match option."""
        self.__runQueries("STI", "CC", "STI", [
            "sub-struct-graph-relaxed",
            "sub-struct-graph-relaxed-stereo",
            "sub-struct-graph-strict",
        ])

    @unittest.skipIf(not useFull, "Requires full data set")
    def testSubStructSearchAll(self):
        """Substructure search for every component returned by the molecule provider."""
        ccD = self.__ccmP.getMolD()
        for ccId in ccD:
            if ccId in ["UNX", "UNL", "UNK", "DUM"]:
                continue
            # Query molecules with fewer than 3 atoms are not searched.
            self.__runQueries(ccId, "CC", ccId, [
                "sub-struct-graph-relaxed",
                "sub-struct-graph-relaxed-stereo",
                "sub-struct-graph-strict",
            ], defaultNumProc=5, minAtoms=3)

    @unittest.skipIf(not useFull, "Requires full data set")
    def testMatchSearchSelected(self):
        """Fingerprint and graph match searches for component STI."""
        self.__runQueries("STI", "CC", "STI", [
            "fingerprint-similarity",
            "graph-relaxed",
            "graph-relaxed-stereo",
            "graph-strict",
        ])

    def __runQueries(self, query, queryType, queryId, matchOptsList, defaultNumProc=4, minAtoms=0):
        """Search with one query under each match option, assert success and optionally depict.

        Args:
            query (str): component identifier or descriptor string
            queryType (str): "CC" for a component identifier, otherwise a descriptor type
            queryId (str): identifier used for logging, titles and output file names
            matchOptsList (list): match options to run in order
            defaultNumProc (int, optional): numProc used when not set in the test kwargs. Defaults to 4.
            minAtoms (int, optional): when nonzero, skip query molecules with fewer atoms. Defaults to 0.
        """
        limitPerceptions = self.__myKwargs.get("limitPerceptions", False)
        suppressHydrogens = self.__myKwargs.get("suppressHydrogens", True)
        numProc = self.__myKwargs.get("numProc", defaultNumProc)
        for matchOpts in matchOptsList:
            oeMol = self.__getMol(query,
                                  queryType,
                                  queryId,
                                  limitPerceptions=limitPerceptions,
                                  suppressHydrogens=suppressHydrogens)
            if minAtoms and oeMol.NumAtoms() < minAtoms:
                continue
            #
            startTime = time.time()
            retStatus, mL = self.__search(oeMol, matchOpts, numProc)
            logger.info(
                "%s status (%r) matchOpts %s result %d in (%.4f seconds)",
                queryId, retStatus, matchOpts, len(mL),
                time.time() - startTime)
            self.assertTrue(retStatus)
            # Only a component query can be expected to find itself in the results.
            if queryType == "CC":
                self.assertTrue(self.__resultContains(queryId, mL))
            #
            if self.__doDisplay:
                self.__display(mL, query, queryId, queryType, matchOpts)

    def __search(self, oeMol, matchOpts, numProc):
        """Dispatch to the substructure or match search according to ``matchOpts``.

        Returns:
            tuple: (status, result list).  For "fingerprint-similarity" the result list is
                the fingerprint match list; otherwise it is the structure match list.
        """
        if matchOpts.startswith("sub-struct-"):
            return self.__subStructureSearch(oeMol, matchOpts=matchOpts, numProc=numProc)
        #
        retStatus, mL, fpL = self.__matchSearch(oeMol, matchOpts=matchOpts)
        rL = fpL if matchOpts in ["fingerprint-similarity"] else mL
        return retStatus, rL

    #
    def __subStructureSearch(self, oeMol, matchOpts, numProc):
        """Prefilter the search index, then run the substructure search on the candidates.

        Returns:
            tuple: (status, match result list) as returned by ``searchSubStructure()``
        """
        ccIdL = self.__oessU.prefilterIndex(oeMol,
                                            self.__ccsidxP,
                                            matchOpts=matchOpts,
                                            skipFeatures=False)
        return self.__oessU.searchSubStructure(oeMol,
                                               ccIdList=ccIdL,
                                               matchOpts=matchOpts,
                                               numProc=numProc)

    def __matchSearch(self, oeMol, matchOpts="graph-relaxed"):
        """Run the combined substructure and fingerprint search.

        Uses the first two entries of the "fpTypeCuttoffD" test option and the
        "maxFpResults" test option.

        Returns:
            tuple: (status, structure match list, fingerprint match list).
                (False, [], []) is returned if the search raises an exception.
        """
        # Initialise all results first so the failure path returns a well-defined value
        # (and the two lists are distinct objects).
        retStatus = False
        ssL = []
        fpL = []
        try:
            fpTypeCuttoffD = self.__myKwargs.get("fpTypeCuttoffD", {})
            maxFpResults = self.__myKwargs.get("maxFpResults", 50)
            retStatus, ssL, fpL = self.__oesU.searchSubStructureAndFingerPrint(
                oeMol,
                list(fpTypeCuttoffD.items())[:2],
                maxFpResults,
                matchOpts=matchOpts)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return retStatus, ssL, fpL

    def __getMol(self,
                 query,
                 queryType,
                 queryId,
                 limitPerceptions=False,
                 suppressHydrogens=True):
        """Return the OE molecule for a component identifier or a descriptor string.

        Args:
            query (str): component identifier (queryType "CC") or descriptor string
            queryType (str): "CC" or a descriptor type passed to ``descriptorToMol()``
            queryId (str): title assigned to the returned molecule
            limitPerceptions (bool, optional): passed to ``descriptorToMol()``. Defaults to False.
            suppressHydrogens (bool, optional): apply ``suppressHydrogens()`` to the molecule. Defaults to True.

        Returns:
            (obj): OE molecule titled with ``queryId``
        """
        oeioU = OeIoUtils()
        if queryType == "CC":
            oeMol = self.__oesmP.getMol(query)
        else:
            oeMol = oeioU.descriptorToMol(query,
                                          queryType,
                                          limitPerceptions=limitPerceptions,
                                          messageTag=queryId)
        #
        if suppressHydrogens:
            oeMol = oeioU.suppressHydrogens(oeMol)
        oeMol.SetTitle(queryId)
        return oeMol

    def __resultContains(self, ccId, matchResultList):
        """Return True if any match result has the identifier ``ccId``."""
        return any(matchResult.ccId == ccId for matchResult in matchResultList)

    #
    #  ------ ------ ------ ------ ------ ------ ------ ------ ------ ------ ------ ------ ------
    def __display(self, mL, query, queryId, queryType, matchOpts):
        """Depict one alignment per parent component to "<queryId>-<matchOpts>.pdf".

        Match identifiers are grouped by the text before the first "|".  For each group
        the result whose identifier equals the group key is depicted when present,
        otherwise the best scoring (by fpScore) member of the group.
        """
        smL = sorted(mL, key=lambda kv: kv.fpScore, reverse=True)
        # ----
        tD = {}
        for sm in smL:
            tD.setdefault(sm.ccId.split("|")[0], []).append(sm)
        dL = [next((tt for tt in ttL if tt.ccId == ccId), ttL[0]) for ccId, ttL in tD.items()]
        # ----
        pdfImagePath = os.path.join(self.__workPath,
                                    queryId + "-" + matchOpts + ".pdf")
        self.__displayPaginatedAlignments(pdfImagePath,
                                          query,
                                          queryType,
                                          queryId,
                                          dL,
                                          matchOpts=matchOpts)

    def __displayPaginatedAlignments(self,
                                     pdfImagePath,
                                     query,
                                     queryType,
                                     queryId,
                                     matchResultList,
                                     matchOpts="relaxed-stereo",
                                     alignMode="SS"):
        """Pair the query molecule with each match (best fpScore first) and depict the pairs.

        Args:
            pdfImagePath (str): output PDF path
            query (str): component identifier or descriptor string
            queryType (str): "CC" or a descriptor type
            queryId (str): reference identifier shown in the depiction
            matchResultList (list): match results providing ``ccId`` and ``fpScore``
            matchOpts (str, optional): passed to the depiction. Defaults to "relaxed-stereo".
            alignMode (str, optional): passed to the depiction. Defaults to "SS".
        """
        refId = queryId
        oeMolRef = self.__getMol(query,
                                 queryType,
                                 queryId,
                                 limitPerceptions=False,
                                 suppressHydrogens=True)
        pairList = []
        for mr in sorted(matchResultList,
                         key=lambda kv: kv.fpScore,
                         reverse=True):
            fitId = mr.ccId.split("|")[0]
            # NOTE(review): identifiers longer than 4 characters are labelled as
            # tautomer/protomer variants - confirm this still holds for longer component ids.
            if len(mr.ccId) > 4:
                fitId = fitId + " (tautomer/protomer)"
            oeMolFit = self.__oesmP.getMol(mr.ccId)
            pairList.append((refId, oeMolRef, fitId, oeMolFit))
        #
        self.__depictFitList(pdfImagePath,
                             pairList,
                             matchOpts=matchOpts,
                             alignMode=alignMode)

    def __pairDepictPage(self,
                         imagePath,
                         refId,
                         refTitle,
                         refMol,
                         fitId,
                         fitTitle,
                         fitMol,
                         matchOpts="strict"):
        """Depict pairwise alignment of the input reference and fit molecules.

        Args:
            imagePath (str): path to image (format by path extension)
            refId (str): reference molecule identifier
            refTitle (str): reference molecule title
            refMol (obj): reference OE molecule object
            fitId (str): fit molecule identifier
            fitTitle (str): fit molecule title
            fitMol (obj): fit OE molecule object
            matchOpts (str, optional): alignment criteria (relaxed|relaxed-stereo|strict). Defaults to "strict".

        Returns:
            (list): atom mapping in all aligned figures
                    [(reference component Id, reference atom name, fit chemical component Id, fit atom name)]
                    An empty list is returned if the depiction raises an exception.
        """
        aML = []
        try:
            oed = OeDepictMCSAlignPage()
            oed.setSearchType(sType=matchOpts)

            oed.setRefMol(refMol, refId, title=refTitle)
            oed.setFitMol(fitMol, fitId, title=fitTitle)
            oed.setDisplayOptions(
                imageSizeX=2000,
                imageSizeY=1000,
                labelAtomName=True,
                labelAtomCIPStereo=True,
                labelAtomIndex=False,
                labelBondIndex=False,
                highlightStyleFit="ballAndStickInverse",
                bondDisplayWidth=0.5,
                highLightMatchColorRef="green",
                highLightNotMatchColorRef="pink",
            )
            aML = oed.alignPair(imagePath=imagePath)
            if aML:
                for (rCC, rAt, tCC, tAt) in aML:
                    logger.debug("%5s %-5s %5s %-5s", rCC, rAt, tCC, tAt)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return aML

    def __depictFitList(self,
                        pdfImagePath,
                        pairList,
                        matchOpts="exact",
                        alignMode="SS"):
        """Depict pairwise alignments with multi-page layout in PDF format.

        Args:
            pdfImagePath (str): PDF image path
            pairList (list): [(refId, refOeMol, fitId, fitOeMol)]
            matchOpts (str, optional): search type passed to the depiction. Defaults to "exact".
            alignMode (str, optional): "MCSS" selects the MCS alignment depiction; any other
                value selects the substructure alignment depiction. Defaults to "SS".

        Returns:
            (list): atom mapping in all aligned figures
                    [(reference component Id, reference atom name, fit chemical component Id, fit atom name)]
                    An empty list is returned if the depiction raises an exception.
        """
        aML = []
        try:
            if alignMode == "MCSS":
                oed = OeDepictMCSAlignMultiPage()
            else:
                oed = OeDepictSubStructureAlignMultiPage()
            oed.setSearchType(sType=matchOpts)
            oed.setPairMolList(pairList)

            oed.setDisplayOptions(
                labelAtomName=True,
                labelAtomCIPStereo=True,
                labelAtomIndex=False,
                labelBondIndex=False,
                highlightStyleFit="ballAndStickInverse",
                pageOrientation="portrait",
                gridRows=4,
                bondDisplayWidth=0.5,
                highLightMatchColorRef="green",
                highLightNotMatchColorRef="pink",
            )
            aML = oed.alignPairListMulti(imagePath=pdfImagePath)
            if aML:
                for (rCC, rAt, tCC, tAt) in aML:
                    logger.debug("%5s %-5s %5s %-5s", rCC, rAt, tCC, tAt)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return aML

コード例 #6

ファイルを表示

ファイル: CODModelBuild.py プロジェクト: rcsb/py-rcsb_ccmodels

class CODModelBuild(object):
    """Run the COD model build step and maintain the resulting model index.

    The constructor loads the chemical component, search index and OE molecule
    providers; ``build()`` distributes the per-component build over worker processes
    and then merges the per-component index files into a single stored index.
    """

    def __init__(self, cachePath, prefix=None, **kwargs):
        """Load the providers required by the model build.

        Args:
            cachePath (str): top cache directory passed to every provider
            prefix (str, optional): tag inserted in cache file prefixes and model
                directory names. Defaults to None.
            timeOut (optional): passed to the build worker as ``timeOut``. Defaults to None.
            ccUrlTarget (str, optional): passed to the molecule providers. Defaults to None.
            birdUrlTarget (str, optional): passed to the molecule providers. Defaults to None.
            molLimit (int, optional): passed to the OE molecule provider. Defaults to None.
            quietFlag (bool, optional): passed to the OE molecule provider. Defaults to True.
            fpTypeList (list, optional): passed to the OE molecule provider. Defaults to [].
            screenTypeList (list, optional): passed to the OE molecule provider. Defaults to [].
            limitPerceptions (bool, optional): passed to the OE molecule provider. Defaults to False.
            numProc (int, optional): passed to the OE molecule provider. Defaults to 4.
        """
        self.__cachePath = cachePath

        self.__prefix = prefix
        startTime = time.time()
        useCache = True
        self.__timeOut = kwargs.get("timeOut", None)
        self.__ccUrlTarget = kwargs.get("ccUrlTarget", None)
        self.__birdUrlTarget = kwargs.get("birdUrlTarget", None)
        ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc"
        oeFileNamePrefix = "oe-%s" % self.__prefix if self.__prefix else "oe"
        #
        self.__startTime = time.time()
        self.__ccmP = ChemCompMoleculeProvider(
            ccUrlTarget=self.__ccUrlTarget,
            birdUrlTarget=self.__birdUrlTarget,
            cachePath=cachePath,
            useCache=useCache,
            ccFileNamePrefix=ccFileNamePrefix)
        ok1 = self.__ccmP.testCache()

        self.__ccSIdxP = ChemCompSearchIndexProvider(
            cachePath=cachePath,
            useCache=useCache,
            ccFileNamePrefix=ccFileNamePrefix)
        ok2 = self.__ccSIdxP.testCache()

        molLimit = kwargs.get("molLimit", None)
        quietFlag = kwargs.get("quietFlag", True)
        fpTypeList = kwargs.get("fpTypeList", [])
        screenTypeList = kwargs.get("screenTypeList", [])
        limitPerceptions = kwargs.get("limitPerceptions", False)
        numProc = kwargs.get("numProc", 4)
        self.__oesmP = OeSearchMoleculeProvider(
            ccUrlTarget=self.__ccUrlTarget,
            birdUrlTarget=self.__birdUrlTarget,
            cachePath=self.__cachePath,
            ccFileNamePrefix=ccFileNamePrefix,
            oeFileNamePrefix=oeFileNamePrefix,
            useCache=useCache,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            screenTypeList=screenTypeList,
            numProc=numProc,
            molLimit=molLimit,
            limitPerceptions=limitPerceptions,
        )
        ok3 = self.__oesmP.testCache()
        # Return value is discarded; presumably this preloads the molecule dictionary - TODO confirm.
        self.__oesmP.getOeMolD()

        # all() combines the statuses by truthiness, so a None status is reported as a
        # failure instead of raising TypeError as bitwise "&" would.
        logger.info(
            "Completed chemical component search index load %r (%.4f seconds)",
            all((ok1, ok2, ok3)),
            time.time() - startTime)
        logUsage("main", "Setup completed", self.__startTime)
        #
        logger.info("Starting model build (%s) at %s", __version__,
                    time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def getModelDirFilePath(self):
        """Return the model file directory path under the cache path."""
        dN = "cod-%s-model-files" % self.__prefix if self.__prefix else "cod-model-files"
        return os.path.join(self.__cachePath, dN)

    def getModelImageDirFilePath(self):
        """Return the model image directory path under the cache path."""
        # NOTE(review): the prefixed name ends in "-model-image" while the unprefixed name
        # ends in "-model-images"; left unchanged because existing directories may depend on it.
        dN = "cod-%s-model-image" % self.__prefix if self.__prefix else "cod-model-images"
        return os.path.join(self.__cachePath, dN)

    def __getModelIndexPath(self):
        """Return the path of the full model index file inside the model directory."""
        return os.path.join(self.getModelDirFilePath(), "cod-model-index.json")

    def fetchModelIndex(self):
        """Read the stored model index.

        Returns:
            (dict): imported index content, or an empty dictionary if the import raises
        """
        mD = {}
        try:
            mU = MarshalUtil(workPath=self.__cachePath)
            fp = self.__getModelIndexPath()
            mD = mU.doImport(fp, fmt="json")
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return mD

    def storeModelIndex(self, mD):
        """Write the model index as JSON.

        Args:
            mD (dict): model index to store

        Returns:
            (bool): status returned by the export, or False if the export raises
        """
        try:
            mU = MarshalUtil(workPath=self.__cachePath)
            fp = self.__getModelIndexPath()
            ok = mU.doExport(fp, mD, fmt="json", indent=3)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            ok = False
        return ok

    def build(self,
              alignType="relaxed-stereo",
              numProc=4,
              chunkSize=10,
              verbose=False,
              doFigures=True):
        """Run the model build step in the chemical component model workflow.

        Args:
          alignType (str):  "relaxed"|"strict"| relaxed-stereo".  Default: relaxed-stereo
          numProc (int, optional): number of processes to invoke. Defaults to 4.
          chunkSize (int, optional): work chunksize. Defaults to 10.
          verbose (bool, optional): verbose logging.  Defaults to False.
          doFigures (bool, optional): passed to the build worker as "doFigures". Defaults to True.

        Returns:
            (dict): {searchId: [{"targetId": , "modelId": , "modelPath": ,"matchId": , "parentId": , "rFactor": , }]

        """
        retD = {}
        try:
            mU = MarshalUtil(workPath=self.__cachePath)
            ccms = CODModelSearch(self.__cachePath, prefix=self.__prefix)
            modelDirPath = self.getModelDirFilePath()
            imageDirPath = self.getModelImageDirFilePath()
            #
            resultIndexD = ccms.getResultIndex()
            # Make parent index --- (the parent id is the text before the first "|")
            idxIdD = {}
            for idxId, iDL in resultIndexD.items():
                pId = idxId.split("|")[0]
                idxIdD.setdefault(pId, []).extend(iDL)
            #
            idxIdL = list(idxIdD.keys())
            midxIdL = []
            for pId in idxIdL:
                fp = os.path.join(modelDirPath, pId, "model-index.json")
                # A parent with an existing index file larger than 10 bytes is not
                # rebuilt; missing or near-empty index files are queued for building.
                if mU.exists(fp) and os.stat(fp).st_size > 10:
                    continue
                midxIdL.append(pId)
            #
            logger.info(
                "Starting COD model build using (%d) from a total of results length (%d)",
                len(midxIdL), len(idxIdD))
            #
            cmbw = CODModelBuildWorker(self.__cachePath,
                                       verbose=verbose,
                                       timeOut=self.__timeOut)
            mpu = MultiProcUtil(verbose=True)
            mpu.setWorkingDir(modelDirPath)
            mpu.setOptions(
                optionsD={
                    "modelDirPath": modelDirPath,
                    "imageDirPath": imageDirPath,
                    "alignType": alignType,
                    "ccSIdxP": self.__ccSIdxP,
                    "idxIdD": idxIdD,
                    "oesmP": self.__oesmP,
                    "ccmP": self.__ccmP,
                    "doFigures": doFigures,
                })
            #
            mpu.set(workerObj=cmbw, workerMethod="build")
            ok, failList, resultList, _ = mpu.runMulti(dataList=midxIdL,
                                                       numProc=numProc,
                                                       numResults=1,
                                                       chunkSize=chunkSize)
            logger.info(
                "Run ended with status %r success count %d failures %r", ok,
                len(resultList[0]), len(failList))
            successList = copy.copy(resultList[0])
            #
            if successList:
                logger.info("Completed build with %d models ",
                            len(successList))
            else:
                logger.info("No models built")
            #
            # Build full index - from every parent index file, including those skipped above
            #
            logger.info("Building full model index")
            for pId in idxIdL:
                fp = os.path.join(modelDirPath, pId, "model-index.json")
                if mU.exists(fp):
                    for modelD in mU.doImport(fp, fmt="json"):
                        retD.setdefault(modelD["parentId"], []).append(modelD)
            #
            retD = dict(sorted(retD.items()))
            logger.info("Storing models for %d parent components", len(retD))
            # Report a failed store instead of silently discarding the status.
            if not self.storeModelIndex(retD):
                logger.error("Failed to store model index for %d parent components", len(retD))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return retD

コード例 #7

ファイルを表示

ファイル: OeSearchMoleculeProvider.py プロジェクト: rcsb/py-rcsb_utils_chem

    def __reload(self, **kwargs):
        """Rebuild, where needed, the cached OE molecule file and the search databases derived from it.

        Each artifact is regenerated when useCache is false or when its file does not
        exist in the provider directory.  Keyword arguments other than cachePath,
        useCache and molLimit are also passed through to ChemCompSearchIndexProvider.

        Args:
            useCache (bool, optional): reuse existing artifact files when present. Defaults to True.
            cachePath (str, optional): top cache directory given to the search index provider. Defaults to '.'.
            numProc (int, optional): processor count used to build and load screened databases. Defaults to 2.
            molLimit (int, optional): limit given to the search index provider (also its minimum count). Defaults to None.
            fpTypeList (list, optional): fingerprint types. Defaults to TREE, PATH, MACCS, CIRCULAR, LINGO.
            screenTypeList (list, optional): screen types for substructure search databases. Defaults to None.
            limitPerceptions (bool, optional): forwarded to the OE molecule cache build. Defaults to False.
            suppressHydrogens (bool, optional): forwarded to the OE molecule cache build. Defaults to False.
            quietFlag (bool, optional): forwarded to OeIoUtils and the OE molecule cache build. Defaults to True.
            logSizes (bool, optional): forwarded to the search index provider cache test. Defaults to False.

        Returns:
            (bool): True when every step ran without raising, False when an exception was caught (and logged)

        """
        try:
            cachePath = kwargs.get("cachePath", ".")
            useCache = kwargs.get("useCache", True)
            molLimit = kwargs.get("molLimit", None)
            numProc = kwargs.get("numProc", 2)
            logSizes = kwargs.get("logSizes", False)
            quietFlag = kwargs.get("quietFlag", True)
            limitPerceptions = kwargs.get("limitPerceptions", False)
            suppressHydrogens = kwargs.get("suppressHydrogens", False)
            screenTypeList = kwargs.get("screenTypeList", None)
            fpTypeList = kwargs.get(
                "fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
            # Fixed settings: the "FAST" fingerprint section below only runs if fpDbType is changed here.
            fpDbType = "STANDARD"
            buildScreenedDb = True
            #
            oeCount = 0
            oeIo = OeIoUtils(quietFlag=quietFlag)
            # -------- binary OE molecule cache
            oeSearchMolFilePath = os.path.join(self.__dirPath,
                                               self.__getOeSearchMolFileName())
            if not (useCache and self.__mU.exists(oeSearchMolFilePath)):
                reservedKeys = ["cachePath", "useCache", "molLimit"]
                passThruD = {}
                for key, val in kwargs.items():
                    if key not in reservedKeys:
                        passThruD[key] = val
                # The search index provider itself always runs with useCache=True.
                ccsiP = ChemCompSearchIndexProvider(cachePath=cachePath,
                                                    useCache=True,
                                                    molLimit=molLimit,
                                                    **passThruD)
                if ccsiP.testCache(minCount=molLimit, logSizes=logSizes):
                    ccIdxD = ccsiP.getIndex()
                else:
                    ccIdxD = {}
                idxCount = len(ccIdxD)
                # ------- OE mol construction
                buildStart = time.time()
                oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCacheFromIndex(
                    oeSearchMolFilePath,
                    ccIdxD,
                    quietFlag=quietFlag,
                    fpTypeList=fpTypeList,
                    limitPerceptions=limitPerceptions,
                    suppressHydrogens=suppressHydrogens)
                if failIdList:
                    logger.info("failures %r", failIdList)
                buildEnd = time.time()
                logger.info(
                    "Constructed %d/%d cached oeMols  (unconverted %d) (%.4f seconds)",
                    oeCount, idxCount, errCount, buildEnd - buildStart)
            # -------- OE molecule database and index
            oeMolDbFilePath = os.path.join(self.__dirPath,
                                           self.__getOeMolDbFileName())
            if not (useCache and self.__mU.exists(oeMolDbFilePath)):
                dbStart = time.time()
                molCount = oeIo.createOeBinaryDatabaseAndIndex(
                    oeSearchMolFilePath, oeMolDbFilePath)
                dbEnd = time.time()
                logger.info(
                    "Created and stored %d indexed oeMols in OE database format (%.4f seconds)",
                    molCount, dbEnd - dbStart)

            # -------- fast fingerprint search databases
            if fpDbType == "FAST":
                for fpType in fpTypeList:
                    fpStart = time.time()
                    fpPath = os.path.join(self.__dirPath,
                                          self.__getFastFpDbFileName(fpType))
                    if useCache and self.__mU.exists(fpPath):
                        continue
                    oeIo.createOeFingerPrintDatabase(oeMolDbFilePath,
                                                     fpPath,
                                                     fpType=fpType)
                    fpEnd = time.time()
                    logger.info(
                        "Created and stored %s fingerprint database (%.4f seconds)",
                        fpType, fpEnd - fpStart)
            # -------- screened substructure search databases
            if buildScreenedDb and screenTypeList:
                for screenType in screenTypeList:
                    ssStart = time.time()
                    ssPath = os.path.join(
                        self.__dirPath,
                        self.__getSubSearchFileName(screenType))
                    if useCache and self.__mU.exists(ssPath):
                        continue
                    status = oeIo.createOeSubSearchDatabase(
                        oeSearchMolFilePath,
                        ssPath,
                        screenType=screenType,
                        numProc=numProc)
                    ssEnd = time.time()
                    logger.info(
                        "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                        status, screenType, ssEnd - ssStart)
                    # Reload the new database and compare its size with the molecule count;
                    # the comparison result is computed but not acted upon.
                    ssDb = oeIo.loadOeSubSearchDatabase(
                        ssPath, screenType=screenType, numProc=numProc)
                    status = ssDb.NumMolecules() == oeCount
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return False
        return True

コード例 #8

ファイルを表示

ファイル: ChemCompModelGen.py プロジェクト: rcsb/py-rcsb_ccmodels

    def buildSearchFiles(self, **kwargs):
        """Build cif, sdf (optional), and mol2 (optional) files for components in the chemical component search index.

        Search index entries that fail the internal CIF or OE molecule checks are skipped
        (per the original note: ions or other extraneous molecules lacking bonds).
        The (searchId, path, format) tuples of the sdf/mol2 files written in this call
        are handed to the internal path list store.

        Args:
            ccUrlTarget (str, optional): locator for source chemical component dictionary. Defaults to None.
            birdUrlTarget (str, optional): locator for source BIRD dictionary. Defaults to None.
            molLimit (int, optional): limit on the number of ingested chemical components. Defaults to None.
            quietFlag (bool, optional): suppress output in OE library operations. Defaults to True.
            fpTypeList (list, optional): fingerprint types passed to the OE molecule provider. Defaults to [].
            screenTypeList (list, optional): screen types passed to the OE molecule provider. Defaults to [].
            numProc (int, optional): number of processors. Defaults to 2.
            minCount (int, optional): minimum count for the chemical component provider cache test. Defaults to 0.
            useCache (bool, optional): use existing resource files where possible. Defaults to True.
            useSdf (bool, optional): write an sdf file for each search index entry. Defaults to True.
            useMol2 (bool, optional): write a mol2 file for each search index entry. Defaults to False.
            limitPerceptions (bool, optional): restrict automatic perceptions in OE molecular build operations. Defaults to False.

        Returns:
            (int): number of search index entries that passed the checks and were processed
                   (files for these entries may already have existed when useCache is set)
        """
        cachePath = self.__cachePath
        ccUrlTarget = kwargs.get("ccUrlTarget", None)
        birdUrlTarget = kwargs.get("birdUrlTarget", None)
        molLimit = kwargs.get("molLimit", None)
        quietFlag = kwargs.get("quietFlag", True)
        fpTypeList = kwargs.get("fpTypeList", [])
        screenTypeList = kwargs.get("screenTypeList", [])
        ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
        oeFileNamePrefix = "oe-%s" % self.__prefix if self.__prefix else "oe-cc-full"
        numProc = kwargs.get("numProc", 2)
        minCount = kwargs.get("minCount", 0)
        useCache = kwargs.get("useCache", True)
        useSdf = kwargs.get("useSdf", True)
        useMol2 = kwargs.get("useMol2", False)
        limitPerceptions = kwargs.get("limitPerceptions", False)
        logSizes = False
        #
        startTime = time.time()
        ccmP = ChemCompMoleculeProvider(cachePath=cachePath,
                                        useCache=useCache,
                                        ccFileNamePrefix=ccFileNamePrefix,
                                        ccUrlTarget=ccUrlTarget,
                                        birdUrlTarget=birdUrlTarget,
                                        molLimit=molLimit)
        ok = ccmP.testCache(minCount=minCount, logSizes=logSizes)
        logger.info(
            "Completed chemical component provider load %r (%.4f seconds)", ok,
            time.time() - startTime)
        #
        startTime = time.time()
        oesmp = OeSearchMoleculeProvider(
            ccUrlTarget=ccUrlTarget,
            birdUrlTarget=birdUrlTarget,
            cachePath=cachePath,
            ccFileNamePrefix=ccFileNamePrefix,
            oeFileNamePrefix=oeFileNamePrefix,
            useCache=useCache,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            screenTypeList=screenTypeList,
            numProc=numProc,
            molLimit=molLimit,
            limitPerceptions=limitPerceptions,
        )
        ok = oesmp.testCache()
        logger.info("Completed OE molecule provider load %r (%.4f seconds)",
                    ok,
                    time.time() - startTime)
        #
        startTime = time.time()
        ccSIdxP = ChemCompSearchIndexProvider(
            cachePath=cachePath,
            useCache=useCache,
            ccFileNamePrefix=ccFileNamePrefix,
            limitPerceptions=limitPerceptions,
            numProc=numProc)
        ok = ccSIdxP.testCache()
        logger.info(
            "Completed chemical component search index load %r (%.4f seconds)",
            ok,
            time.time() - startTime)
        #
        ccSIdx = ccSIdxP.getIndex() if ccSIdxP and ok else {}
        logger.info("Search index status %r index length %d", ok, len(ccSIdx))
        #
        # ccIdD records, per parent component id, whether its standard CIF definition is
        # usable, so the check/export is performed once rather than for every search id.
        ccIdD = {}
        mU = MarshalUtil()
        oeU = OeIoUtils(dirPath=cachePath)
        numMols = 0
        searchFileDirPath = self.getSearchDirFilePath()
        pathTupList = []
        for sId in ccSIdx:
            # Search ids have the form "<ccId>|..." - the leading field is the parent component id.
            ccId = sId.split("|")[0]
            ccDirPath = os.path.join(searchFileDirPath, ccId[0], ccId)
            # standard CIF definition
            if ccId not in ccIdD:
                cifPath = os.path.join(ccDirPath, ccId + ".cif")
                if useCache and mU.exists(cifPath):
                    ccIdD[ccId] = True
                else:
                    ccMol = ccmP.getMol(ccId)
                    ccIdD[ccId] = bool(self.__checkCif(ccMol))
                    if ccIdD[ccId]:
                        mU.doExport(cifPath, [ccMol], fmt="mmcif")
            if not ccIdD[ccId]:
                continue
            #
            # Sanity checks on the generated OE molecule
            #
            oeMol = oesmp.getMol(sId)
            if not self.__checkOeMol(oeMol):
                continue
            #
            # CIF for the search-specific variant (only when it differs from the parent id)
            cifPath = os.path.join(ccDirPath, sId + ".cif")
            if sId != ccId and not (useCache and mU.exists(cifPath)):
                oeccU = OeChemCompUtils()
                added = oeccU.addOeMol(sId,
                                       oeMol,
                                       missingModelXyz=True,
                                       writeIdealXyz=False)
                if added:
                    oeccU.write(cifPath)

            if useSdf:
                molFilePath = os.path.join(ccDirPath, sId + ".sdf")
                if not (useCache and mU.exists(molFilePath)):
                    wrote = oeU.write(molFilePath,
                                      oeMol,
                                      constantMol=False,
                                      addSdTags=True)
                    if wrote:
                        pathTupList.append((sId, molFilePath, "sdf"))
            #
            if useMol2:
                mol2FilePath = os.path.join(ccDirPath, sId + ".mol2")
                if not (useCache and mU.exists(mol2FilePath)):
                    # Test the status of THIS write; the result was previously discarded
                    # and a stale status from an earlier step was tested instead.
                    wrote = oeU.write(mol2FilePath,
                                      oeMol,
                                      constantMol=False,
                                      addSdTags=True)
                    if wrote:
                        pathTupList.append((sId, mol2FilePath, "mol2"))
            numMols += 1
        #
        self.__storePathList(pathTupList)
        return numMols

コード例 #9

ファイルを表示

ファイル: ChemCompModelBuild.py プロジェクト: rcsb/py-rcsb_ccmodels

class ChemCompModelBuild(object):
    """Run the model build step of the chemical component model workflow and manage the model index file."""

    def __init__(self, cachePath, prefix=None):
        """Load the chemical component search index and note the build start time.

        Args:
            cachePath (str): top-level cache directory
            prefix (str, optional): label embedded in cache file and directory names. Defaults to None.
        """
        self.__cachePath = cachePath
        self.__prefix = prefix
        #
        loadStart = time.time()
        if self.__prefix:
            ccFileNamePrefix = "cc-%s" % self.__prefix
        else:
            ccFileNamePrefix = "cc"
        self.__ccSIdxP = ChemCompSearchIndexProvider(
            cachePath=cachePath,
            useCache=True,
            ccFileNamePrefix=ccFileNamePrefix,
        )
        status = self.__ccSIdxP.testCache()
        logger.info("Completed chemical component search index load %r (%.4f seconds)", status, time.time() - loadStart)
        #
        self.__startTime = time.time()
        logger.info("Starting model build (%s) at %s", __version__, time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def getModelDirFilePath(self):
        """Return the path of the model file directory beneath the cache path."""
        if self.__prefix:
            dirName = "cc-%s-model-files" % self.__prefix
        else:
            dirName = "cc-model-files"
        return os.path.join(self.__cachePath, dirName)

    def getModelImageDirFilePath(self):
        """Return the path of the model image directory beneath the cache path."""
        # NOTE(review): the prefixed name ends in "-image" while the default ends in
        # "-images"; both are kept exactly as they were - confirm whether this is intended.
        if self.__prefix:
            dirName = "cc-%s-model-image" % self.__prefix
        else:
            dirName = "cc-model-images"
        return os.path.join(self.__cachePath, dirName)

    def fetchModelIndex(self):
        """Read the stored model index (JSON).

        Returns:
            the imported index content, or an empty dict if reading raised (the error is logged)
        """
        try:
            marshal = MarshalUtil(workPath=self.__cachePath)
            indexPath = self.__getModelIndexPath()
            return marshal.doImport(indexPath, fmt="json")
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return {}

    def storeModelIndex(self, mD):
        """Write the model index as JSON.

        Args:
            mD (dict): model index content to store

        Returns:
            the status returned by the export call, or False if writing raised (the error is logged)
        """
        try:
            marshal = MarshalUtil(workPath=self.__cachePath)
            indexPath = self.__getModelIndexPath()
            return marshal.doExport(indexPath, mD, fmt="json", indent=3)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False

    def __getModelIndexPath(self):
        """Return the path of the model index file inside the model file directory."""
        modelDir = self.getModelDirFilePath()
        return os.path.join(modelDir, "model-index.json")

    def build(self, alignType="relaxed-stereo", numProc=4, chunkSize=10, verbose=False):
        """Run the model build step in the chemical component model workflow.

        The search result index is distributed over worker processes, the returned
        result dictionaries are grouped by their "parentId" value, and the grouped
        index is stored with storeModelIndex().

        Args:
          alignType (str):  "relaxed"|"strict"|"relaxed-stereo".  Default: relaxed-stereo
          numProc (int, optional): number of processes to invoke. Defaults to 4.
          chunkSize (int, optional): work chunksize. Defaults to 10.
          verbose (bool, optional): verbose logging.  Defaults to False.

        Returns:
            (dict): {parentId: [resultD, ...]} where each resultD is a worker result dictionary
                    (presumably with keys such as targetId, modelId, modelPath, matchId, parentId,
                    rFactor - only parentId is read here).  Whatever was collected before a
                    failure is returned if an exception is caught and logged.

        """
        retD = {}
        try:
            searcher = ChemCompModelSearch(self.__cachePath, None, None, prefix=self.__prefix)
            modelDirPath = self.getModelDirFilePath()
            imageDirPath = self.getModelImageDirFilePath()
            #
            resultIndexD = searcher.getResultIndex()
            resultPathL = list(resultIndexD.values())
            # Distinct parent identifiers (leading "|"-delimited field) - used for logging only.
            parentIdS = {searchId.split("|")[0] for searchId in resultIndexD}
            logger.info("Using search result index length ridxD (%d) parent coverage (%d)", len(resultIndexD), len(parentIdS))
            #
            worker = ChemCompModelBuildWorker(self.__cachePath, verbose=verbose)
            mpu = MultiProcUtil(verbose=True)
            mpu.setWorkingDir(modelDirPath)
            optionsD = {
                "modelDirPath": modelDirPath,
                "imageDirPath": imageDirPath,
                "alignType": alignType,
                "ccSIdxP": self.__ccSIdxP,
            }
            mpu.setOptions(optionsD=optionsD)
            #
            mpu.set(workerObj=worker, workerMethod="build")

            ok, failList, resultList, _ = mpu.runMulti(dataList=resultPathL, numProc=numProc, numResults=1, chunkSize=chunkSize)
            logger.info("Run ended with status %r success count %d failures %r", ok, len(resultList[0]), len(failList))
            for resultD in copy.copy(resultList[0]):
                retD.setdefault(resultD["parentId"], []).append(resultD)
            #
            if not retD:
                logger.info("No models built")
            else:
                logger.info("Completed build with models for %d parent chemical definitions", len(retD))
            self.storeModelIndex(retD)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return retD