def getSubSearchDb(self, screenType="SMARTS", numProc=1, forceRefresh=False):
    """Return the screened substructure search database, loading it when needed.

    The database held on the instance is returned as-is when it is truthy and
    no refresh is requested.  Otherwise the file for the given screen type is
    loaded from the instance directory, stored on the instance and returned.

    Args:
        screenType (str, optional): screen type used to pick the database file
            name and passed on to the loader. Defaults to "SMARTS".
        numProc (int, optional): forwarded to the loader as ``numProc``. Defaults to 1.
        forceRefresh (bool, optional): reload from file even if a database is
            already held. Defaults to False.

    Returns:
        the object produced by ``OeIoUtils.loadOeSubSearchDatabase()`` (or the
        previously stored one).
    """
    held = self.__ssDb
    if held and not forceRefresh:
        return held
    #
    loader = OeIoUtils()
    dbFileName = self.__getSubSearchFileName(screenType)
    dbFilePath = os.path.join(self.__dirPath, dbFileName)
    logger.info("Opening screened substructure search database %r", dbFilePath)
    self.__ssDb = loader.loadOeSubSearchDatabase(dbFilePath, screenType, numProc=numProc)
    return self.__ssDb
def __reload(self, **kwargs):
    """Build any missing OE molecule artifacts for chemical component definitions.

    Each artifact (binary molecule file, indexed molecule database, and the
    optional screened substructure databases) is rebuilt when ``useCache`` is
    false or when its file does not exist in the instance directory.

    Keyword Args:
        useCache (bool): reuse artifact files that already exist. Defaults to True.
        cachePath (str): passed to ChemCompMoleculeProvider. Defaults to '.'.
        numProc (int): passed to the screened substructure database build/load calls. Defaults to 2.
        molLimit (int): passed to ChemCompMoleculeProvider and used as ``minCount``
            when testing its cache. Defaults to 0.
        fpTypeList (list): fingerprint types passed to the molecule builder.
            Defaults to TREE, PATH, MACCS, CIRCULAR and LINGO.
        screenTypeList (list): screen types for which substructure databases are built. Defaults to [].
        molBuildType (str): passed to the molecule builder; screened databases are
            only built for "oe-iso-smiles". Defaults to "model-xyz".
        limitPerceptions (bool): passed to the molecule builder. Defaults to False.
        quietFlag (bool): passed to OeIoUtils and the molecule builder. Defaults to True.
        suppressHydrogens (bool): passed to the molecule builder. Defaults to False.
        logSizes (bool): passed to the provider cache test. Defaults to False.

        Every keyword other than cachePath, useCache and molLimit is also
        forwarded unchanged to ChemCompMoleculeProvider.

    Returns:
        int: count reported by the molecule builder for this call, or 0 when the
        binary molecule file already existed and was not rebuilt.
    """
    useCache = kwargs.get("useCache", True)
    cachePath = kwargs.get("cachePath", ".")
    numProc = kwargs.get("numProc", 2)
    molLimit = kwargs.get("molLimit", 0)
    fpTypeList = kwargs.get("fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
    screenTypeList = kwargs.get("screenTypeList", [])
    molBuildType = kwargs.get("molBuildType", "model-xyz")
    limitPerceptions = kwargs.get("limitPerceptions", False)
    quietFlag = kwargs.get("quietFlag", True)
    suppressHydrogens = kwargs.get("suppressHydrogens", False)
    logSizes = kwargs.get("logSizes", False)
    # Only the "FAST" setting triggers fingerprint database creation below.
    fpDbType = "STANDARD"
    forceBuild = not useCache
    oeCount = 0
    oeIo = OeIoUtils(quietFlag=quietFlag)

    # ---- binary OE molecule file
    oeMolFilePath = os.path.join(self.__dirPath, self.__getOeMolFileName())
    if forceBuild or not self.__mU.exists(oeMolFilePath):
        withheld = ("cachePath", "useCache", "molLimit")
        providerKwargs = {key: val for key, val in kwargs.items() if key not in withheld}
        ccmP = ChemCompMoleculeProvider(cachePath=cachePath, useCache=True, molLimit=molLimit, **providerKwargs)
        cacheOk = ccmP.testCache(minCount=molLimit, logSizes=logSizes)
        ccObjD = ccmP.getMolD() if cacheOk else {}
        ccCount = len(ccObjD)
        #
        tic = time.time()
        oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCache(
            oeMolFilePath,
            ccObjD,
            molBuildType=molBuildType,
            quietFlag=quietFlag,
            fpTypeList=fpTypeList,
            limitPerceptions=limitPerceptions,
            suppressHydrogens=suppressHydrogens,
        )
        logger.info(
            "Stored %d/%d OeMols (suppressH = %r) created with molBuildType %r (unconverted %d)",
            oeCount,
            ccCount,
            suppressHydrogens,
            molBuildType,
            errCount,
        )
        if failIdList:
            logger.info("%r failures %r", molBuildType, failIdList)
        toc = time.time()
        logger.info("Constructed %d/%d cached oeMols (%.4f seconds)", oeCount, ccCount, toc - tic)

    # ---- indexed OE molecule database
    oeMolDbFilePath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
    if forceBuild or not self.__mU.exists(oeMolDbFilePath):
        tic = time.time()
        molCount = oeIo.createOeBinaryDatabaseAndIndex(oeMolFilePath, oeMolDbFilePath)
        toc = time.time()
        logger.info("Created and stored %d indexed OeMols in OE database format (%.4f seconds)", molCount, toc - tic)

    # ---- fast fingerprint databases (one file per fingerprint type)
    if fpDbType == "FAST":
        for fpType in fpTypeList:
            tic = time.time()
            fpPath = os.path.join(self.__dirPath, self.__getFastFpDbFileName(fpType))
            if useCache and self.__mU.exists(fpPath):
                continue
            ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath, fpPath, fpType=fpType)
            toc = time.time()
            logger.info("Created and stored %s fingerprint database (%.4f seconds)", fpType, toc - tic)

    # ---- screened substructure search databases
    if molBuildType in ("oe-iso-smiles",):
        for screenType in screenTypeList:
            tic = time.time()
            ssPath = os.path.join(self.__dirPath, self.__getSubSearchFileName(screenType))
            if useCache and self.__mU.exists(ssPath):
                continue
            ok = oeIo.createOeSubSearchDatabase(oeMolFilePath, ssPath, screenType=screenType, numProc=numProc)
            toc = time.time()
            logger.info(
                "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                ok,
                screenType,
                toc - tic,
            )
            # Load the new database back and compare its size with the stored
            # molecule count; the comparison result is not used further.
            ssDb = oeIo.loadOeSubSearchDatabase(ssPath, screenType=screenType, numProc=numProc)
            ok = ssDb.NumMolecules() == oeCount

    return oeCount
def __reload(self, **kwargs):
    """Build any missing OE search artifacts from the chemical component search index.

    Each artifact (binary search molecule file, indexed molecule database, and
    the optional screened substructure databases) is rebuilt when ``useCache``
    is false or when its file does not exist in the instance directory.

    Keyword Args:
        useCache (bool): reuse artifact files that already exist. Defaults to True.
        cachePath (str): passed to ChemCompSearchIndexProvider. Defaults to '.'.
        numProc (int): passed to the screened substructure database build/load calls. Defaults to 2.
        molLimit (int): passed to ChemCompSearchIndexProvider and used as ``minCount``
            when testing its cache. Defaults to None.
        fpTypeList (list): fingerprint types passed to the molecule builder.
            Defaults to TREE, PATH, MACCS, CIRCULAR and LINGO.
        screenTypeList (list): screen types for which substructure databases are
            built; nothing is built when empty or None. Defaults to None.
        limitPerceptions (bool): passed to the molecule builder. Defaults to False.
        suppressHydrogens (bool): passed to the molecule builder. Defaults to False.
        quietFlag (bool): passed to OeIoUtils and the molecule builder. Defaults to True.
        logSizes (bool): passed to the provider cache test. Defaults to False.

        Every keyword other than cachePath, useCache and molLimit is also
        forwarded unchanged to ChemCompSearchIndexProvider.

    Returns:
        bool: True when all steps ran without raising, False if any exception
        was raised (the exception is logged and not propagated).
    """
    try:
        useCache = kwargs.get("useCache", True)
        cachePath = kwargs.get("cachePath", ".")
        numProc = kwargs.get("numProc", 2)
        molLimit = kwargs.get("molLimit", None)
        fpTypeList = kwargs.get("fpTypeList", ["TREE", "PATH", "MACCS", "CIRCULAR", "LINGO"])
        screenTypeList = kwargs.get("screenTypeList", None)
        limitPerceptions = kwargs.get("limitPerceptions", False)
        suppressHydrogens = kwargs.get("suppressHydrogens", False)
        quietFlag = kwargs.get("quietFlag", True)
        logSizes = kwargs.get("logSizes", False)
        # Only the "FAST" setting triggers fingerprint database creation below.
        fpDbType = "STANDARD"
        buildScreenedDb = True
        forceBuild = not useCache
        oeCount = 0
        oeIo = OeIoUtils(quietFlag=quietFlag)

        # ---- binary OE search molecule file
        oeSearchMolFilePath = os.path.join(self.__dirPath, self.__getOeSearchMolFileName())
        if forceBuild or not self.__mU.exists(oeSearchMolFilePath):
            withheld = ("cachePath", "useCache", "molLimit")
            providerKwargs = {key: val for key, val in kwargs.items() if key not in withheld}
            ccsiP = ChemCompSearchIndexProvider(cachePath=cachePath, useCache=True, molLimit=molLimit, **providerKwargs)
            cacheOk = ccsiP.testCache(minCount=molLimit, logSizes=logSizes)
            ccIdxD = ccsiP.getIndex() if cacheOk else {}
            idxCount = len(ccIdxD)
            #
            tic = time.time()
            oeCount, errCount, failIdList = oeIo.buildOeBinaryMolCacheFromIndex(
                oeSearchMolFilePath,
                ccIdxD,
                quietFlag=quietFlag,
                fpTypeList=fpTypeList,
                limitPerceptions=limitPerceptions,
                suppressHydrogens=suppressHydrogens,
            )
            if failIdList:
                logger.info("failures %r", failIdList)
            toc = time.time()
            logger.info(
                "Constructed %d/%d cached oeMols (unconverted %d) (%.4f seconds)",
                oeCount,
                idxCount,
                errCount,
                toc - tic,
            )

        # ---- indexed OE molecule database
        oeMolDbFilePath = os.path.join(self.__dirPath, self.__getOeMolDbFileName())
        if forceBuild or not self.__mU.exists(oeMolDbFilePath):
            tic = time.time()
            molCount = oeIo.createOeBinaryDatabaseAndIndex(oeSearchMolFilePath, oeMolDbFilePath)
            toc = time.time()
            logger.info("Created and stored %d indexed oeMols in OE database format (%.4f seconds)", molCount, toc - tic)

        # ---- fast fingerprint databases (one file per fingerprint type)
        if fpDbType == "FAST":
            for fpType in fpTypeList:
                tic = time.time()
                fpPath = os.path.join(self.__dirPath, self.__getFastFpDbFileName(fpType))
                if useCache and self.__mU.exists(fpPath):
                    continue
                ok = oeIo.createOeFingerPrintDatabase(oeMolDbFilePath, fpPath, fpType=fpType)
                toc = time.time()
                logger.info("Created and stored %s fingerprint database (%.4f seconds)", fpType, toc - tic)

        # ---- screened substructure search databases
        if buildScreenedDb and screenTypeList:
            for screenType in screenTypeList:
                tic = time.time()
                ssPath = os.path.join(self.__dirPath, self.__getSubSearchFileName(screenType))
                if useCache and self.__mU.exists(ssPath):
                    continue
                ok = oeIo.createOeSubSearchDatabase(oeSearchMolFilePath, ssPath, screenType=screenType, numProc=numProc)
                toc = time.time()
                logger.info(
                    "Constructed screened substructure database (status %r) with screenType %s (%.4f seconds)",
                    ok,
                    screenType,
                    toc - tic,
                )
                # Load the new database back and compare its size with the stored
                # molecule count; the comparison result is not used further.
                ssDb = oeIo.loadOeSubSearchDatabase(ssPath, screenType=screenType, numProc=numProc)
                ok = ssDb.NumMolecules() == oeCount
        #
        return True
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return False