async def startupEvent():
    """Load the search and depiction dependencies when the service starts.

    The data host, data path and update channel are read from the environment
    variables CHEM_SEARCH_DATA_HOSTNAME, CHEM_SEARCH_DATA_PATH and
    CHEM_SEARCH_UPDATE_CHANNEL.  When host and path are set and the channel is
    "A" or "B" (either case), restoreDependencies() is called with the
    "http://"-prefixed host before the configuration, indices and search
    database are loaded.  The outcome of each phase is only logged.
    """
    logger.info("Startup - loading search dependencies")
    searchWrapper = ChemCompSearchWrapper()

    # Optional location of the dependency bundle, supplied via the environment
    dataHost = os.environ.get("CHEM_SEARCH_DATA_HOSTNAME")
    dataPath = os.environ.get("CHEM_SEARCH_DATA_PATH")
    channel = os.environ.get("CHEM_SEARCH_UPDATE_CHANNEL")
    logger.info("Dependency data host %r path %r update channel %r", dataHost, dataPath, channel)

    channelOk = channel in ["A", "B", "a", "b"]
    if dataHost and dataPath and channelOk:
        searchWrapper.restoreDependencies("http://" + dataHost, dataPath, bundleLabel=channel.upper())

    # Every load step runs regardless of the status returned by the previous one
    configOk = searchWrapper.readConfig()
    indexOk = searchWrapper.updateChemCompIndex(useCache=True)
    databaseOk = searchWrapper.reloadSearchDatabase()
    searchIndexOk = searchWrapper.updateSearchIndex(useCache=True)
    logger.info("Completed - loading search dependencies status %r", configOk and indexOk and databaseOk and searchIndexOk)

    depictWrapper = ChemCompDepictWrapper()
    depictOk = depictWrapper.readConfig()
    logger.info("Completed - loading depict dependencies status %r", depictOk)

    searchWrapper.status()
def __reload(self):
    """Create a newly configured search wrapper and reload its search database.

    Returns:
        The new ChemCompSearchWrapper when setConfig(), readConfig() and
        reloadSearchDatabase() all return a truthy status, otherwise None.
    """
    wrapper = ChemCompSearchWrapper(cachePath=self.__cachePath)
    configured = wrapper.setConfig(
        self.__ccUrlTarget,
        self.__birdUrlTarget,
        ccFileNamePrefix=self.__ccFileNamePrefix,
        useCache=self.__useCache,
        numProc=self.__numProc,
        maxChunkSize=self.__chunkSize,
    )
    # All three steps are always attempted; their statuses are combined afterwards
    configRead = wrapper.readConfig()
    databaseLoaded = wrapper.reloadSearchDatabase()
    if configured and configRead and databaseLoaded:
        return wrapper
    return None
def testAReadConfig(self):
    """Verify that the search wrapper configuration can be read."""
    try:
        wrapper = ChemCompSearchWrapper()
        self.assertTrue(wrapper.readConfig())
    except Exception as err:
        # Assertion failures are caught here as well, then logged and reported
        logger.exception("Failing with %s", str(err))
        self.fail()
def testEReloadSearchDatabase(self):
    """Verify that the search databases reload after the configuration is read."""
    try:
        wrapper = ChemCompSearchWrapper()
        self.assertTrue(wrapper.readConfig())
        self.assertTrue(wrapper.reloadSearchDatabase())
    except Exception as err:
        # Assertion failures are caught here as well, then logged and reported
        logger.exception("Failing with %s", str(err))
        self.fail()
def testDUpdateSearchMoleculeProvider(self):
    """Verify that the search molecule provider updates after the configuration is read."""
    try:
        wrapper = ChemCompSearchWrapper()
        self.assertTrue(wrapper.readConfig())
        self.assertTrue(wrapper.updateSearchMoleculeProvider())
    except Exception as err:
        # Assertion failures are caught here as well, then logged and reported
        logger.exception("Failing with %s", str(err))
        self.fail()
def testCUpdateSearchIndex(self):
    """Verify that the search index updates after the configuration is read."""
    try:
        wrapper = ChemCompSearchWrapper()
        self.assertTrue(wrapper.readConfig())
        self.assertTrue(wrapper.updateSearchIndex())
    except Exception as err:
        # Assertion failures are caught here as well, then logged and reported
        logger.exception("Failing with %s", str(err))
        self.fail()
def testBUpdateChemCompIndex(self):
    """Verify that the chemical component/Bird basic index updates after the configuration is read."""
    try:
        wrapper = ChemCompSearchWrapper()
        self.assertTrue(wrapper.readConfig())
        self.assertTrue(wrapper.updateChemCompIndex())
    except Exception as err:
        # Assertion failures are caught here as well, then logged and reported
        logger.exception("Failing with %s", str(err))
        self.fail()
def testZoomSubStructSearch(self):
    """Run a relaxed substructure search for the descriptors of each indexed component.

    After loading the configuration, indices and search database, every entry
    of the chemical component index is visited (only the first
    self.__numMolsTest entries when that limit is truthy).  For each build type
    in self.__buildTypeList present in the entry, searchByDescriptor() is
    called with matchOpts="sub-struct-graph-relaxed" and
    self.__resultContains(ccId, results) must be true.  Searches returning
    status -100 (descriptor error) are logged and skipped.
    """
    try:
        numMolsTest = self.__numMolsTest
        ccsw = ChemCompSearchWrapper()
        ok = ccsw.readConfig()
        self.assertTrue(ok)
        ok = ccsw.updateChemCompIndex(useCache=True)
        self.assertTrue(ok)
        ccIdx = ccsw.getChemCompIndex()
        ok = ccsw.updateSearchIndex(useCache=True)
        self.assertTrue(ok)
        ok = ccsw.reloadSearchDatabase()
        self.assertTrue(ok)
        # Logging arguments are evaluated even when DEBUG is disabled, so use
        # .get() to keep this diagnostic from raising KeyError when "000" is absent.
        logger.debug("ccIdx (%d) keys %r entry %r", len(ccIdx), list(ccIdx.keys())[:10], ccIdx.get("000"))
        logger.info("Dependencies loaded - Starting search test scan of (limit=%r)", numMolsTest)
        for ii, (ccId, ccD) in enumerate(ccIdx.items(), 1):
            if numMolsTest and ii > numMolsTest:
                break
            for buildType in self.__buildTypeList:
                if buildType not in ccD:
                    continue
                startTime = time.time()
                retStatus, ssL, _ = ccsw.searchByDescriptor(ccD[buildType], buildType, matchOpts="sub-struct-graph-relaxed")
                if retStatus == -100:
                    logger.warning("Descriptor error continuing...")
                    continue
                mOk = self.__resultContains(ccId, ssL)
                # Name the failing component so the logged failure is actionable
                self.assertTrue(mOk, "Result check failed for %s (buildType %s)" % (ccId, buildType))
                # Unique component identifiers (text before the first "|") among the matches
                ssCcIdList = list({t.ccId.split("|")[0] for t in ssL})
                logger.info(
                    "%s (%d) for buildType %s (%d) (%r) %r (%.4f secs)",
                    ccId,
                    ii,
                    buildType,
                    len(ssCcIdList),
                    mOk and retStatus == 0,
                    ssCcIdList,
                    time.time() - startTime,
                )
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def updateDependencies(self):
    """Rebuild the search indices from the configuration files.

    Runs readConfig(), updateChemCompIndex(), updateSearchIndex() and
    updateSearchMoleculeProvider() on a new ChemCompSearchWrapper, then
    reloads the search database to verify access.

    Returns:
        The combined ("and"-ed) status of all five steps, or False when any
        step raises an exception.
    """
    try:
        logger.info("Starting update %r in %r", self.__ccFileNamePrefix, self.__cachePath)
        wrapper = ChemCompSearchWrapper(cachePath=self.__cachePath, ccFileNamePrefix=self.__ccFileNamePrefix)
        # Each step is attempted even when an earlier one reports failure
        configOk = wrapper.readConfig()
        compIndexOk = wrapper.updateChemCompIndex()
        searchIndexOk = wrapper.updateSearchIndex()
        providerOk = wrapper.updateSearchMoleculeProvider()
        # Reload the search database to verify access
        reloadOk = wrapper.reloadSearchDatabase()
        return configOk and compIndexOk and searchIndexOk and providerOk and reloadOk
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        return False
def testZoomMatchFormula(self):
    """Run a formula-range match for the element counts of each indexed component.

    Every entry of the chemical component index is visited (only the first
    self.__numMolsTest entries when that limit is truthy).  An element range
    with min == max == the count from the entry's "type-counts" mapping is
    passed to matchByFormulaRange(), and self.__resultContains(ccId, results)
    must be true.
    """
    try:
        numMolsTest = self.__numMolsTest
        ccsw = ChemCompSearchWrapper()
        ok = ccsw.readConfig()
        self.assertTrue(ok)
        ok = ccsw.updateChemCompIndex(useCache=True)
        self.assertTrue(ok)
        ccIdx = ccsw.getChemCompIndex()
        # Logging arguments are evaluated even when DEBUG is disabled, so use
        # .get() to keep this diagnostic from raising KeyError when "000" is absent.
        logger.debug("ccIdx (%d) keys %r entry %r", len(ccIdx), list(ccIdx.keys())[:10], ccIdx.get("000"))
        logger.info("Dependencies loaded - Starting formula test scan of (limit=%r)", numMolsTest)
        for ii, (ccId, idxD) in enumerate(ccIdx.items(), 1):
            if numMolsTest and ii > numMolsTest:
                break
            startTime = time.time()
            # Exact-count range: min and max are both the component's own element count
            elementRangeD = {el: {"min": eCount, "max": eCount} for el, eCount in idxD["type-counts"].items()}
            retStatus, rL = ccsw.matchByFormulaRange(elementRangeD, ccId)
            mOk = self.__resultContains(ccId, rL)
            # Name the failing component so the logged failure is actionable
            self.assertTrue(mOk, "Result check failed for %s" % ccId)
            logger.info(
                "%s (%d) (%d) (%r) (%.4f secs)",
                ccId,
                ii,
                len(rL),
                mOk and retStatus == 0,
                time.time() - startTime,
            )
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testZoomFingerprintRepeat(self):
    """Repeat a fingerprint-similarity search 100 times for one InChI descriptor.

    After loading the configuration, indices and search database, the same
    descriptor is searched with matchOpts="fingerprint-similarity" on each
    iteration.  For every iteration the best fpScore per component identifier
    (text before the first "|") is collected and the identifiers and scores
    are logged in descending score order together with the elapsed time.
    Searches returning status -100 (descriptor error) are logged and skipped.
    The search results themselves are logged, not asserted.
    """
    try:
        ccsw = ChemCompSearchWrapper()
        ok = ccsw.readConfig()
        self.assertTrue(ok)
        ok = ccsw.updateChemCompIndex(useCache=True)
        self.assertTrue(ok)
        ccIdx = ccsw.getChemCompIndex()
        ok = ccsw.updateSearchIndex(useCache=True)
        self.assertTrue(ok)
        ok = ccsw.reloadSearchDatabase()
        self.assertTrue(ok)
        logger.debug("ccIdx (%d)", len(ccIdx))
        descr = "InChI=1S/C9H15N5O3/c1-3(15)6(16)4-2-11-7-5(12-4)8(17)14-9(10)13-7/h3-4,6,12,15-16H,2H2,1H3,(H4,10,11,13,14,17)/t3-,4-,6-/m1/s1"
        for ii in range(100):
            startTime = time.time()
            retStatus, _, fpL = ccsw.searchByDescriptor(descr, "InChI", matchOpts="fingerprint-similarity")
            if retStatus == -100:
                logger.warning("Descriptor error continuing...")
                continue
            # Keep the highest fingerprint score seen for each component identifier
            rD = {}
            for mr in fpL:
                ccId = mr.ccId.split("|")[0]
                rD[ccId] = max(rD.get(ccId, mr.fpScore), mr.fpScore)
            rTupL = sorted(rD.items(), key=lambda kv: kv[1], reverse=True)
            rL = [rTup[0] for rTup in rTupL]
            scoreL = [rTup[1] for rTup in rTupL]
            logger.info("%4d (%3d) %r (%.4f secs)", ii, len(rL), retStatus == 0, time.time() - startTime)
            logger.info("rL %r", rL)
            logger.info("scoreL %r", scoreL)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()