def testAccessEntityPolymerReadCache(self):
    """Test case - access cached entity polymer info from test cache

    Constructs an EntityPolymerExtractor on the test cache directory with
    ``useCache=False``, logs the reference sequence accession tallies, and
    asserts that:

    * the UNP accession count distribution has at least two keys,
    * the UNP alignment range check returns a true value,
    * at least one UNP accession is returned.

    Any exception (including a failed assertion) is logged and reported as a
    test failure.
    """
    try:
        epe = EntityPolymerExtractor(
            self.__cfgOb,
            exdbDirPath=self.__exdbCacheDirPath,
            useCache=False,
            cacheKwargs=self.__cacheKwargs,
        )
        logger.info("Cache entry count %d", epe.getEntryCount())
        cD = epe.countRefSeqAccessions("UNP")
        self.assertGreaterEqual(len(cD), 2)
        logger.info("UNP reference sequences per entity %r", dict(sorted(cD.items())))
        logger.info("Reference sequences per entity %r", dict(sorted(epe.countRefSeqAccessionAny().items())))
        logger.info("Reference sequences per ref db %r", dict(sorted(epe.countRefSeqAccessionDbType().items())))
        #
        ok = epe.checkRefSeqAlignRange("UNP")
        self.assertTrue(ok)
        unpL = epe.getRefSeqAccessions("UNP")
        logger.info("Unique UNP reference sequences %d", len(unpL))
        # Check the accession list itself; re-asserting the alignment flag
        # here would leave the list unverified.
        self.assertGreaterEqual(len(unpL), 1)
        tD = epe.getUniqueTaxons()
        logger.info("Unique taxons %d", len(tD))
        # Same log label, different source: this count comes from the
        # per-taxon accession map.
        tD = epe.countRefSeqAccessionByTaxon("UNP")
        logger.info("Unique taxons %d", len(tD))
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testTaxonomyEntityPolymerReadCache(self):
    """Test case - evaluate taxonomy - from full cache

    For each taxonomy identifier in a fixed list, logs its lineage and then
    totals the distinct UNP accessions of every cached taxon whose lineage
    contains that identifier. The per-taxon matches and the total are logged.

    Any exception is logged and reported as a test failure.
    """
    try:
        taxIdList = [562, 9606, 3701]
        # Neither helper depends on the taxon being examined, so build them
        # (and load the cache) once instead of once per identifier.
        tU = TaxonomyUtils(taxDirPath=self.__workPath)
        epe = EntityPolymerExtractor(
            self.__cfgOb,
            saveCachePath=self.__fullEntitySaveCachePath,
            useCache=True,
            saveCacheKwargs=self.__fullCacheKwargs,
        )
        logger.info("Cache entry count %d", epe.getEntryCount())
        logger.info("Reference sequences per ref db %r", dict(sorted(epe.countRefSeqAccessionDbType().items())))
        rD = epe.countRefSeqAccessionByTaxon(dbNameList=["UNP"])
        logger.info("Unique taxons %d", len(rD))
        # Look up each cached taxon's lineage once; every target identifier
        # below is tested against the same lineages.
        lineageD = {tId: tU.getLineage(tId) for tId in rD}
        #
        for taxId in taxIdList:
            logger.info("Taxonomy lineage for %d %r", taxId, tU.getLineage(taxId))
            numT = 0
            for tId, aL in rD.items():
                if taxId in lineageD[tId]:
                    # Count each accession once per taxon.
                    tc = len(set(aL))
                    logger.info("Matched %5d %s (%r)", tc, tU.getScientificName(tId), tId)
                    numT += tc
            logger.info("Total matched accessions %d ", numT)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testAccessEntityPolymerFeatures(self):
    """Test case - access cached entity polymer info from full cache

    Constructs an EntityPolymerExtractor on the full saved cache with
    ``useCache=True`` and asserts that the entry count reaches the configured
    full entry limit (or 10 when no limit is set) and that at least one UNP
    accession is returned. An optional per-entity lookup is present but
    switched off.

    Any exception (including a failed assertion) is logged and reported as a
    test failure.
    """
    try:
        epe = EntityPolymerExtractor(
            self.__cfgOb,
            saveCachePath=self.__fullEntitySaveCachePath,
            useCache=True,
            saveCacheKwargs=self.__fullCacheKwargs,
        )
        eCount = epe.getEntryCount()
        logger.info("Entry count %d", eCount)
        # The full entry limit can be unset (None); ordering an int against
        # None raises TypeError, so fall back to a small fixed floor.
        minCount = self.__entryLimitFull if self.__entryLimitFull is not None else 10
        self.assertGreaterEqual(eCount, minCount)
        #
        unpL = epe.getRefSeqAccessions("UNP")
        logger.info("Ref seq count %d", len(unpL))
        self.assertGreaterEqual(len(unpL), 1)
        #
        # Manual troubleshooting switch: set to True to log the UNP
        # accessions of the listed entry/entity pairs.
        testOp = False
        if testOp:
            for entryId in ["1CP9"]:
                for entityId in ["1", "2"]:
                    uL = epe.getEntityRefSeqAccessions("UNP", entryId, entityId)
                    logger.debug("UNP for %s %s %r", entryId, entityId, uL)
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testExtractEntityPolymers(self):
    """Fixture - extract and save entity polymer info

    Constructs an EntityPolymerExtractor on the test cache directory with
    ``useCache=False`` and the test entry limit, then asserts that at least
    10 entries are reported.

    Any exception (including a failed assertion) is logged and reported as a
    test failure.
    """
    try:
        extractorArgs = {
            "exdbDirPath": self.__exdbCacheDirPath,
            "useCache": False,
            "cacheKwargs": self.__cacheKwargs,
            "entryLimit": self.__entryLimitTest,
        }
        extractor = EntityPolymerExtractor(self.__cfgOb, **extractorArgs)
        numEntries = extractor.getEntryCount()
        self.assertGreaterEqual(numEntries, 10)
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def testRebuildCache(self):
    """Test case - extract entity polymer info - rebuild full cache of extracted entity polymer data -

    Constructs an EntityPolymerExtractor on the full saved cache path with
    ``useCache=False`` and the full entry limit, then asserts that the entry
    count reaches that limit, or 10 when the limit is None.

    Any exception (including a failed assertion) is logged and reported as a
    test failure.
    """
    try:
        extractor = EntityPolymerExtractor(
            self.__cfgOb,
            saveCachePath=self.__fullEntitySaveCachePath,
            useCache=False,
            saveCacheKwargs=self.__fullCacheKwargs,
            entryLimit=self.__entryLimitFull,
        )
        numEntries = extractor.getEntryCount()
        # With no configured limit, require a small fixed minimum instead.
        minExpected = 10 if self.__entryLimitFull is None else self.__entryLimitFull
        self.assertGreaterEqual(numEntries, minExpected)
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def testTaxonomyReadCache(self):
    """Test case - access cached entity polymer info from test cache

    Reads the original taxon assignments from the extractor, asks the
    taxonomy provider for the merged identifier of each one, and collects
    every assignment whose merged identifier differs from the original.
    The collected records are exported as JSON to ``obsolete-taxons.json``
    in the work path.

    Any exception is logged and reported as a test failure.
    """
    try:
        extractor = EntityPolymerExtractor(
            self.__cfgOb,
            exdbDirPath=self.__exdbCacheDirPath,
            useCache=False,
            cacheKwargs=self.__cacheKwargs,
        )
        logger.info("Cache entry count %d", extractor.getEntryCount())
        #
        origTaxonD = extractor.getOrigTaxons()
        logger.info("Taxons %d", len(origTaxonD))
        taxProvider = TaxonomyProvider(taxDirPath=self.__taxonomyDataPath, useCache=True)
        #
        obsoleteL = []
        for entryId, entityTaxonL in origTaxonD.items():
            for entityId, origTaxId in entityTaxonL:
                mergedTaxId = taxProvider.getMergedTaxId(origTaxId)
                if origTaxId != mergedTaxId:
                    # Record the assignment together with its replacement id.
                    record = {
                        "entryId": entryId,
                        "entityId": entityId,
                        "taxId": origTaxId,
                        "replaceTaxId": mergedTaxId,
                    }
                    obsoleteL.append(record)
        logger.info("Obsolete list length %d", len(obsoleteL))
        exportPath = os.path.join(self.__workPath, "obsolete-taxons.json")
        self.__mU.doExport(exportPath, obsoleteL, fmt="json", indent=3)
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()
def __getReferenceAssignments(self, refDbName, **kwargs):
    """Get all accessions assigned to input reference sequence database

    Args:
        refDbName: reference database name passed to
            ``getRefSeqAccessions()`` (e.g. "UNP")
        **kwargs: optional ``exdbDirPath``, ``cacheKwargs``, ``useCache``
            (default True) and ``entryLimit``, forwarded to the
            EntityPolymerExtractor constructor; each but ``useCache``
            defaults to None

    Returns:
        the value returned by ``getRefSeqAccessions(refDbName)``, or an empty
        list when an exception was raised before that value was obtained
        (the exception is logged, not propagated)
    """
    accessionL = []
    try:
        extractor = EntityPolymerExtractor(
            self.__cfgOb,
            exdbDirPath=kwargs.get("exdbDirPath"),
            useCache=kwargs.get("useCache", True),
            cacheKwargs=kwargs.get("cacheKwargs"),
            entryLimit=kwargs.get("entryLimit"),
        )
        numEntries = extractor.getEntryCount()
        accessionL = extractor.getRefSeqAccessions(refDbName)
        logger.info(
            "Reading polymer entity cache with repository entry count %d ref accession length %d ",
            numEntries,
            len(accessionL),
        )
    except Exception as err:
        # Best effort: report the problem and hand back what we have.
        logger.exception("Failing with %s", str(err))
    return accessionL
def testAccessEntityPolymerFeatures(self):
    """Test case - access cached entity polymer info from test cache

    Constructs an EntityPolymerExtractor on the test cache directory with
    ``useCache=False`` and asserts that the entry count reaches the test
    entry limit and that at least one UNP accession is returned. The UNP
    accessions for one sample entry/entity pair are logged.

    Any exception (including a failed assertion) is logged and reported as a
    test failure.
    """
    try:
        extractor = EntityPolymerExtractor(
            self.__cfgOb,
            exdbDirPath=self.__exdbCacheDirPath,
            useCache=False,
            cacheKwargs=self.__cacheKwargs,
        )
        numEntries = extractor.getEntryCount()
        logger.info("Entry count %d", numEntries)
        self.assertGreaterEqual(numEntries, self.__entryLimitTest)
        #
        accessionL = extractor.getRefSeqAccessions("UNP")
        logger.info("Ref seq count %d", len(accessionL))
        self.assertGreaterEqual(len(accessionL), 1)
        #
        # Sample (entry, entity) pairs to look up.
        samplePairs = [("3RER", "1")]
        for entryId, entityId in samplePairs:
            entityAccessionL = extractor.getEntityRefSeqAccessions("UNP", entryId, entityId)
            logger.info("UNP for %s %s %r", entryId, entityId, entityAccessionL)
    except Exception as err:
        logger.exception("Failing with %s", str(err))
        self.fail()