def testAccessEntityPolymerReadCache(self):
     """Test case - access entity polymer info from the test cache directory.

     Builds an EntityPolymerExtractor on the exdb cache directory and checks:
       - countRefSeqAccessions("UNP") yields at least two items
       - checkRefSeqAlignRange("UNP") is truthy

     The remaining results (accession counts per entity and per reference
     database, the UNP accession list and the taxon counts) are only logged.
     Any exception raised in the body - failed assertions included - is
     logged and reported as a test failure.

     NOTE(review): the extractor is created with useCache=False; whether
     that reads or rebuilds the cache is decided by EntityPolymerExtractor
     - confirm against its implementation.
     """
     try:
         epe = EntityPolymerExtractor(
             self.__cfgOb,
             exdbDirPath=self.__exdbCacheDirPath,
             useCache=False,
             cacheKwargs=self.__cacheKwargs,
         )
         logger.info("Cache entry count %d", epe.getEntryCount())
         #
         unpCountD = epe.countRefSeqAccessions("UNP")
         self.assertGreaterEqual(len(unpCountD), 2)
         logger.info("UNP reference sequences per entity %r",
                     dict(sorted(unpCountD.items())))
         logger.info("Reference sequences per entity %r",
                     dict(sorted(epe.countRefSeqAccessionAny().items())))
         logger.info("Reference sequences per ref db %r",
                     dict(sorted(epe.countRefSeqAccessionDbType().items())))
         #
         ok = epe.checkRefSeqAlignRange("UNP")
         self.assertTrue(ok)
         #
         unpL = epe.getRefSeqAccessions("UNP")
         logger.info("Unique UNP reference sequences %d", len(unpL))
         #
         # Two different taxon summaries are reported under the same log label.
         uniqueTaxonD = epe.getUniqueTaxons()
         logger.info("Unique taxons %d", len(uniqueTaxonD))
         unpByTaxonD = epe.countRefSeqAccessionByTaxon("UNP")
         logger.info("Unique taxons %d", len(unpByTaxonD))
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
 def testTaxonomyEntityPolymerReadCache(self):
     """Test case - evaluate taxonomy - from full cache

     For each taxonomy id in a fixed list (562, 9606, 3701) the test logs
     the lineage of that id and then totals the distinct UNP accessions of
     every taxon whose lineage contains it.  Results are logged only; the
     test fails solely when an exception is raised.

     The taxonomy helper, the extractor and the per-taxon accession map do
     not depend on the taxonomy id being evaluated, so they are created
     once rather than on every pass through the loop.
     """
     try:
         taxIdList = [562, 9606, 3701]
         #
         tU = TaxonomyUtils(taxDirPath=self.__workPath)
         epe = EntityPolymerExtractor(
             self.__cfgOb,
             saveCachePath=self.__fullEntitySaveCachePath,
             useCache=True,
             saveCacheKwargs=self.__fullCacheKwargs,
         )
         logger.info("Cache entry count %d", epe.getEntryCount())
         logger.info(
             "Reference sequences per ref db %r",
             dict(sorted(epe.countRefSeqAccessionDbType().items())))
         rD = epe.countRefSeqAccessionByTaxon(dbNameList=["UNP"])
         logger.info("Unique taxons %d", len(rD))
         #
         for taxId in taxIdList:
             tL = tU.getLineage(taxId)
             logger.info("Taxonomy lineage for %d %r", taxId, tL)
             #
             numT = 0
             for tId, aL in rD.items():
                 # Count this taxon when the target id appears in its lineage.
                 if taxId in tU.getLineage(tId):
                     tc = len(set(aL))
                     logger.info("Matched %5d %s (%r)", tc,
                                 tU.getScientificName(tId), tId)
                     numT += tc
             logger.info("Total matched accessions %d ", numT)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
 def testAccessEntityPolymerFeatures(self):
     """Test case - access cached entity polymer info from full cache

     Asserts that the entry count reaches the configured full entry limit
     and that at least one UNP reference accession is returned.  A set of
     per-entity lookups is included but switched off via a local flag.
     """
     try:
         extractor = EntityPolymerExtractor(
             self.__cfgOb,
             saveCachePath=self.__fullEntitySaveCachePath,
             useCache=True,
             saveCacheKwargs=self.__fullCacheKwargs,
         )
         numEntries = extractor.getEntryCount()
         logger.info("Entry count %d", numEntries)
         self.assertGreaterEqual(numEntries, self.__entryLimitFull)
         #
         numAccessions = len(extractor.getRefSeqAccessions("UNP"))
         logger.info("Ref seq count %d", numAccessions)
         self.assertGreaterEqual(numAccessions, 1)
         #
         # Optional per-entity lookups - disabled unless testOp is set True.
         testOp = False
         entityKeyL = [("1CP9", "1"), ("1CP9", "2")] if testOp else []
         for entryId, entityId in entityKeyL:
             uL = extractor.getEntityRefSeqAccessions("UNP", entryId, entityId)
             logger.debug("UNP for %s %s %r", entryId, entityId, uL)
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
 def testExtractEntityPolymers(self):
     """Fixture - extract and save entity polymer info

     Creates an extractor limited to the configured test entry limit and
     asserts that it reports at least 10 entries.
     """
     try:
         extractor = EntityPolymerExtractor(
             self.__cfgOb,
             exdbDirPath=self.__exdbCacheDirPath,
             useCache=False,
             cacheKwargs=self.__cacheKwargs,
             entryLimit=self.__entryLimitTest,
         )
         numEntries = extractor.getEntryCount()
         self.assertGreaterEqual(numEntries, 10)
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
 def testRebuildCache(self):
     """Test case - extract entity polymer info - rebuild full cache of extracted entity polymer data -

     The entry count must reach the configured full entry limit, or 10
     when no limit is configured (limit is None).
     """
     try:
         extractor = EntityPolymerExtractor(
             self.__cfgOb,
             saveCachePath=self.__fullEntitySaveCachePath,
             useCache=False,
             saveCacheKwargs=self.__fullCacheKwargs,
             entryLimit=self.__entryLimitFull,
         )
         numEntries = extractor.getEntryCount()
         # Fall back to a fixed floor when the run is not entry-limited.
         minExpected = 10 if self.__entryLimitFull is None else self.__entryLimitFull
         self.assertGreaterEqual(numEntries, minExpected)
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
    def testTaxonomyReadCache(self):
        """Test case - list entity taxonomy ids that differ from their merged id.

        Reads the original taxonomy assignments from the extractor, looks up
        the merged taxonomy id for each (entity, taxonomy id) pair, and
        collects every pair whose merged id is different.  The collected
        records are exported as JSON to "obsolete-taxons.json" in the work
        path.  No assertions are made; the test fails only on an exception.
        """
        try:
            extractor = EntityPolymerExtractor(
                self.__cfgOb,
                exdbDirPath=self.__exdbCacheDirPath,
                useCache=False,
                cacheKwargs=self.__cacheKwargs,
            )
            logger.info("Cache entry count %d", extractor.getEntryCount())
            #
            origTaxonD = extractor.getOrigTaxons()
            logger.info("Taxons %d", len(origTaxonD))

            taxProvider = TaxonomyProvider(taxDirPath=self.__taxonomyDataPath,
                                           useCache=True)
            #
            obsoleteL = []
            for entryId, entityTaxL in origTaxonD.items():
                for entityId, taxId in entityTaxL:
                    mergedTaxId = taxProvider.getMergedTaxId(taxId)
                    if taxId == mergedTaxId:
                        # Assignment is unchanged by merging - nothing to report.
                        continue
                    record = {
                        "entryId": entryId,
                        "entityId": entityId,
                        "taxId": taxId,
                        "replaceTaxId": mergedTaxId,
                    }
                    obsoleteL.append(record)
            logger.info("Obsolete list length %d", len(obsoleteL))
            #
            exportPath = os.path.join(self.__workPath, "obsolete-taxons.json")
            self.__mU.doExport(exportPath, obsoleteL, fmt="json", indent=3)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Ejemplo n.º 7
0
    def __getReferenceAssignments(self, refDbName, **kwargs):
        """Get all accessions assigned to input reference sequence database

        Args:
            refDbName: reference database name passed to getRefSeqAccessions().
            **kwargs: optional extractor settings - exdbDirPath (default None),
                cacheKwargs (default None), useCache (default True) and
                entryLimit (default None).

        Returns:
            The result of getRefSeqAccessions(refDbName), or an empty list
            if an exception occurs before that result is obtained.
            Exceptions are logged and not propagated.
        """
        accessionL = []
        extractorArgs = {
            "exdbDirPath": kwargs.get("exdbDirPath"),
            "useCache": kwargs.get("useCache", True),
            "cacheKwargs": kwargs.get("cacheKwargs"),
            "entryLimit": kwargs.get("entryLimit"),
        }

        try:
            extractor = EntityPolymerExtractor(self.__cfgOb, **extractorArgs)
            numEntries = extractor.getEntryCount()
            accessionL = extractor.getRefSeqAccessions(refDbName)
            logger.info(
                "Reading polymer entity cache with repository entry count %d ref accession length %d ",
                numEntries, len(accessionL))
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return accessionL
 def testAccessEntityPolymerFeatures(self):
     """Test case - access cached entity polymer info from test cache

     Asserts that the entry count reaches the configured test entry limit
     and that at least one UNP reference accession is returned, then logs
     the UNP accessions for entity "1" of entry "3RER".
     """
     try:
         extractor = EntityPolymerExtractor(
             self.__cfgOb,
             exdbDirPath=self.__exdbCacheDirPath,
             useCache=False,
             cacheKwargs=self.__cacheKwargs,
         )
         numEntries = extractor.getEntryCount()
         logger.info("Entry count %d", numEntries)
         self.assertGreaterEqual(numEntries, self.__entryLimitTest)
         #
         numAccessions = len(extractor.getRefSeqAccessions("UNP"))
         logger.info("Ref seq count %d", numAccessions)
         self.assertGreaterEqual(numAccessions, 1)
         #
         # Spot-check a single known entry/entity pair.
         for entryId, entityId in [("3RER", "1")]:
             uL = extractor.getEntityRefSeqAccessions("UNP", entryId, entityId)
             logger.info("UNP for %s %s %r", entryId, entityId, uL)
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()