def testCache(self, minMatchPrimaryPercent=None, logSizes=False):
     """Check that the reference caches are populated and, optionally, sufficiently matched.

     Args:
         minMatchPrimaryPercent (float, optional): when truthy, the percentage of match entries
             whose "matched" value is "primary" (relative to the size of the reference Id map)
             must exceed this value. Defaults to None (test skipped).
         logSizes (bool, optional): when True, log getObjSize() of each resource divided by 1.0e6.
             Defaults to False.

     Returns:
         bool: True when the Id map, match and reference caches are all non-empty and the
         optional percentage test passes, False otherwise.
     """
     refIdCount = len(self.__refIdMapD)
     matchCount = len(self.__matchD)
     refCount = len(self.__refD)
     logger.info("Reference cache lengths: refIdMap %d matchD %d refD %d", refIdCount, matchCount, refCount)
     cachesOk = bool(self.__refIdMapD and self.__matchD and self.__refD)
     #
     # Tally the match entries by their "matched" category (entries lacking the key are skipped).
     tally = defaultdict(int)
     logger.info("Match dictionary length %d", matchCount)
     for matchEntry in self.__matchD.values():
         if "matched" in matchEntry:
             tally[matchEntry["matched"]] += 1
     logger.info("Reference length %d match length %d coverage %r", refCount, matchCount, tally.items())
     #
     primaryOk = True
     if minMatchPrimaryPercent:
         try:
             primaryPercent = 100.0 * float(tally["primary"]) / float(refIdCount)
             primaryOk = primaryPercent > minMatchPrimaryPercent
         except Exception:
             # e.g. an empty Id map (division by zero) counts as a failed test
             primaryOk = False
         logger.info("Primary reference match percent test status %r", primaryOk)
     #
     if logSizes:
         resources = (
             self.__pfP,
             self.__ipP,
             self.__ssP,
             self.__goP,
             self.__ecP,
             self.__refIdMapD,
             self.__matchD,
             self.__refD,
         )
         sizes = [getObjSize(resource) / 1000000.0 for resource in resources]
         logger.info("Pfam %.2f InterPro %.2f SIFTS %.2f GO %.2f EC %.2f RefIdMap %.2f RefMatchD %.2f RefD %.2f", *sizes)
     return cachesOk and primaryOk
 def testCache(self, minCount=None, logSizes=False):
     if logSizes and self.__ccIdxD:
         logger.info("ccIdxD (%.2f MB)",
                     getObjSize(self.__ccIdxD) / 1000000.0)
     ok = self.__ccIdxD and len(
         self.__ccIdxD
     ) >= minCount if minCount else self.__ccIdxD is not None
     return ok
Exemple #3
0
    def testCache(self, minMatchPrimaryPercent=None, logSizes=False, minMissing=0):
        """Test the state of the reference sequence cache.

        The cache passes when the match and reference dictionaries are both non-empty and the
        number of missing matched reference Ids does not exceed minMissing. When that test
        fails, match coverage (and optionally object sizes) are logged and False is returned.

        Args:
            minMatchPrimaryPercent (float, optional): threshold for the percentage of "primary" matches.
                It is evaluated and logged on the failure path only and does not change the result. Defaults to None.
            logSizes (bool, optional): flag to log resource sizes (failure path only). Defaults to False.
            minMissing (int, optional): largest acceptable number of missing matched reference Ids. Defaults to 0.

        Returns:
            bool: True for success or False otherwise
        """
        # Bound up front so the final expression never depends on short-circuiting to avoid a NameError.
        okC = True
        try:
            ok = bool(self.__matchD and self.__refD and self.__missingMatchIds <= minMissing)
            logger.info("Reference cache lengths: matchD %d refD %d missing matches %d", len(self.__matchD), len(self.__refD), self.__missingMatchIds)
            if ok:
                return ok
        except Exception as e:
            logger.error("Failing with unexpected cache state %s", str(e))
            return False
        #
        # -- Diagnostics only: ok is always False past this point, so the primary match
        #    percentage computed below is logged but cannot change the returned status --
        #
        numRef = len(self.__matchD)
        countD = defaultdict(int)
        logger.info("Match dictionary length %d", len(self.__matchD))
        for _, mD in self.__matchD.items():
            if "matched" in mD:
                countD[mD["matched"]] += 1
        logger.info("Reference length %d match length %d coverage %r", len(self.__refD), len(self.__matchD), countD.items())
        if minMatchPrimaryPercent:
            try:
                okC = 100.0 * float(countD["primary"]) / float(numRef) > minMatchPrimaryPercent
            except Exception:
                # e.g. an empty match dictionary (division by zero) counts as a failed test
                okC = False
            logger.info("Primary reference match count test status %r", okC)
        #
        if logSizes:
            logger.info(
                "RefMatchD %.2f RefD %.2f",
                getObjSize(self.__matchD) / 1000000.0,
                getObjSize(self.__refD) / 1000000.0,
            )
        return ok and okC
Exemple #4
0
 def testCache(self, minCount=None, logSizes=False):
     okC = bool(self.__refD)
     if not okC:
         return okC
     logger.info("Reference data cache lengths: refD %d", len(self.__refD))
     if minCount and len(self.__refD) < minCount:
         return False
     #
     if logSizes:
         logger.info("refD %.2f", getObjSize(self.__refD) / 1000000.0)
     return True
 def testCache(self, minMatch=None, logSizes=False):
     """Test whether the match data cache (matchD) is populated.

     self.getMatchData() is invoked first; presumably it loads or refreshes the match
     data -- confirm against its definition.

     Args:
         minMatch (int, optional): when truthy, the cache must hold at least this many entries.
             Defaults to None (no count test).
         logSizes (bool, optional): when True, log getObjSize() of the cache divided by 1.0e6
             once the other tests have passed. Defaults to False.

     Returns:
         bool: True if the cache is non-empty and meets the optional minimum count, False otherwise.
     """
     self.getMatchData()
     if not self.__matchD:
         return False
     numMatches = len(self.__matchD)
     logger.info("Reference data cache lengths: matchD %d", numMatches)
     tooFew = bool(minMatch) and numMatches < minMatch
     if tooFew:
         return False
     #
     if logSizes:
         sizeMb = getObjSize(self.__matchD) / 1000000.0
         logger.info("PubChem MatchD %.2f", sizeMb)
     return True
 def testCache(self, minCount=None, logSizes=False):
     if logSizes and self.__ccMolD:
         logger.info("ccMolD object size %.2f MB", getObjSize(self.__ccMolD) / 1000000.0)
     ok = self.__ccMolD and len(self.__ccMolD) >= minCount if minCount else self.__ccMolD is not None
     return ok
Exemple #7
0
 def testCache(self, minCount=None, logSizes=False):
     if logSizes and self.__searchIdx:
         logger.info("searchIdxD (%.2f MB)", getObjSize(self.__searchIdx) / 1000000.0)
     ok = self.__searchIdx and len(self.__searchIdx) >= minCount if minCount else self.__searchIdx is not None
     return ok