def __fetchReferenceEntries(self,
                                refDbName,
                                idList,
                                saveText=False,
                                fetchLimit=None):
        """Fetch database entries from the input reference sequence database name."""
        dD = {"refDbName": refDbName, "refDbCache": {}, "matchInfo": {}}

        try:
            idList = idList[:fetchLimit] if fetchLimit else idList
            logger.info("Starting fetch for %d %s entries", len(idList),
                        refDbName)
            if refDbName == "UNP":
                fobj = UniProtUtils(saveText=saveText)
                refD, matchD = fobj.fetchList(idList)
                dD = {
                    "refDbName": refDbName,
                    "refDbCache": refD,
                    "matchInfo": matchD
                }

        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return dD
Example #2
    def __fetchReferenceEntries(self, refDbName, idList, saveText=False, fetchLimit=None):
        """Fetch database entries from the input reference sequence database name."""
        dD = {"refDbName": refDbName, "refDbCache": {}}
        idD = {"matchInfo": {}, "refIdMap": {}}

        try:
            # Initialize here so the coverage check below is safe even when no fetch is performed
            refD, matchD = {}, {}
            idList = idList[:fetchLimit] if fetchLimit else idList
            logger.info("Starting fetch for %d %s entries", len(idList), refDbName)
            if refDbName == "UniProt":
                fobj = UniProtUtils(saveText=saveText)
                logger.info("Maximum reference chunk size %d", self.__maxChunkSize)
                refD, matchD = fobj.fetchList(idList, maxChunkSize=self.__maxChunkSize)
                dD = {"refDbName": refDbName, "refDbCache": refD}
                idD = {"matchInfo": matchD}
            #
            # Check the coverage -
            #
            countD = defaultdict(int)
            logger.info("Match dictionary length %d", len(matchD))
            for _, mD in matchD.items():
                if "matched" in mD:
                    countD[mD["matched"]] += 1
            logger.info("Reference length %d match length %d coverage %r", len(refD), len(matchD), countD.items())
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return dD, idD
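Both variants above delegate to UniProtUtils.fetchList, which returns a pair of dictionaries: reference entries keyed by accession and per-id match diagnostics. Below is a minimal, self-contained sketch of that call pattern; the import path, the example accessions, and the logging setup are assumptions for illustration, not taken from the examples above.

import logging
from collections import defaultdict

from rcsb.utils.seq.UniProtUtils import UniProtUtils  # assumed import path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hypothetical UniProt accessions used only for illustration
idList = ["P69905", "P68871"]
fobj = UniProtUtils(saveText=False)
# fetchList returns (reference entries keyed by accession, match diagnostics keyed by input id)
refD, matchD = fobj.fetchList(idList, maxChunkSize=50)

# Coverage summary in the same style as the second example above
countD = defaultdict(int)
for mD in matchD.values():
    if "matched" in mD:
        countD[mD["matched"]] += 1
logger.info("references %d matches %d coverage %r", len(refD), len(matchD), dict(countD))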
Example #3
 def testLookup(self):
     """Test lookup gene names"""
     try:
         uUtils = UniProtUtils(saveText=False)
         geneList = ["BCOR"]
         for gene in geneList:
             idList, retCode = uUtils.doLookup([gene], itemKey="GENENAME")
             logger.info("retCode %r rspList (%d) %r", retCode, len(idList),
                         idList)
             self.assertGreaterEqual(len(idList), 500)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #4
 def testGeneLookup(self):
     """Test lookup gene names for human"""
     try:
         uUtils = UniProtUtils(saveText=False)
         geneList = ["BCOR", "BCORL1"]
         for gene in geneList:
             idList, retCode = uUtils.doGeneLookup(gene, 9606)
             logger.info("retCode %r rspList (%d) %r", retCode, len(idList),
                         idList)
             self.assertGreaterEqual(len(idList), 1)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #5
 def testBatchFetchVariants(self):
     """Test batch variant entry fetch"""
     try:
         fobj = UniProtUtils(saveText=True)
         retD, matchD = fobj.fetchList(self.__unpIdListV,
                                       usePrimary=self.__usePrimary,
                                       retryAltApi=self.__retryAltApi)
         numPrimary, numSecondary, numNone = self.__matchSummary(matchD)
         logger.debug("%d %d %d", numPrimary, numSecondary, numNone)
         self.assertGreaterEqual(len(retD), len(self.__unpIdListV))
         if retD and self.__export:
             fobj.writeUnpXml(
                 os.path.join(self.__workPath, "variant-batch-fetch.xml"))
             # self.__dumpEntries(retD)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
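Several of the tests on this page summarize matchD with a private __matchSummary helper that is not shown here. Based on the "matched" status recorded per id in the coverage check of Example #2, a plausible reading is a simple tally like the sketch below; the "primary" and "secondary" status values are assumptions, not confirmed by the library.

def matchSummary(matchD):
    # Hedged stand-in for the tests' __matchSummary helper: count how each input id matched.
    # Assumes matchD values carry a "matched" status of "primary" or "secondary" when resolved.
    numPrimary = numSecondary = numNone = 0
    for mD in matchD.values():
        status = mD.get("matched")
        if status == "primary":
            numPrimary += 1
        elif status == "secondary":
            numSecondary += 1
        else:
            numNone += 1
    return numPrimary, numSecondary, numNone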
Example #6
 def updateList(self, dataList, procName, optionsD, workingDir):
     """Update the input list of reference sequence identifiers and return
     matching diagnostics and reference feature data.
     """
     _ = workingDir
     saveText = optionsD.get("saveText", False)
     fetchLimit = optionsD.get("fetchLimit", None)
     refDbName = optionsD.get("refDbName", "UniProt")
     maxChunkSize = optionsD.get("maxChunkSize", 50)
     successList = []
     retList1 = []
     retList2 = []
     diagList = []
     emptyList = []
     #
     try:
         tU = TimeUtil()
         idList = dataList[:fetchLimit] if fetchLimit else dataList
         logger.info("%s starting fetch for %d %s entries", procName, len(idList), refDbName)
         if refDbName == "UniProt":
             fobj = UniProtUtils(saveText=saveText)
             logger.debug("Maximum reference chunk size %d", maxChunkSize)
             refD, matchD = fobj.fetchList(idList, maxChunkSize=maxChunkSize)
             if len(matchD) == len(idList):
                 for uId, tD in matchD.items():
                     tD["rcsb_id"] = uId.strip()
                     tD["rcsb_last_update"] = tU.getDateTimeObj(tU.getTimestamp())
                     retList1.append(tD)
                 for uId, tD in refD.items():
                     tD["rcsb_id"] = uId.strip()
                     tD["rcsb_last_update"] = tU.getDateTimeObj(tU.getTimestamp())
                     retList2.append(tD)
                 successList.extend(idList)
                 self.__updateReferenceData(self.__refDatabaseName, self.__refDataCollectionName, retList2)
                 self.__updateReferenceData(self.__refDatabaseName, self.__refMatchDataCollectionName, retList1)
             else:
                 logger.info("Failing with fetch for %d entries with matchD %r", len(idList), matchD)
         else:
             logger.error("Unsupported reference database %r", refDbName)
     except Exception as e:
         logger.exception("Failing %s for %d data items %s", procName, len(dataList), str(e))
     logger.info("%s dataList length %d success length %d rst1 %d rst2 %d", procName, len(dataList), len(successList), len(retList1), len(retList2))
     #
     return successList, emptyList, emptyList, diagList
Example #7
 def testValidateExchangeObject(self):
     """Test fetch exchange objects"""
     try:
         #
         sD = self.__mU.doImport(self.__jsonSchemaPath, "json")
         #
         fobj = UniProtUtils(saveText=False)
         idList = self.__unpIdList1
         retD, _ = fobj.fetchList(idList,
                                  usePrimary=self.__usePrimary,
                                  retryAltApi=self.__retryAltApi)
         #
         exObjD = fobj.reformat(retD, formatType="exchange")
         if exObjD and self.__export:
             for rId in exObjD:
                 self.__mU.doExport(os.path.join(self.__workPath,
                                                 rId + "-exchange.json"),
                                    exObjD[rId],
                                    fmt="json",
                                    indent=3)
         #
         Draft4Validator.check_schema(sD)
         #
         valInfo = Draft4Validator(sD, format_checker=FormatChecker())
         eCount = 0
         for rId, dD in exObjD.items():
             logger.debug("Uid %s", rId)
             try:
                 cCount = 0
                 for error in sorted(valInfo.iter_errors(dD), key=str):
                     logger.info("%s path %s error: %s", rId, error.path,
                                 error.message)
                     logger.debug(">>> failing object is %r", dD)
                     eCount += 1
                     cCount += 1
                 #
                 logger.debug("%s errors count %d", rId, cCount)
             except Exception as e:
                 logger.exception("Validator fails  %s", str(e))
         #
         logger.debug("Total errors count %d", eCount)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #8
 def testExchangeObject(self):
     """Test fetch exchange objects"""
     try:
         #
         fobj = UniProtUtils(saveText=False)
         idList = self.__unpIdList1
         retD, _ = fobj.fetchList(idList,
                                  usePrimary=self.__usePrimary,
                                  retryAltApi=self.__retryAltApi)
         exObjD = fobj.reformat(retD, formatType="exchange")
         if exObjD and self.__export:
             for rId in exObjD:
                 self.__mU.doExport(os.path.join(self.__workPath,
                                                 rId + "-exchange.json"),
                                    exObjD[rId],
                                    fmt="json",
                                    indent=3)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #9
 def testBatchFetchFailureMode1(self):
     """Test batch entry fetch (failure mode)"""
     try:
         fobj = UniProtUtils(saveText=False,
                             urlPrimary="http://none.none.none")
         idList = self.__unpIdListLong[:100]
         logger.info("idList length %d  unique %d", len(idList),
                     len(set(idList)))
         retD, matchD = fobj.fetchList(idList,
                                       maxChunkSize=len(idList),
                                       usePrimary=self.__usePrimary,
                                       retryAltApi=self.__retryAltApi)
         logger.info("IdList %d reference return length %d match length %d",
                     len(idList), len(retD), len(matchD))
         numPrimary, numSecondary, numNone = self.__matchSummary(matchD)
         logger.debug("%d %d %d", numPrimary, numSecondary, numNone)
         sumRet = numPrimary + numSecondary + numNone
         logger.info("sumRet returned %d", sumRet)
         self.assertGreaterEqual(sumRet, len(idList) - 1)
         if retD and self.__export:
             for rId in retD:
                 self.__mU.doExport(os.path.join(self.__workPath,
                                                 rId + ".json"),
                                    retD[rId],
                                    fmt="json",
                                    indent=3)
         #
         retD, matchD = fobj.fetchList(idList,
                                       usePrimary=False,
                                       retryAltApi=True)
         logger.info("IdList %d reference return length %d match length %d",
                     len(idList), len(retD), len(matchD))
         numPrimary, numSecondary, numNone = self.__matchSummary(matchD)
         logger.debug("%d %d %d", numPrimary, numSecondary, numNone)
         sumRet = numPrimary + numSecondary + numNone
         logger.info("sumRet returned %d", sumRet)
         self.assertGreaterEqual(sumRet, len(idList) - 1)
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #10
 def testFetchSequenceList(self):
     """Test fetch UniProt sequence data (FASTA)"""
     try:
         #
         fobj = UniProtUtils(saveText=False)
         # Note: this list contains one obsolete entry
         idList = self.__unpIdList1
         ok, sD = fobj.fetchSequenceList(idList,
                                         usePrimary=self.__usePrimary,
                                         retryAltApi=self.__retryAltApi)
         self.assertFalse(ok)
         self.assertEqual(len(sD), len(idList) - 1)
         if self.__export:
             self.__mU.doExport(os.path.join(self.__workPath,
                                             "data-sequences.json"),
                                sD,
                                fmt="json",
                                indent=3)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #11
 def testFetchVariantIds(self):
     """Test individual variant entry fetch"""
     try:
         fobj = UniProtUtils(saveText=True)
         for tId in self.__unpIdListV:
             retD, matchD = fobj.fetchList([tId],
                                           usePrimary=self.__usePrimary,
                                           retryAltApi=self.__retryAltApi)
             numPrimary, numSecondary, numNone = self.__matchSummary(matchD)
             logger.debug("%d %d %d", numPrimary, numSecondary, numNone)
             self.assertGreaterEqual(len(retD), 1)
             if retD:
                 if self.__export:
                     fobj.writeUnpXml(
                         os.path.join(self.__workPath, tId + ".xml"))
                     self.__mU.doExport(os.path.join(
                         self.__workPath, tId + ".json"),
                                        retD,
                                        fmt="json",
                                        indent=3)
                 #
                 for (eId, eDict) in retD.items():
                     if "db_isoform" in eDict and eId == tId:
                         logger.debug(
                             "------ sequence database code  %s has key db_isoform:  %r",
                             eId, eDict["db_isoform"])
                         logger.debug(
                             "------ sequence database code  %s sequence length %d",
                             eId, len(eDict["sequence"]))
                         # logger.debug("%s\n", eDict['sequence'])
                     elif eId == tId:
                         logger.debug("------ No matching isoform for %s\n",
                                      tId)
                     # for k,v in eDict.items():
                     #    logger.info("%-25s = %s\n", k, v)
             else:
                 logger.info("Fetch failed for id %s\n", tId)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Example #12
 def testFetchIds(self):
     """Test individual entry fetch"""
     idList = None
     try:
         fobj = UniProtUtils(saveText=True)
         for tId in self.__unpIdList3:
             idList = [tId]
             retD, matchD = fobj.fetchList(idList,
                                           usePrimary=self.__usePrimary,
                                           retryAltApi=self.__retryAltApi)
             numPrimary, numSecondary, numNone = self.__matchSummary(matchD)
             logger.debug("%d %d %d", numPrimary, numSecondary, numNone)
             #
             rematchD = fobj.rebuildMatchResultIndex(idList, retD)
             self.assertDictEqual(matchD, rematchD)
             #
             self.assertGreaterEqual(len(retD), len(idList))
             if retD and self.__export:
                 fobj.writeUnpXml(
                     os.path.join(self.__workPath, tId + ".xml"))
                 self.__mU.doExport(os.path.join(self.__workPath,
                                                 tId + ".json"),
                                    retD,
                                    fmt="json",
                                    indent=3)
     except Exception as e:
         logger.exception("Failing with idList %r %s", idList, str(e))
         self.fail()

 def getDocuments(self, formatType="exchange"):
     """Return the cached reference entries reformatted as documents of the given format type."""
     fobj = UniProtUtils(saveText=False)
     exObjD = fobj.reformat(self.__refD, formatType=formatType)
     return list(exObjD.values())

 def __rebuildReferenceMatchIndex(self, idList, referenceD):
     """Rebuild the match result index for the input id list from previously fetched reference data."""
     fobj = UniProtUtils()
     logger.info("Rebuilding match index on idList (%d) using reference data (%d) %r", len(idList), len(referenceD), type(referenceD))
     matchD = fobj.rebuildMatchResultIndex(idList, referenceD)
     return matchD
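The last two helpers pair naturally: rebuildMatchResultIndex regenerates match diagnostics from reference data that was already fetched and cached, without another network round trip, and reformat converts the cached entries into exchange documents. A hedged usage sketch follows, with the import path and the cache placeholder as assumptions.

from rcsb.utils.seq.UniProtUtils import UniProtUtils  # assumed import path

# Stand-in for reference entries keyed by accession, e.g. saved from an earlier fetchList() run
cachedRefD = {}

fobj = UniProtUtils(saveText=False)
# Rebuild match diagnostics from the cached entries instead of refetching them
matchD = fobj.rebuildMatchResultIndex(list(cachedRefD.keys()), cachedRefD)
# Reformat the cached entries as exchange documents, as getDocuments() does above
exObjD = fobj.reformat(cachedRefD, formatType="exchange")
documents = list(exObjD.values())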