def doTransform(self, **kwargs):
    desp = DataExchangeStatus()
    statusStartTimestamp = desp.setStartTime()
    #
    databaseName = kwargs.get("databaseName", "pdbx_core")
    collectionName = kwargs.get("collectionName", "pdbx_core_entry")
    selectionQueryD = kwargs.get("selectionQuery", {})
    fetchLimit = kwargs.get("fetchLimit", None)
    tU = TimeUtil()
    updateId = kwargs.get("updateId", tU.getCurrentWeekSignature())
    #
    docSelectList = self.__selectObjectIds(databaseName, collectionName, selectionQueryD)
    docSelectList = docSelectList[:fetchLimit] if fetchLimit else docSelectList
    ok = self.__transform(databaseName, collectionName, docSelectList)
    #
    okS = True
    if updateId:
        okS = self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
    return ok and okS
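# A minimal usage sketch for doTransform() above. "transformer" stands for an
# instance of the enclosing worker class; the constructor shown and the
# selection query are assumptions for illustration, not part of this excerpt.
#
#   transformer = ObjectTransformer(cfgOb, cachePath)  # hypothetical constructor
#   ok = transformer.doTransform(
#       databaseName="pdbx_core",
#       collectionName="pdbx_core_entry",
#       selectionQuery={},     # an empty query selects all documents
#       fetchLimit=100,        # cap the selection for testing
#   )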
def testTimeStamps(self):
    """Verify time stamp operations."""
    try:
        tU = TimeUtil()
        tS = tU.getTimestamp(useUtc=True)
        logger.debug("TS (UTC) = %s(%d)", tS, len(tS))
        self.assertTrue(len(tS) >= 32)
        #
        tS = tU.getTimestamp(useUtc=False)
        logger.debug("TS = %s(%d)", tS, len(tS))
        self.assertTrue(len(tS) >= 32)
        # self.assertTrue(ok)
        wS1 = tU.getCurrentWeekSignature()
        logger.debug("Current week signature %s", wS1)
        td = datetime.date.today()
        wS2 = tU.getWeekSignature(td.year, td.month, td.day)
        logger.debug("Computed week signature %s", wS2)
        self.assertEqual(wS1, wS2)
        #
        tS = tU.getTimestamp(useUtc=True)
        logger.debug("TS (UTC) = %s(%d)", tS, len(tS))
        self.assertTrue(len(tS) >= 32)
        dt = tU.getDateTimeObj(tS)
        logger.debug("Recycled DT (UTC) %s", dt.isoformat(" "))
        #
        tS = tU.getTimestamp(useUtc=False)
        logger.debug("TS (local) = %s(%d)", tS, len(tS))
        self.assertTrue(len(tS) >= 32)
        #
        dt = tU.getDateTimeObj(tS)
        logger.debug("Recycled DT (local) %s", dt.isoformat(" "))
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testTreeLoader(self):
    """Test case - load tree node lists."""
    try:
        tU = TimeUtil()
        updateId = tU.getCurrentWeekSignature()
        rhw = TreeNodeListWorker(
            self.__cfgOb,
            self.__cachePath,
            numProc=self.__numProc,
            chunkSize=self.__chunkSize,
            documentLimit=self.__documentLimit,
            verbose=self.__debugFlag,
            readBackCheck=self.__readBackCheck,
            useCache=self.__useCache,
        )
        #
        ok = rhw.load(updateId, loadType=self.__loadType, doLoad=self.__doLoad)
        self.assertTrue(ok)
        #
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def main():
    parser = argparse.ArgumentParser()
    #
    defaultConfigName = "site_info_configuration"
    #
    parser.add_argument("--full", default=True, action="store_true", help="Fresh full load in new tables/collections (default)")
    #
    parser.add_argument("--etl_entity_sequence_clusters", default=False, action="store_true", help="ETL entity sequence clusters")
    parser.add_argument("--etl_repository_holdings", default=False, action="store_true", help="ETL repository holdings")
    #
    parser.add_argument("--etl_chemref", default=False, action="store_true", help="ETL integrated chemical reference data")
    #
    parser.add_argument("--etl_tree_node_lists", default=False, action="store_true", help="ETL tree node lists")
    parser.add_argument("--data_set_id", default=None, help="Data set identifier (default=current week, e.g. 2018_14)")
    #
    parser.add_argument("--sequence_cluster_data_path", default=None, help="Sequence cluster data path (default set by configuration)")
    parser.add_argument("--sandbox_data_path", default=None, help="Data exchange sandbox data path (default set by configuration)")
    #
    parser.add_argument("--config_path", default=None, help="Path to configuration options file")
    parser.add_argument("--config_name", default=defaultConfigName, help="Configuration section name")
    parser.add_argument("--db_type", default="mongo", help="Database server type (default=mongo)")
    # parser.add_argument("--document_style", default="rowwise_by_name_with_cardinality",
    #                     help="Document organization (rowwise_by_name_with_cardinality|rowwise_by_name|columnwise_by_name|rowwise_by_id|rowwise_no_name)")
    parser.add_argument("--read_back_check", default=False, action="store_true", help="Perform read back check on all documents")
    #
    parser.add_argument("--num_proc", default=2, help="Number of processes to execute (default=2)")
    parser.add_argument("--chunk_size", default=10, help="Number of files loaded per process")
    parser.add_argument("--document_limit", default=None, help="Load document limit for testing")
    parser.add_argument("--prune_document_size", default=None, help="Prune large documents to this size limit (MB)")
    parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
    parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for testing")
    parser.add_argument("--cache_path", default=None, help="Path containing cache directories")
    #
    parser.add_argument("--use_cache", default=False, action="store_true", help="Use cache files from remote resources")
    parser.add_argument("--rebuild_cache", default=False, action="store_true", help="Rebuild cached resource files")
    #
    parser.add_argument("--rebuild_schema", default=False, action="store_true", help="Rebuild schema on-the-fly if not cached")
    #
    #
    args = parser.parse_args()
    #
    debugFlag = args.debug
    if debugFlag:
        logger.setLevel(logging.DEBUG)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    # Configuration Details
    configPath = args.config_path
    configName = args.config_name
    #
    useCache = args.use_cache
    if not configPath:
        configPath = os.getenv("DBLOAD_CONFIG_PATH", None)
    try:
        if os.access(configPath, os.R_OK):
            os.environ["DBLOAD_CONFIG_PATH"] = configPath
            logger.info("Using configuration path %s (%s)", configPath, configName)
        else:
            logger.error("Missing or access issue with config file %r", configPath)
            exit(1)
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
        cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=defaultConfigName, mockTopPath=mockTopPath)
        if configName != defaultConfigName:
            cfgOb.replaceSectionName(defaultConfigName, configName)
        #
    except Exception as e:
        logger.error("Missing or access issue with config file %r with %s", configPath, str(e))
        exit(1)
    #
    try:
        readBackCheck = args.read_back_check
        tU = TimeUtil()
        dataSetId = args.data_set_id if args.data_set_id else tU.getCurrentWeekSignature()
        seqDataLocator = args.sequence_cluster_data_path if args.sequence_cluster_data_path else cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=configName)
        sandboxPath = args.sandbox_data_path if args.sandbox_data_path else cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=configName)
        numProc = int(args.num_proc)
        chunkSize = int(args.chunk_size)
        documentLimit = int(args.document_limit) if args.document_limit else None
        loadType = "full" if args.full else "replace"
        # loadType = 'replace' if args.replace else 'full'
        cachePath = args.cache_path if args.cache_path else "."
        rebuildCache = args.rebuild_cache if args.rebuild_cache else False
        #
        rebuildSchemaFlag = args.rebuild_schema if args.rebuild_schema else False
        #
        # if args.document_style not in ['rowwise_by_name', 'rowwise_by_name_with_cardinality', 'columnwise_by_name', 'rowwise_by_id', 'rowwise_no_name']:
        #     logger.error("Unsupported document style %s", args.document_style)
        if args.db_type != "mongo":
            logger.error("Unsupported database server type %s", args.db_type)
    except Exception as e:
        logger.exception("Argument processing problem %s", str(e))
        parser.print_help(sys.stderr)
        exit(1)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    # Rebuild or check resource cache
    ok = buildResourceCache(cfgOb, configName, cachePath, rebuildCache=rebuildCache)
    if not ok:
        logger.error("Cache rebuild or check failure (rebuild %r) %r", rebuildCache, cachePath)
        exit(1)
    ##
    okS = True
    if args.db_type == "mongo":
        if args.etl_entity_sequence_clusters:
            cw = SequenceClustersEtlWorker(cfgOb, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck, workPath=cachePath)
            ok = cw.etl(dataSetId, seqDataLocator, loadType=loadType)
            okS = loadStatus(cw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
        if args.etl_repository_holdings:
            rhw = RepoHoldingsEtlWorker(cfgOb, sandboxPath, cachePath, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck)
            ok = rhw.load(dataSetId, loadType=loadType)
            okS = loadStatus(rhw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
    logger.info("Operation completed with status %r", ok and okS)
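# Example invocations of the exec script above (the script name and paths are
# hypothetical; the flags are those registered with the argument parser):
#
#   python ETLExec.py --etl_repository_holdings --config_path ./exdb-config.yml \
#       --config_name site_info_configuration --cache_path ./CACHE
#   python ETLExec.py --etl_entity_sequence_clusters --mock --debug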
def load(self, op, **kwargs):
    # if not self.__cacheStatus:
    #     logger.error("Resource cache test or rebuild has failed - exiting")
    #     return False
    # argument processing
    if op not in ["pdbx-loader", "etl-repository-holdings", "etl-entity-sequence-clusters"]:
        logger.error("Unsupported operation %r - exiting", op)
        return False
    try:
        readBackCheck = kwargs.get("readBackCheck", False)
        numProc = int(kwargs.get("numProc", 1))
        chunkSize = int(kwargs.get("chunkSize", 10))
        fileLimit = int(kwargs.get("fileLimit")) if "fileLimit" in kwargs else None
        documentLimit = int(kwargs.get("documentLimit")) if "documentLimit" in kwargs else None
        failedFilePath = kwargs.get("failFileListPath", None)
        loadFileListPath = kwargs.get("loadFileListPath", None)
        saveInputFileListPath = kwargs.get("saveFileListPath", None)
        schemaLevel = kwargs.get("schemaLevel", "min") if kwargs.get("schemaLevel") in ["min", "full"] else "min"
        loadType = kwargs.get("loadType", "full")  # or replace
        updateSchemaOnReplace = kwargs.get("updateSchemaOnReplace", True)
        pruneDocumentSize = float(kwargs.get("pruneDocumentSize")) if "pruneDocumentSize" in kwargs else None
        # Document organization (rowwise_by_name_with_cardinality|rowwise_by_name|columnwise_by_name|rowwise_by_id|rowwise_no_name)
        documentStyle = kwargs.get("documentStyle", "rowwise_by_name_with_cardinality")
        dbType = kwargs.get("dbType", "mongo")
        #
        databaseName = kwargs.get("databaseName", None)
        databaseNameList = self.__cfgOb.get("DATABASE_NAMES_ALL", sectionName="database_catalog_configuration").split(",")
        collectionNameList = kwargs.get("collectionNameList", None)
        mergeValidationReports = kwargs.get("mergeValidationReports", True)
        #
        tU = TimeUtil()
        dataSetId = kwargs.get("dataSetId") if "dataSetId" in kwargs else tU.getCurrentWeekSignature()
        seqDataLocator = self.__cfgOb.getPath("RCSB_SEQUENCE_CLUSTER_DATA_PATH", sectionName=self.__configName)
        sandboxPath = self.__cfgOb.getPath("RCSB_EXCHANGE_SANDBOX_PATH", sectionName=self.__configName)
    except Exception as e:
        logger.exception("Argument and configuration processing failing with %s", str(e))
        return False
    #
    ok = okS = False
    if op == "pdbx-loader" and dbType == "mongo" and databaseName in databaseNameList:
        okS = True
        try:
            inputPathList = None
            if loadFileListPath:
                mu = MarshalUtil(workPath=self.__cachePath)
                inputPathList = mu.doImport(loadFileListPath, fmt="list")
                if not inputPathList:
                    logger.error("Operation %r missing or empty input file path list %s - exiting", op, loadFileListPath)
                    return False
        except Exception as e:
            logger.exception("Operation %r processing input path list failing with %s", op, str(e))
            return False
        #
        try:
            mw = PdbxLoader(
                self.__cfgOb,
                self.__cachePath,
                resourceName="MONGO_DB",
                numProc=numProc,
                chunkSize=chunkSize,
                fileLimit=fileLimit,
                verbose=self.__debugFlag,
                readBackCheck=readBackCheck,
            )
            ok = mw.load(
                databaseName,
                collectionLoadList=collectionNameList,
                loadType=loadType,
                inputPathList=inputPathList,
                styleType=documentStyle,
                dataSelectors=["PUBLIC_RELEASE"],
                failedFilePath=failedFilePath,
                saveInputFileListPath=saveInputFileListPath,
                pruneDocumentSize=pruneDocumentSize,
                validationLevel=schemaLevel,
                mergeContentTypes=["vrpt"] if mergeValidationReports else None,
                updateSchemaOnReplace=updateSchemaOnReplace,
            )
            okS = self.loadStatus(mw.getLoadStatus(), readBackCheck=readBackCheck)
        except Exception as e:
            logger.exception("Operation %r database %r failing with %s", op, databaseName, str(e))
    elif op == "etl-entity-sequence-clusters" and dbType == "mongo":
        cw = SequenceClustersEtlWorker(
            self.__cfgOb,
            numProc=numProc,
            chunkSize=chunkSize,
            documentLimit=documentLimit,
            verbose=self.__debugFlag,
            readBackCheck=readBackCheck,
            workPath=self.__cachePath,
        )
        ok = cw.etl(dataSetId, seqDataLocator, loadType=loadType)
        okS = self.loadStatus(cw.getLoadStatus(), readBackCheck=readBackCheck)
    elif op == "etl-repository-holdings" and dbType == "mongo":
        rhw = RepoHoldingsEtlWorker(
            self.__cfgOb,
            sandboxPath,
            self.__cachePath,
            numProc=numProc,
            chunkSize=chunkSize,
            documentLimit=documentLimit,
            verbose=self.__debugFlag,
            readBackCheck=readBackCheck,
        )
        ok = rhw.load(dataSetId, loadType=loadType)
        okS = self.loadStatus(rhw.getLoadStatus(), readBackCheck=readBackCheck)
    logger.info("Completed operation %r with status %r", op, ok and okS)
    return ok and okS
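# A hedged usage sketch for load() above. "loader" stands for an instance of
# the enclosing class (its constructor is not part of this excerpt); the
# kwargs mirror those consumed in the argument-processing block.
#
#   ok = loader.load(
#       "pdbx-loader",
#       databaseName="pdbx_core",     # must appear in DATABASE_NAMES_ALL
#       loadType="replace",           # or "full"
#       numProc=4,
#       chunkSize=10,
#       readBackCheck=True,
#       mergeValidationReports=True,  # merge "vrpt" validation report content
#   )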
def main():
    parser = argparse.ArgumentParser()
    #
    defaultConfigName = "site_info_configuration"
    parser.add_argument("--data_set_id", default=None, help="Data set identifier (default=current week, e.g. 2019_14)")
    parser.add_argument("--full", default=True, action="store_true", help="Fresh full load in new tables/collections (default)")
    parser.add_argument("--etl_chemref", default=False, action="store_true", help="ETL integrated chemical reference data")
    parser.add_argument("--etl_uniprot_core", default=False, action="store_true", help="ETL UniProt core reference data")
    parser.add_argument("--etl_tree_node_lists", default=False, action="store_true", help="ETL tree node lists")
    parser.add_argument("--upd_ref_seq", default=False, action="store_true", help="Update reference sequence assignments")
    #
    parser.add_argument("--config_path", default=None, help="Path to configuration options file")
    parser.add_argument("--config_name", default=defaultConfigName, help="Configuration section name")
    parser.add_argument("--db_type", default="mongo", help="Database server type (default=mongo)")
    parser.add_argument("--read_back_check", default=False, action="store_true", help="Perform read back check on all documents")
    parser.add_argument("--num_proc", default=2, help="Number of processes to execute (default=2)")
    parser.add_argument("--chunk_size", default=10, help="Number of files loaded per process")
    parser.add_argument("--document_limit", default=None, help="Load document limit for testing")
    parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
    parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for testing")
    parser.add_argument("--cache_path", default=None, help="Top cache path for external and local resource files")
    parser.add_argument("--rebuild_cache", default=False, action="store_true", help="Rebuild cached files from remote resources")
    # parser.add_argument("--test_req_seq_cache", default=False, action="store_true", help="Test reference sequence cached files")
    #
    #
    args = parser.parse_args()
    #
    debugFlag = args.debug
    if debugFlag:
        logger.setLevel(logging.DEBUG)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    # Configuration Details
    configPath = args.config_path
    configName = args.config_name
    rebuildCache = args.rebuild_cache
    useCache = not args.rebuild_cache
    if not configPath:
        configPath = os.getenv("DBLOAD_CONFIG_PATH", None)
    try:
        if os.access(configPath, os.R_OK):
            os.environ["DBLOAD_CONFIG_PATH"] = configPath
            logger.info("Using configuration path %s (%s)", configPath, configName)
        else:
            logger.error("Missing or access issue with config file %r", configPath)
            exit(1)
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
        cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
    except Exception as e:
        logger.error("Missing or access issue with config file %r with %s", configPath, str(e))
        exit(1)
    #
    try:
        readBackCheck = args.read_back_check
        tU = TimeUtil()
        dataSetId = args.data_set_id if args.data_set_id else tU.getCurrentWeekSignature()
        numProc = int(args.num_proc)
        chunkSize = int(args.chunk_size)
        documentLimit = int(args.document_limit) if args.document_limit else None
        loadType = "full" if args.full else "replace"
        cachePath = args.cache_path if args.cache_path else "."
        if args.db_type != "mongo":
            logger.error("Unsupported database server type %s", args.db_type)
    except Exception as e:
        logger.exception("Argument processing problem %s", str(e))
        parser.print_help(sys.stderr)
        exit(1)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    ##
    # Rebuild or check resource cache
    okS = True
    ok = buildResourceCache(cfgOb, configName, cachePath, rebuildCache=rebuildCache)
    if not ok:
        logger.error("Cache rebuild or check failure (rebuild %r) %r", rebuildCache, cachePath)
        exit(1)
    # if not useCache:
    #     buildResourceCache(cfgOb, configName, cachePath, rebuildCache=True)
    #
    if args.db_type == "mongo":
        if args.etl_tree_node_lists:
            rhw = TreeNodeListWorker(cfgOb, cachePath, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck, useCache=useCache)
            ok = rhw.load(dataSetId, loadType=loadType)
            okS = loadStatus(rhw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
        if args.etl_chemref:
            crw = ChemRefEtlWorker(cfgOb, cachePath, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck, useCache=useCache)
            ok = crw.load(dataSetId, extResource="DrugBank", loadType=loadType)
            okS = loadStatus(crw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
        if args.etl_uniprot_core:
            crw = UniProtCoreEtlWorker(cfgOb, cachePath, numProc=numProc, chunkSize=chunkSize, documentLimit=documentLimit, verbose=debugFlag, readBackCheck=readBackCheck, useCache=useCache)
            ok = crw.load(dataSetId, extResource="UniProt", loadType=loadType)
            okS = loadStatus(crw.getLoadStatus(), cfgOb, cachePath, readBackCheck=readBackCheck)
        if args.upd_ref_seq:
            databaseName = "pdbx_core"
            collectionName = "pdbx_core_polymer_entity"
            polymerType = "Protein"
            ok = doReferenceSequenceUpdate(cfgOb, databaseName, collectionName, polymerType, cachePath, useCache, fetchLimit=documentLimit, refChunkSize=100)
            okS = ok
    #
    logger.info("Operation completed with status %r", ok and okS)
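# Example invocations of the exec script above, including the reference
# sequence update path (script name and paths are hypothetical; flags are
# those registered with the argument parser):
#
#   python ETLExec.py --etl_tree_node_lists --config_path ./exdb-config.yml --cache_path ./CACHE
#   python ETLExec.py --upd_ref_seq --document_limit 500 --debug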
class DataExchangeStatus(object):
    """Create status records for data exchange operations.

    For example,

        loop_
        _rcsb_data_exchange_status.update_id
        _rcsb_data_exchange_status.database
        _rcsb_data_exchange_status.object
        _rcsb_data_exchange_status.update_status_flag
        _rcsb_data_exchange_status.update_begin_timestamp
        _rcsb_data_exchange_status.update_end_timestamp
        2018_23 chem_comp_v5 chem_comp Y '2018-07-11 11:51:37.958508+00:00' '2018-07-11 11:55:03.966508+00:00'
        # ... abbreviated ...
    """

    def __init__(self, **kwargs):
        self.__startTimestamp = None
        self.__endTimestamp = None
        self.__updateId = "unset"
        self.__statusFlag = "N"
        self.__databaseName = "unset"
        self.__objectName = "unset"
        self.__tU = TimeUtil()
        self.__kwargs = kwargs

    def setObject(self, databaseName, objectName):
        """Set the object for the current status record.

        Args:
            databaseName (str): database container name
            objectName (str): object name (collection/table) within the database

        Returns:
            bool: True for success or False otherwise
        """
        try:
            self.__databaseName = databaseName
            self.__objectName = objectName
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return False

    def setStartTime(self, tS=None, useUtc=True):
        """Set the start time for the current exchange operation.

        Args:
            tS (str, optional): timestamp for the start of the update operation (default=current time)
            useUtc (bool, optional): report times in UTC

        Returns:
            str: isoformat timestamp or None otherwise
        """
        try:
            self.__startTimestamp = tS if tS else self.__tU.getTimestamp(useUtc=useUtc)
            return self.__startTimestamp
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return None

    def setEndTime(self, tS=None, useUtc=True):
        """Set the end time for the current exchange operation.

        Args:
            tS (str, optional): timestamp for the end of the update operation (default=current time)
            useUtc (bool, optional): report times in UTC

        Returns:
            str: isoformat timestamp or None otherwise
        """
        try:
            self.__endTimestamp = tS if tS else self.__tU.getTimestamp(useUtc=useUtc)
            return self.__endTimestamp
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return None

    def setStatus(self, updateId=None, successFlag="Y"):
        """Set the update identifier (yyyy_<week_in_year>) and success flag for the current exchange operation.

        Args:
            updateId (str, optional): update identifier (default=yyyy_<week_in_year>)
            successFlag (str, optional): 'Y'/'N'

        Returns:
            bool: True for success or False otherwise
        """
        try:
            self.__statusFlag = successFlag
            self.__updateId = updateId if updateId else self.__tU.getCurrentWeekSignature()
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return False

    def getStatus(self, useTimeStrings=False):
        """Get the current data exchange status record.

        Returns:
            dict: current status record for the exchange operation
        """
        try:
            if useTimeStrings:
                sD = {
                    "update_id": self.__updateId,
                    "database_name": self.__databaseName,
                    "object_name": self.__objectName,
                    "update_status_flag": self.__statusFlag,
                    "update_begin_timestamp": self.__startTimestamp,
                    "update_end_timestamp": self.__endTimestamp,
                }
            else:
                sD = {
                    "update_id": self.__updateId,
                    "database_name": self.__databaseName,
                    "object_name": self.__objectName,
                    "update_status_flag": self.__statusFlag,
                    "update_begin_timestamp": self.__tU.getDateTimeObj(self.__startTimestamp),
                    "update_end_timestamp": self.__tU.getDateTimeObj(self.__endTimestamp),
                }
            return sD
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return {}
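# A minimal usage sketch for DataExchangeStatus, assuming the module-level
# imports used by the class above (TimeUtil, logger) are in scope. The
# database and collection names are illustrative.
def _demoExchangeStatus():
    desp = DataExchangeStatus()
    desp.setStartTime()                              # record the operation start time (UTC)
    desp.setObject("pdbx_core", "pdbx_core_entry")   # database and object for this record
    # ... perform the exchange operation here ...
    desp.setEndTime()                                # record the completion time
    desp.setStatus(updateId=None, successFlag="Y")   # updateId defaults to the current week signature
    return desp.getStatus(useTimeStrings=True)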
def load(self, op, **kwargs):
    logger.info("Starting operation %r\n", op)
    if not self.__cacheStatus:
        logger.error("Resource cache test or rebuild has failed - exiting")
        return False
    # argument processing
    if op not in ["etl_tree_node_lists", "etl_chemref", "etl_uniprot_core", "upd_ref_seq", "upd_ref_seq_comp_models", "refresh_pubchem"]:
        logger.error("Unsupported operation %r - exiting", op)
        return False
    try:
        # test mode and UniProt accession primary match minimum count for doReferenceSequenceUpdate()
        testMode = kwargs.get("testMode", False)
        minMatchPrimaryPercent = kwargs.get("minMatchPrimaryPercent", None)
        minMissing = kwargs.get("minMissing", 0)
        #
        readBackCheck = kwargs.get("readBackCheck", False)
        numProc = int(kwargs.get("numProc", 1))
        chunkSize = int(kwargs.get("chunkSize", 10))
        refChunkSize = int(kwargs.get("refChunkSize", 100))
        documentLimit = int(kwargs.get("documentLimit")) if "documentLimit" in kwargs else None
        loadType = kwargs.get("loadType", "full")  # or replace
        dbType = kwargs.get("dbType", "mongo")
        tU = TimeUtil()
        dataSetId = kwargs.get("dataSetId") if "dataSetId" in kwargs else tU.getCurrentWeekSignature()
        # Rebuild or reuse reference sequence cache
        rebuildSequenceCache = kwargs.get("rebuildSequenceCache", False)
        useSequenceCache = not rebuildSequenceCache
        #
    except Exception as e:
        logger.exception("Argument or configuration processing failing with %s", str(e))
        return False
    #
    okS = ok = False
    if dbType == "mongo":
        if op == "etl_tree_node_lists":
            rhw = TreeNodeListWorker(
                self.__cfgOb,
                self.__cachePath,
                numProc=numProc,
                chunkSize=chunkSize,
                documentLimit=documentLimit,
                verbose=self.__debugFlag,
                readBackCheck=readBackCheck,
                useCache=self.__useCache,
            )
            ok = rhw.load(dataSetId, loadType=loadType)
            okS = self.loadStatus(rhw.getLoadStatus(), readBackCheck=readBackCheck)
        elif op == "etl_chemref":
            crw = ChemRefEtlWorker(
                self.__cfgOb,
                self.__cachePath,
                numProc=numProc,
                chunkSize=chunkSize,
                documentLimit=documentLimit,
                verbose=self.__debugFlag,
                readBackCheck=readBackCheck,
                useCache=self.__useCache,
            )
            ok = crw.load(dataSetId, extResource="DrugBank", loadType=loadType)
            okS = self.loadStatus(crw.getLoadStatus(), readBackCheck=readBackCheck)
        elif op == "etl_uniprot_core":
            crw = UniProtCoreEtlWorker(
                self.__cfgOb,
                self.__cachePath,
                numProc=numProc,
                chunkSize=chunkSize,
                documentLimit=documentLimit,
                verbose=self.__debugFlag,
                readBackCheck=readBackCheck,
                useCache=self.__useCache,
            )
            ok = crw.load(dataSetId, extResource="UniProt", loadType=loadType)
            okS = self.loadStatus(crw.getLoadStatus(), readBackCheck=readBackCheck)
        elif op == "upd_ref_seq":
            databaseName = "pdbx_core"
            collectionName = "pdbx_core_polymer_entity"
            polymerType = "Protein"
            ok = self.doReferenceSequenceUpdate(
                databaseName,
                collectionName,
                polymerType,
                fetchLimit=documentLimit,
                useSequenceCache=useSequenceCache,
                testMode=testMode,
                minMatchPrimaryPercent=minMatchPrimaryPercent,
                minMissing=minMissing,
                refChunkSize=refChunkSize,
            )
            okS = ok
        elif op == "upd_ref_seq_comp_models":
            databaseName = "pdbx_comp_model_core"
            collectionName = "pdbx_comp_model_core_polymer_entity"
            polymerType = "Protein"
            ok = self.doReferenceSequenceUpdate(
                databaseName,
                collectionName,
                polymerType,
                fetchLimit=documentLimit,
                useSequenceCache=useSequenceCache,
                testMode=testMode,
                minMatchPrimaryPercent=minMatchPrimaryPercent,
                minMissing=minMissing,
                refChunkSize=refChunkSize,
            )
            okS = ok
    #
    logger.info("Completed operation %r with status %r\n", op, ok and okS)
    return ok and okS
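# A hedged usage sketch for the updater above. "updater" stands for an
# instance of the enclosing class; the kwargs mirror those consumed in the
# argument-processing block.
#
#   ok = updater.load(
#       "upd_ref_seq_comp_models",
#       dataSetId="2023_01",        # defaults to the current week signature
#       rebuildSequenceCache=True,  # force a fresh reference sequence cache
#       refChunkSize=50,
#       testMode=False,
#   )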