def testLatestJobData(self):
    """Verify cache size, internal metadata keys, and setter/getter round-trip."""
    # fixture loaded in setUp holds 20 requests
    self.assertEqual(20, len(DataCache.getlatestJobData()))
    self.assertItemsEqual(['time', 'data'], DataCache._lastedActiveDataFromAgent.keys())
    # replace the cached payload and read it back
    DataCache.setlatestJobData("ALAN")
    self.assertEqual("ALAN", DataCache.getlatestJobData())
    self.assertItemsEqual(['time', 'data'], DataCache._lastedActiveDataFromAgent.keys())
def testLatestJobData(self):
    """Verify cache size, internal metadata keys, and setter/getter round-trip."""
    # fixture loaded in setUp holds 20 requests
    self.assertEqual(20, len(DataCache.getlatestJobData()))
    self.assertItemsEqual(['time', 'data'], list(DataCache._lastedActiveDataFromAgent))
    # replace the cached payload and read it back
    DataCache.setlatestJobData("ALAN")
    self.assertEqual("ALAN", DataCache.getlatestJobData())
    self.assertItemsEqual(['time', 'data'], list(DataCache._lastedActiveDataFromAgent))
def testFilterDataByRequest(self):
    """Exercise filterDataByRequest with several filter/mask combinations."""
    # no filter, single mask string: one masked dict per cached request
    data = list(DataCache.filterDataByRequest(filterDict={}, maskList='RequestType'))
    self.assertEqual(20, len(data))
    self.assertItemsEqual(['RequestName', 'RequestType'], list(data[0]))
    reqTypes = {item['RequestType'] for item in data}
    self.assertItemsEqual(['ReReco', 'MonteCarlo', 'StepChain', 'MonteCarloFromGEN',
                           'ReDigi', 'TaskChain', 'DQMHarvest'], reqTypes)

    # no filter, two masked fields
    data = list(DataCache.filterDataByRequest(filterDict={},
                                              maskList=['Campaign', 'RequestType']))
    self.assertEqual(20, len(data))

    # filter on the IncludeParents flag
    data = list(DataCache.filterDataByRequest(filterDict={'IncludeParents': 'True'},
                                              maskList=['Campaign']))
    self.assertEqual(2, len(data))

    # filter on an exact campaign name: a single matching request
    data = list(DataCache.filterDataByRequest(
        filterDict={'Campaign': 'CMSSW_9_4_0__test2inwf-1510737328'},
        maskList=['RequestName']))
    self.assertEqual(1, len(data))
    self.assertEqual("amaltaro_TaskChain_InclParents_HG1812_Validation_181203_121005_1483",
                     data[0]['RequestName'])
def get(self):
    """Return the cached list of parent datasets.

    Assumes both the DataCache and the parentage cache are refreshed
    periodically; on problems, check the WMStats cherrypy thread logs.

    :raises DataCacheEmpty: when the cache has not been populated yet
    """
    if DataCache.isEmpty():
        raise DataCacheEmpty()
    return rows(DataCache.getParentDatasetList())
def get(self):
    """Return OutputModulesLFNBases for all active-status requests.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.

    :raises DataCacheEmpty: when the cache has not been populated yet
    """
    if DataCache.isEmpty():
        raise DataCacheEmpty()
    return rows(DataCache.filterData(ACTIVE_STATUS_FILTER, ["OutputModulesLFNBases"]))
def get(self):
    """Return dataset/pileup fields for active, not-closed-out requests.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.

    :raises DataCacheEmpty: when the cache has not been populated yet
    """
    if DataCache.isEmpty():
        raise DataCacheEmpty()
    maskedFields = ["InputDataset", "OutputDatasets", "MCPileup", "DataPileup"]
    return rows(DataCache.filterData(ACTIVE_NO_CLOSEOUT_FILTER, maskedFields))
def gatherT0ActiveDataStats(self, config):
    """
    Gather T0 active data statistics and refresh the DataCache.

    :param config: component configuration providing wmstats_url and reqmgrdb_url
    :return: None; errors are logged, never propagated (best-effort refresh)
    """
    try:
        if DataCache.islatestJobDataExpired():
            wmstatsDB = WMStatsReader(config.wmstats_url, config.reqmgrdb_url,
                                      reqdbCouchApp="T0Request")
            jobData = wmstatsDB.getT0ActiveData(jobInfoFlag=True)
            DataCache.setlatestJobData(jobData)
            # lazy %-args: message is only built when INFO is enabled
            self.logger.info("DataCache is updated: %s", len(jobData))
    except Exception as ex:
        self.logger.error(str(ex))
    return
def gatherActiveDataStats(self, config):
    """
    Gather active data statistics and refresh the DataCache.

    Best-effort: any failure is written to the cherrypy error log and
    the previous cache content is kept.
    """
    try:
        if DataCache.islatestJobDataExpired():
            reader = WMStatsReader(config.wmstats_url, config.reqmgrdb_url,
                                   reqdbCouchApp="ReqMgr")
            DataCache.setlatestJobData(reader.getActiveData(jobInfoFlag=True))
    except Exception as ex:
        cherrypy.log.error(str(ex))
    return
def gatherT0ActiveDataStats(self, config):
    """
    Gather T0 active data statistics and refresh the DataCache.

    Best-effort: any failure is logged and the previous cache content is kept.
    """
    try:
        if DataCache.islatestJobDataExpired():
            reader = WMStatsReader(config.wmstats_url, reqdbURL=config.reqmgrdb_url,
                                   reqdbCouchApp="T0Request")
            jobData = reader.getT0ActiveData(jobInfoFlag=True)
            DataCache.setlatestJobData(jobData)
            self.logger.info("DataCache is updated: %s", len(jobData))
    except Exception as ex:
        self.logger.error(str(ex))
    return
def gatherActiveDataStats(self, config):
    """
    Gather active data statistics and refresh the DataCache.

    Best-effort: any failure is written to the cherrypy error log and
    the previous cache content is kept.
    """
    try:
        if DataCache.islatestJobDataExpired():
            reader = WMStatsReader(config.wmstats_url, config.reqmgrdb_url,
                                   reqdbCouchApp="ReqMgr")
            DataCache.setlatestJobData(reader.getActiveData(jobInfoFlag=True))
    except Exception as ex:
        cherrypy.log.error(str(ex))
    return
def testLatestJobDataExpired(self):
    """Expiry tracks the configured duration and an emptied cache."""
    self.assertFalse(DataCache.islatestJobDataExpired())
    # a negative duration makes any cached timestamp look expired
    DataCache.setDuration(-1)
    self.assertTrue(DataCache.islatestJobDataExpired())
    DataCache.setDuration(300)
    self.assertFalse(DataCache.islatestJobDataExpired())
    # wiping the internal store counts as expired and yields empty job data
    DataCache._lastedActiveDataFromAgent = {}
    self.assertTrue(DataCache.islatestJobDataExpired())
    self.assertEqual({}, DataCache.getlatestJobData())
def testFilterData(self):
    """Exercise filterData with several filter/mask combinations."""
    # no filter, single masked field: one value per cached request
    data = list(DataCache.filterData(filterDict={}, maskList=['RequestType']))
    self.assertEqual(20, len(data))
    self.assertItemsEqual(['ReReco', 'MonteCarlo', 'StepChain', 'MonteCarloFromGEN',
                           'ReDigi', 'TaskChain', 'DQMHarvest'], set(data))

    # two masked fields: two values per request
    data = list(DataCache.filterData(filterDict={}, maskList=['Campaign', 'RequestType']))
    self.assertEqual(40, len(data))

    # filter on the IncludeParents flag
    data = list(DataCache.filterData(filterDict={'IncludeParents': 'True'},
                                     maskList=['Campaign']))
    self.assertEqual(2, len(data))

    # filter on an exact campaign name: a single matching request name
    data = list(DataCache.filterData(filterDict={'Campaign': 'CMSSW_9_4_0__test2inwf-1510737328'},
                                     maskList=['RequestName']))
    self.assertItemsEqual(
        ["amaltaro_TaskChain_InclParents_HG1812_Validation_181203_121005_1483"], data)
def setUp(self):
    """Load the JSON fixture into the DataCache before each test."""
    self.fileCache = os.path.join(os.path.dirname(__file__), 'DataCache.json')
    with open(self.fileCache) as jo:
        DataCache().setlatestJobData(json.load(jo))
    if PY3:
        # unittest renamed assertItemsEqual to assertCountEqual in Python 3
        self.assertItemsEqual = self.assertCountEqual
def gatherActiveDataStats():
    """Refresh the DataCache with active request data from central CouchDB (no job info)."""
    wmstats_url = "https://cmsweb.cern.ch/couchdb/wmstats"
    reqmgrdb_url = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
    jobInfoFlag = False
    tStart = time.time()
    try:
        if not DataCache.islatestJobDataExpired():
            print("DataCache is up-to-date")
        else:
            wmstatsDB = WMStatsReader(wmstats_url, reqdbURL=reqmgrdb_url,
                                      reqdbCouchApp="ReqMgr")
            jobData = wmstatsDB.getActiveData(jobInfoFlag=jobInfoFlag)
            DataCache.setlatestJobData(jobData)
            print("DataCache is updated: {}".format(len(jobData)))
    except Exception as ex:
        # best-effort: report and keep the stale cache rather than crash the loop
        print("Exception updating cache. Details: {}\nTraceback: {}".format(
            str(ex), str(traceback.format_exc())))
    print("Total time executing this cycle: {}".format(time.time() - tStart))
def gatherActiveDataStats(self, config):
    """
    Gather active data statistics from ReqMgr2/WMStats and refresh the DataCache.

    Requests in WMSTATS_JOB_INFO statuses are fetched with job info (subject to
    self.getJobInfo); those in WMSTATS_NO_JOB_INFO statuses without it, and the
    two result dicts are merged before being cached.

    :param config: component configuration providing wmstats_url and reqmgrdb_url
    :return: None; errors are logged with traceback, never propagated
    """
    self.logger.info("Starting gatherActiveDataStats with jobInfo set to: %s", self.getJobInfo)
    # start the clock before the try block so the elapsed-time log at the end
    # is always well defined, regardless of where a failure happens
    tStart = time.time()
    try:
        if DataCache.islatestJobDataExpired():
            wmstatsDB = WMStatsReader(config.wmstats_url, reqdbURL=config.reqmgrdb_url,
                                      reqdbCouchApp="ReqMgr", logger=self.logger)
            self.logger.info("Getting active data with job info for statuses: %s", WMSTATS_JOB_INFO)
            jobData = wmstatsDB.getActiveData(WMSTATS_JOB_INFO, jobInfoFlag=self.getJobInfo)
            self.logger.info("Getting active data with NO job info for statuses: %s", WMSTATS_NO_JOB_INFO)
            tempData = wmstatsDB.getActiveData(WMSTATS_NO_JOB_INFO, jobInfoFlag=False)
            jobData.update(tempData)
            self.logger.info("Running setlatestJobData...")
            DataCache.setlatestJobData(jobData)
            self.logger.info("DataCache is up-to-date with %d requests data", len(jobData))
    except Exception as ex:
        self.logger.exception("Exception updating DataCache. Error: %s", str(ex))
    self.logger.info("Total time loading data from ReqMgr2 and WMStats: %s", time.time() - tStart)
    return
def fetchIncludeParentsRequests(self, config):
    """
    Fetch active requests from the DataCache that have IncludeParents=True,
    resolve their input datasets' parents via DBS and refresh the parentage cache.
    """
    # flags whether any dataset failed its parentage lookup, so that we do
    # not throw away the previously cached parents in that case
    incompleteParentage = False
    uniqueDsets = set()
    parentDsets = set()
    self.logger.info("Executing parent lock cherrypy thread")
    for inputDset in DataCache.filterData(ACTIVE_NO_CLOSEOUT_PARENT_FILTER, ["InputDataset"]):
        uniqueDsets.add(inputDset)
    self.logger.info("Found %d unique datasets requiring the parent dataset",
                     len(uniqueDsets))
    for dset in uniqueDsets:
        try:
            res = self.dbs.listDatasetParents(dset)
        except Exception as exc:
            self.logger.warning("Failed to resolve parentage for: %s. Error: %s",
                                dset, str(exc))
            incompleteParentage = True
            continue
        self.logger.info("Resolved parentage for: %s", res)
        if res:
            parentDsets.add(res[0]['parent_dataset'])
    if incompleteParentage:
        # keep the previously known parents and only add the newly found ones
        parentDsets.update(DataCache.getParentDatasetList())
        DataCache.setParentDatasetList(list(parentDsets))
        self.logger.info("Parentage lookup complete and cache updated")
    else:
        # every lookup succeeded: safe to replace the whole cache
        DataCache.setParentDatasetList(list(parentDsets))
        self.logger.info("Parentage lookup complete and cache renewed")
    return
def get(self, mask=None, **input_condition):
    """Return cached request data filtered by query conditions and masked fields.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.
    """
    return rows(DataCache.filterDataByRequest(input_condition, mask))
from WMCore.Services.WMStats.WMStatsReader import WMStatsReader


@profile
def gatherActiveDataStats():
    """Refresh the DataCache with active request data from central CouchDB (no job info)."""
    wmstats_url = "https://cmsweb.cern.ch/couchdb/wmstats"
    reqmgrdb_url = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
    jobInfoFlag = False
    tStart = time.time()
    try:
        if not DataCache.islatestJobDataExpired():
            print("DataCache is up-to-date")
        else:
            wmstatsDB = WMStatsReader(wmstats_url, reqdbURL=reqmgrdb_url,
                                      reqdbCouchApp="ReqMgr")
            jobData = wmstatsDB.getActiveData(jobInfoFlag=jobInfoFlag)
            DataCache.setlatestJobData(jobData)
            print("DataCache is updated: {}".format(len(jobData)))
    except Exception as ex:
        # best-effort: report and keep the stale cache rather than crash the loop
        print("Exception updating cache. Details: {}\nTraceback: {}".format(
            str(ex), str(traceback.format_exc())))
    print("Total time executing this cycle: {}".format(time.time() - tStart))


if __name__ == "__main__":
    # short cache duration so the profiling loop actually refreshes
    DataCache.setDuration(100)
    while True:
        gatherActiveDataStats()
        time.sleep(60)
def testDuration(self):
    """The cache duration defaults to 300 seconds and is settable."""
    self.assertEqual(300, DataCache.getDuration())
    DataCache.setDuration(100)
    self.assertEqual(100, DataCache.getDuration())
def setUp(self):
    """Load the JSON fixture into the DataCache before each test."""
    self.fileCache = os.path.join(os.path.dirname(__file__), 'DataCache.json')
    with open(self.fileCache) as jo:
        DataCache().setlatestJobData(json.load(jo))
def get(self):
    """Return the latest job data, refreshing from WMStats when the cache is
    unset or stale.

    :return: rows-wrapped single-element list with the job data payload
    """
    results = DataCache.getlatestJobData()
    # "is None" (identity) rather than "== None": avoids invoking a custom
    # __eq__ on the cached object and is the idiomatic singleton check (PEP 8)
    if results is None or DataCache.islatestJobDataExpired():
        results = self.wmstats.getActiveData(jobInfoFlag=True)
    return rows([results])
def get(self):
    """Return the latest cached job data as a single-element row list.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.
    """
    return rows([DataCache.getlatestJobData()])
def get(self):
    """Return the cached list of protected LFNs.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.
    """
    return rows(DataCache.getProtectedLFNs())
def get(self):
    """Return OutputModulesLFNBases for all active-status requests.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.
    """
    return rows(DataCache.filterData(ACTIVE_STATUS_FILTER, ["OutputModulesLFNBases"]))
def get(self):
    """Return dataset/pileup fields for active, not-closed-out requests.

    Assumes the DataCache is refreshed periodically; if data looks stale,
    check the dataCacheUpdate log.
    """
    maskedFields = ["InputDataset", "OutputDatasets", "MCPileup", "DataPileup"]
    return rows(DataCache.filterData(ACTIVE_NO_CLOSEOUT_FILTER, maskedFields))
def get(self):
    """Return the latest job data, refreshing from WMStats when the cache is
    unset or stale.

    :return: rows-wrapped single-element list with the job data payload
    """
    results = DataCache.getlatestJobData()
    # "is None" (identity) rather than "== None": avoids invoking a custom
    # __eq__ on the cached object and is the idiomatic singleton check (PEP 8)
    if results is None or DataCache.islatestJobDataExpired():
        results = self.wmstats.getActiveData(jobInfoFlag=True)
    return rows([results])