def __init__(self, config=None, logger=None):
    """
    Initialize MSManager class with given configuration, logger,
    ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects, and start transferor
    and monitoring threads.

    Only the services listed in the configuration's ``services``
    attribute (parsed by ``_parseConfig``) are instantiated and started.

    :param config: reqmgr2ms service configuration
    :param logger: logger object; a new one is created by getMSLogger if None
    """
    self.config = config
    self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
    # _parseConfig populates self.services and self.msConfig
    self._parseConfig(config)
    self.logger.info("Configuration including default values:\n%s", self.msConfig)
    # summaries of the last execution cycle for each service
    self.statusTrans = {}
    self.statusMon = {}
    # initialize transferor module
    if 'transferor' in self.services:
        self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
        thname = 'MSTransferor'
        # NOTE(review): start_new_thread presumably spawns a daemon thread
        # running `daemon` with these args -- confirm against Utils.
        self.transfThread = start_new_thread(thname, daemon,
                                             (self.transferor, 'assigned',
                                              self.msConfig['interval'],
                                              self.logger))
        self.logger.debug("### Running %s thread %s", thname, self.transfThread.running())
    # initialize monitoring module
    if 'monitor' in self.services:
        self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
        thname = 'MSMonitor'
        self.monitThread = start_new_thread(thname, daemon,
                                            (self.monitor, 'staging',
                                             self.msConfig['interval'],
                                             self.logger))
        self.logger.debug("+++ Running %s thread %s", thname, self.monitThread.running())
def __init__(self, config=None, logger=None):
    """
    Setup a bunch of things, like:
     * logger for this service
     * initialize all the necessary service helpers
     * fetch the unified configuration from central couch
     * update the unified configuration with some deployment and default settings
     * start both transfer and monitor threads
    :param config: reqmgr2ms service configuration
    :param logger: logger object; a new one is created by getMSLogger if None
    """
    # local copy of the unified configuration, refreshed by the monitor cycle
    self.uConfig = {}
    self.config = config
    self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
    # _parseConfig populates self.msConfig with defaults
    self._parseConfig(config)
    self.logger.info("Configuration including default values:\n%s", self.msConfig)
    self.reqmgr2 = ReqMgr(self.msConfig['reqmgrUrl'], logger=self.logger)
    # cacheduration of 60: presumably in minutes/hours depending on the
    # ReqMgrAux contract -- TODO confirm the unit
    self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgrUrl'],
                               httpDict={'cacheduration': 60},
                               logger=self.logger)
    # transferor has to look at workflows in assigned status
    self.msTransferor = MSTransferor(self.msConfig, "assigned", logger=self.logger)
    ### Last but not least, get the threads started
    thname = 'MSTransferor'
    self.transfThread = start_new_thread(thname, daemon,
                                         (self.transferor, 'assigned',
                                          self.msConfig['interval'],
                                          self.logger))
    self.logger.debug("### Running %s thread %s", thname, self.transfThread.running())
    thname = 'MSTransferorMonit'
    # the monitor cycle runs at half the frequency of the transferor cycle
    self.monitThread = start_new_thread(thname, daemon,
                                        (self.monitor, 'staging',
                                         self.msConfig['interval'] * 2,
                                         self.logger))
    self.logger.debug("+++ Running %s thread %s", thname, self.monitThread.running())
def setUp(self):
    """Build the MicroService configuration and test fixtures for every test."""
    self.msConfig = dict(verbose=False,
                         group='DataOps',
                         interval=60,
                         enableStatusTransition=True,
                         reqmgrUrl='https://cmsweb-testbed.cern.ch/reqmgr2',
                         reqmgrCacheUrl='https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
                         phedexUrl='https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
                         dbsUrl='https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader')
    self.msTransferor = MSTransferor(self.msConfig)
    # template request documents used by the individual tests
    self.taskChainTempl = getTestFile('data/ReqMgr/requests/Integration/TaskChain_Prod.json')
    self.stepChainTempl = getTestFile('data/ReqMgr/requests/Integration/SC_LumiMask_PhEDEx.json')
    super(TransferorTest, self).setUp()
class TransferorTest(EmulatedUnitTestCase):
    "Unit test for Transferor module"

    def setUp(self):
        "init test class"
        # MicroService configuration pointing at the testbed instances
        self.msConfig = {'verbose': False,
                         'group': 'DataOps',
                         'interval': 1 * 60,
                         'enableStatusTransition': True,
                         'reqmgrUrl': 'https://cmsweb-testbed.cern.ch/reqmgr2',
                         'reqmgrCacheUrl': 'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
                         'phedexUrl': 'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
                         'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'}
        self.msTransferor = MSTransferor(self.msConfig)
        # template request documents used as inputs for requestRecord
        self.taskChainTempl = getTestFile('data/ReqMgr/requests/Integration/TaskChain_Prod.json')
        self.stepChainTempl = getTestFile('data/ReqMgr/requests/Integration/SC_LumiMask_PhEDEx.json')
        super(TransferorTest, self).setUp()

    def notestRequestRecord(self):
        """
        Test the requestRecord method

        NOTE: method name is prefixed with 'no' so the unittest runner
        skips it. It uses assertItemsEqual, which exists only in
        Python 2 unittest (assertCountEqual in Python 3) -- confirm the
        target interpreter before re-enabling.
        """
        # the record returned for an empty request dictionary
        default = {'name': '', 'reqStatus': None, 'SiteWhiteList': [],
                   'SiteBlackList': [], 'datasets': [], 'campaign': []}
        self.assertItemsEqual(self.msTransferor.requestRecord({}), default)

        # TaskChain template: expect two pileup dataset entries
        with open(self.taskChainTempl) as jo:
            reqData = json.load(jo)['createRequest']
        expectedRes = [{'type': 'MCPileup',
                        'name': '/Neutrino_E-10_gun/RunIISummer17PrePremix-PUAutumn18_102X_upgrade2018_realistic_v15-v1/GEN-SIM-DIGI-RAW'},
                       {'type': 'MCPileup',
                        'name': '/Neutrino_E-10_gun/RunIISummer17PrePremix-PUAutumn18_102X_upgrade2018_realistic_v15-v1/GEN-SIM-DIGI-RAW'}]
        resp = self.msTransferor.requestRecord(reqData)['datasets']
        self.assertEqual(len(resp), 2)
        for idx in range(len(resp)):
            self.assertItemsEqual(resp[idx], expectedRes[idx])

        # StepChain template: expect one input dataset and two pileup entries
        with open(self.stepChainTempl) as jo:
            reqData = json.load(jo)['createRequest']
        expectedRes = [{'type': 'InputDataset',
                        'name': '/RelValH125GGgluonfusion_14/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'},
                       {'type': 'MCPileup',
                        'name': '/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'},
                       {'type': 'MCPileup',
                        'name': '/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'}]
        resp = self.msTransferor.requestRecord(reqData)['datasets']
        self.assertEqual(len(resp), 3)
        for idx in range(len(resp)):
            self.assertItemsEqual(resp[idx], expectedRes[idx])
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """

    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration, logger,
        ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects, and start transferor
        and monitoring threads.

        Only the services listed in the configuration's ``services``
        attribute are instantiated and started.

        :param config: reqmgr2ms service configuration
        :param logger: logger object; a new one is created by getMSLogger if None
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s", self.msConfig)
        # summaries of the last execution cycle of each service (see status())
        self.statusTrans = {}
        self.statusMon = {}
        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(thname, daemon,
                                                 (self.transferor, 'assigned',
                                                  self.msConfig['interval'],
                                                  self.logger))
            self.logger.debug("### Running %s thread %s", thname, self.transfThread.running())
        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(thname, daemon,
                                                (self.monitor, 'staging',
                                                 self.msConfig['interval'],
                                                 self.logger))
            self.logger.debug("+++ Running %s thread %s", thname, self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s", config.dictionary_())
        self.services = getattr(config, 'services', [])
        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)
        # NOTE(review): assumes the deployment config always defines
        # 'reqmgr2Url' -- confirm, otherwise this raises KeyError
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status to process (e.g. 'assigned')
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs", res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status to process (e.g. 'staging')
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        # FIX: use %.2f (execution_time is a float of seconds), consistent
        # with the transferor log above; %d silently truncated it
        self.logger.info("Total monitor execution time: %.2f secs", res['execution_time'])
        self.statusMon = res

    def stop(self):
        """
        Stop MSManager threads.
        :return: running state of the last thread stopped, or None
        """
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request
        """
        data = {"request": reqName, "transferDoc": None}
        # FIX: pre-define transferDoc so it is bound even when neither the
        # monitor nor the transferor service is enabled (previously that
        # case raised NameError)
        transferDoc = None
        if reqName:
            # obtain the transfer information from couchdb for a given request
            if 'monitor' in self.services:
                transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
            elif 'transferor' in self.services:
                transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(reqName)
            if transferDoc:
                # it's always a single document in Couch
                data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information
        :param reportDict: summary dictionary
        :param startT: epoch start time for a given service
        :param endT: epoch end time for a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()
def __init__(self, config=None, logger=None):
    """
    Initialize MSManager class with given configuration, logger,
    ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects, and start transferor
    and monitoring threads.

    Only the services listed in the configuration's ``services``
    attribute (parsed by ``_parseConfig``) are instantiated and started:
    transferor, monitor, output (consumer + producer threads) and
    ruleCleaner.

    :param config: reqmgr2ms service configuration
    :param logger: logger object; a new one is created by getMSLogger if None
    """
    self.config = config
    self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
    self._parseConfig(config)
    self.logger.info("Configuration including default values:\n%s", self.msConfig)
    # summaries of the last execution cycle of each service
    self.statusTrans = {}
    self.statusMon = {}
    self.statusOutput = {}
    self.statusRuleCleaner = {}
    # initialize transferor module
    if 'transferor' in self.services:
        self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
        thname = 'MSTransferor'
        self.transfThread = start_new_thread(thname, daemon,
                                             (self.transferor, 'assigned',
                                              self.msConfig['interval'],
                                              self.logger))
        self.logger.info("### Running %s thread %s", thname, self.transfThread.running())
    # initialize monitoring module
    if 'monitor' in self.services:
        self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
        thname = 'MSMonitor'
        self.monitThread = start_new_thread(thname, daemon,
                                            (self.monitor, 'staging',
                                             self.msConfig['interval'],
                                             self.logger))
        self.logger.info("+++ Running %s thread %s", thname, self.monitThread.running())
    # initialize output module
    if 'output' in self.services:
        reqStatus = ['closed-out', 'announced']
        # thread safe cache to keep the last X requests processed in MSOutput;
        # shared between the consumer and producer instances
        requestNamesCached = deque(maxlen=self.msConfig.get("cacheRequestSize", 10000))
        thname = 'MSOutputConsumer'
        self.msOutputConsumer = MSOutput(self.msConfig, mode=thname,
                                         reqCache=requestNamesCached,
                                         logger=self.logger)
        # set the consumer to run twice faster than the producer
        consumerInterval = self.msConfig['interval'] // 2
        self.outputConsumerThread = start_new_thread(thname, daemon,
                                                     (self.outputConsumer,
                                                      reqStatus,
                                                      consumerInterval,
                                                      self.logger))
        self.logger.info("=== Running %s thread %s", thname, self.outputConsumerThread.running())
        thname = 'MSOutputProducer'
        self.msOutputProducer = MSOutput(self.msConfig, mode=thname,
                                         reqCache=requestNamesCached,
                                         logger=self.logger)
        self.outputProducerThread = start_new_thread(thname, daemon,
                                                     (self.outputProducer,
                                                      reqStatus,
                                                      self.msConfig['interval'],
                                                      self.logger))
        self.logger.info("=== Running %s thread %s", thname, self.outputProducerThread.running())
    # initialize rule cleaner module
    if 'ruleCleaner' in self.services:
        reqStatus = ['announced', 'aborted-completed', 'rejected']
        self.msRuleCleaner = MSRuleCleaner(self.msConfig, logger=self.logger)
        thname = 'MSRuleCleaner'
        self.ruleCleanerThread = start_new_thread(thname, daemon,
                                                  (self.ruleCleaner,
                                                   reqStatus,
                                                   self.msConfig['interval'],
                                                   self.logger))
        self.logger.info("--- Running %s thread %s", thname, self.ruleCleanerThread.running())
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """

    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration, logger,
        ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects, and start transferor
        and monitoring threads.

        Only the services listed in the configuration's ``services``
        attribute are instantiated and started: transferor, monitor,
        output (consumer + producer threads) and ruleCleaner.

        :param config: reqmgr2ms service configuration
        :param logger: logger object; a new one is created by getMSLogger if None
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s", self.msConfig)
        # summaries of the last execution cycle of each service (see status())
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}
        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(thname, daemon,
                                                 (self.transferor, 'assigned',
                                                  self.msConfig['interval'],
                                                  self.logger))
            self.logger.info("### Running %s thread %s", thname, self.transfThread.running())
        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(thname, daemon,
                                                (self.monitor, 'staging',
                                                 self.msConfig['interval'],
                                                 self.logger))
            self.logger.info("+++ Running %s thread %s", thname, self.monitThread.running())
        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput;
            # shared by the consumer and producer instances
            requestNamesCached = deque(maxlen=self.msConfig.get("cacheRequestSize", 10000))
            thname = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig, mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(thname, daemon,
                                                         (self.outputConsumer,
                                                          reqStatus,
                                                          consumerInterval,
                                                          self.logger))
            self.logger.info("=== Running %s thread %s", thname, self.outputConsumerThread.running())
            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig, mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(thname, daemon,
                                                         (self.outputProducer,
                                                          reqStatus,
                                                          self.msConfig['interval'],
                                                          self.logger))
            self.logger.info("=== Running %s thread %s", thname, self.outputProducerThread.running())
        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig, logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(thname, daemon,
                                                      (self.ruleCleaner,
                                                       reqStatus,
                                                       self.msConfig['interval'],
                                                       self.logger))
            self.logger.info("--- Running %s thread %s", thname, self.ruleCleanerThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s", config.dictionary_())
        self.services = getattr(config, 'services', [])
        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)
        # NOTE(review): assumes the deployment config always defines
        # 'reqmgr2Url' -- confirm, otherwise this raises KeyError
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status to process (e.g. 'assigned')
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs", res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status to process (e.g. 'staging')
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        # FIX: %.2f (execution_time is a float of seconds) for consistency
        # with the transferor log; %d silently truncated the value
        self.logger.info("Total monitor execution time: %.2f secs", res['execution_time'])
        self.statusMon = res

    def outputConsumer(self, reqStatus):
        """
        MSManager Output Data Placement function.
        It subscribes the output datasets to the Data Management System.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputConsumer thread...")
        res = self.msOutputConsumer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        # FIX: %.2f for consistency (see monitor)
        self.logger.info("Total outputConsumer execution time: %.2f secs", res['execution_time'])
        self.statusOutput = res

    def outputProducer(self, reqStatus):
        """
        MSManager MongoDB Uploader function.
        It uploads the documents describing a workflow output Data
        subscription into MongoDb.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputProducer thread...")
        res = self.msOutputProducer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        # FIX: %.2f for consistency (see monitor)
        self.logger.info("Total outputProducer execution time: %.2f secs", res['execution_time'])
        self.statusOutput = res

    def ruleCleaner(self, reqStatus):
        """
        MSManager ruleCleaner function.
        It cleans the block level Rucio rules created by WMAgent and
        performs request status transition from ['announced',
        'aborted-completed', 'rejected'] to '{normal, aborted,
        rejected}-archived' state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-RuleCleaner
        :param reqStatus: list of request statuses to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the ruleCleaner thread...")
        res = self.msRuleCleaner.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        # FIX: %.2f for consistency (see monitor)
        self.logger.info("Total ruleCleaner execution time: %.2f secs", res['execution_time'])
        self.statusRuleCleaner = res

    def stop(self):
        """
        Stop MSManager threads.
        :return: running state of the last thread stopped, or None
        """
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        # stop MSOutput threads
        if 'output' in self.services and hasattr(self, 'outputConsumerThread'):
            self.outputConsumerThread.stop()
            status = self.outputConsumerThread.running()
        if 'output' in self.services and hasattr(self, 'outputProducerThread'):
            self.outputProducerThread.stop()
            status = self.outputProducerThread.running()
        # stop MSRuleCleaner thread
        if 'ruleCleaner' in self.services and hasattr(self, 'ruleCleanerThread'):
            self.ruleCleanerThread.stop()
            status = self.ruleCleanerThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request
        """
        data = {"request": reqName, "transferDoc": None}
        # FIX: pre-define transferDoc so it is bound even when none of the
        # monitor/transferor/output services is enabled (previously that
        # case raised NameError)
        transferDoc = None
        if reqName:
            # obtain the transfer information from couchdb for a given request
            if 'monitor' in self.services:
                transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
            elif 'transferor' in self.services:
                transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(reqName)
            elif 'output' in self.services:
                transferDoc = self.msOutputProducer.getTransferInfo(reqName)
            if transferDoc:
                # it's always a single document in Couch
                data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        elif detail and 'output' in self.services:
            data.update(self.statusOutput)
        elif detail and 'ruleCleaner' in self.services:
            data.update(self.statusRuleCleaner)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information
        :param reportDict: summary dictionary
        :param startT: epoch start time for a given service
        :param endT: epoch end time for a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitor services/threads.
    """

    def __init__(self, config=None, logger=None):
        """
        Setup a bunch of things, like:
         * logger for this service
         * initialize all the necessary service helpers
         * fetch the unified configuration from central couch
         * update the unified configuration with some deployment and default settings
         * start both transfer and monitor threads
        :param config: reqmgr2ms service configuration
        :param logger: logger object; a new one is created by getMSLogger if None
        """
        # local copy of the unified configuration, refreshed by each monitor cycle
        self.uConfig = {}
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s", self.msConfig)
        self.reqmgr2 = ReqMgr(self.msConfig['reqmgrUrl'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgrUrl'],
                                   httpDict={'cacheduration': 60},
                                   logger=self.logger)
        # transferor has to look at workflows in assigned status
        self.msTransferor = MSTransferor(self.msConfig, "assigned", logger=self.logger)
        ### Last but not least, get the threads started
        thname = 'MSTransferor'
        self.transfThread = start_new_thread(thname, daemon,
                                             (self.transferor, 'assigned',
                                              self.msConfig['interval'],
                                              self.logger))
        self.logger.debug("### Running %s thread %s", thname, self.transfThread.running())
        thname = 'MSTransferorMonit'
        # the monitor cycle runs at half the frequency of the transferor cycle
        self.monitThread = start_new_thread(thname, daemon,
                                            (self.monitor, 'staging',
                                             self.msConfig['interval'] * 2,
                                             self.logger))
        self.logger.debug("+++ Running %s thread %s", thname, self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following config:\n%s", config)
        self.msConfig = {}
        self.msConfig['verbose'] = getattr(config, 'verbose', False)
        self.msConfig['group'] = getattr(config, 'group', 'DataOps')
        self.msConfig['interval'] = getattr(config, 'interval', 5 * 60)
        # readOnly=True means no actual status transition is performed (see change())
        self.msConfig['readOnly'] = getattr(config, 'readOnly', True)
        self.msConfig['reqmgrUrl'] = getattr(config, 'reqmgr2Url',
                                             'https://cmsweb.cern.ch/reqmgr2')
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgrUrl'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')
        self.msConfig['phedexUrl'] = getattr(config, 'phedexUrl',
                                             'https://cmsweb.cern.ch/phedex/datasvc/json/prod')
        self.msConfig['dbsUrl'] = getattr(config, 'dbsUrl',
                                          'https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status (currently unused by this variant)
        """
        startT = time.time()
        self.logger.info("Starting the transferor thread...")
        self.msTransferor.execute()
        self.logger.info("Total transferor execution time: %.2f secs", time.time() - startT)

    def monitor(self, reqStatus='staging'):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status to monitor
        """
        startT = time.time()
        self.logger.info("Starting the monitor thread...")
        # First, fetch/update our unified configuration from reqmgr_aux db
        # Keep our own copy of the unified config to avoid race conditions
        self.uConfig = self.reqmgrAux.getUnifiedConfig(docName="config")
        if not self.uConfig:
            self.logger.warning("Monitor failed to fetch the unified config. Skipping this cycle.")
            return
        self.uConfig = self.uConfig[0]
        try:
            # get requests from ReqMgr2 data-service for given status
            # here with detail=False we get back list of records
            requests = self.reqmgr2.getRequestByStatus([reqStatus], detail=False)
            self.logger.debug('+++ monit found %s requests in %s state',
                              len(requests), reqStatus)
            requestStatus = {}  # keep track of request statuses
            for reqName in requests:
                req = {'name': reqName, 'reqStatus': reqStatus}
                # get transfer IDs
                tids = self.getTransferIDs()
                # get transfer status
                transferStatuses = self.getTransferStatuses(tids)
                # get campaign and unified configuration
                campaign = self.requestCampaign(reqName)
                conf = self.requestConfiguration(reqName)
                self.logger.debug("+++ request %s campaing %s conf %s",
                                  req, campaign, conf)
                # if all transfers are completed, move the request status staging -> staged
                # completed = self.checkSubscription(request)
                completed = 100  # TMP
                if completed == 100:  # all data are staged
                    self.logger.debug("+++ request %s all transfers are completed", req)
                    self.change(req, 'staged', '+++ monit')
                # if pileup transfers are completed AND some input blocks are
                # completed, move the request status staging -> staged
                elif self.pileupTransfersCompleted(tids):
                    self.logger.debug("+++ request %s pileup transfers are completed", req)
                    self.change(req, 'staged', '+++ monit')
                # transfers not completed, just update the database with their completion
                else:
                    self.logger.debug("+++ request %s transfers are not completed", req)
                    # FIX: key by the (hashable) request name; the previous
                    # code used the `req` dict as key, which raised a
                    # TypeError that was silently swallowed by the broad
                    # except below
                    requestStatus[reqName] = transferStatuses
            # TODO: implement update of transfer ids
            self.updateTransferIDs(requestStatus)
        except Exception as err:  # general error
            self.logger.exception('+++ monit error: %s', str(err))
        self.logger.info("Total monitor execution time: %.2f secs", time.time() - startT)

    def stop(self):
        """
        Stop MSManager threads.
        :return: running state of the transferor thread
        """
        # stop MSTransferorMonit thread
        self.monitThread.stop()
        # stop MSTransferor thread
        self.transfThread.stop()  # stop checkStatus thread
        status = self.transfThread.running()
        return status

    def getTransferIDsDoc(self):
        """
        Get transfer ids document from backend. The document has the following form:
        {
          "wf_A": [record1, record2, ...],
          "wf_B": [....],
        }
        where each record has the following format:
        {"timestamp":000, "dataset":"/a/b/c", "type": "primary", "transferIDs": [1,2,3]}
        """
        doc = {}
        return doc

    def updateTransferIDs(self, requestStatus):
        "Update transfer ids in backend"
        # TODO/Wait: https://github.com/dmwm/WMCore/issues/9198
        # doc = self.getTransferIDsDoc()

    def getTransferIDs(self):
        "Get transfer ids from backend"
        # TODO/Wait: https://github.com/dmwm/WMCore/issues/9198
        # meanwhile return transfer ids from internal store
        return []

    def getTransferStatuses(self, tids):
        "get transfer statuses for given transfer IDs from backend"
        # transfer docs on backend has the following form
        # https://gist.github.com/amaltaro/72599f995b37a6e33566f3c749143154
        statuses = {}
        for tid in tids:
            # TODO: I need to find request name from transfer ID
            # status = self.checkSubscription(request)
            status = 100
            statuses[tid] = status
        return statuses

    def requestCampaign(self, req):
        "Return request campaign"
        return 'campaign_TODO'  # TODO

    def requestConfiguration(self, req):
        "Return request configuration"
        return {}

    def pileupTransfersCompleted(self, tids):
        "Check if pileup transfers are completed"
        # TODO: add implementation
        return False

    def checkSubscription(self, req):
        """
        Send request to Phedex and return status of request subscription.
        NOTE(review): relies on self.phedex, which is never set in
        __init__ -- presumably assigned elsewhere; confirm before use.
        """
        sdict = {}
        for dataset in req.get('datasets', []):
            data = self.phedex.subscriptions(dataset=dataset,
                                             group=self.msConfig['group'])
            self.logger.debug("### dataset %s group %s", dataset, self.msConfig['group'])
            self.logger.debug("### subscription %s", data)
            for row in data['phedex']['dataset']:
                if row['name'] != dataset:
                    continue
                nodes = [s['node'] for s in row['subscription']]
                # FIX: default to an empty list; a missing 'sites' key
                # previously produced set(None) -> TypeError
                rNodes = req.get('sites', [])
                self.logger.debug("### nodes %s %s", nodes, rNodes)
                subset = set(nodes) & set(rNodes)
                if subset == set(rNodes):
                    sdict[dataset] = 1
                else:
                    pct = float(len(subset)) / float(len(set(rNodes)))
                    sdict[dataset] = pct
        self.logger.debug("### sdict %s", sdict)
        tot = len(sdict.keys())
        if not tot:
            return -1
        # return percentage of completion
        return round(float(sum(sdict.values())) / float(tot), 2) * 100

    def checkStatus(self, req):
        "Check status of request in local storage"
        self.logger.debug("### checkStatus of request: %s", req['name'])
        # check subscription status of the request
        # completed = self.checkSubscription(req)
        completed = 100
        if completed == 100:  # all data are staged
            self.logger.debug("### request is completed, change its status and remove it from the store")
            self.change(req, 'staged', '### transferor')
        else:
            self.logger.debug("### request %s, completed %s", req, completed)

    def info(self, req):
        "Return info about given request"
        completed = self.checkSubscription(req)
        return {'request': req, 'status': completed}

    def delete(self, request):
        "Delete request in backend"
        pass

    def change(self, req, reqStatus, prefix='###'):
        """
        Change request status, internally it is done via PUT request to ReqMgr2:
        curl -X PUT -H "Content-Type: application/json" \
             -d '{"RequestStatus":"staging", "RequestName":"bla-bla"}' \
             https://xxx.yyy.zz/reqmgr2/data/request
        """
        self.logger.debug('%s updating %s status to %s', prefix, req['name'], reqStatus)
        try:
            # readOnly mode skips the actual PUT to ReqMgr2
            if not self.msConfig['readOnly']:
                self.reqmgr2.updateRequestStatus(req['name'], reqStatus)
        except Exception as err:
            self.logger.exception("Failed to change request status. Error: %s", str(err))