Example #1
0
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        self.statusTrans = {}
        self.statusMon = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.debug("### Running %s thread %s", thname,
                              self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.debug("+++ Running %s thread %s", thname,
                              self.monitThread.running())
Example #2
0
    def __init__(self, config=None, logger=None):
        """
        Setup a bunch of things, like:
         * logger for this service
         * initialize all the necessary service helpers
         * fetch the unified configuration from central couch
         * update the unified configuration with some deployment and default settings
         * start both transfer and monitor threads
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.uConfig = {}
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)

        self.reqmgr2 = ReqMgr(self.msConfig['reqmgrUrl'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgrUrl'],
                                   httpDict={'cacheduration': 60},
                                   logger=self.logger)

        # transferor has to look at workflows in assigned status
        self.msTransferor = MSTransferor(self.msConfig,
                                         "assigned",
                                         logger=self.logger)

        ### Last but not least, get the threads started
        thname = 'MSTransferor'
        self.transfThread = start_new_thread(
            thname, daemon, (self.transferor, 'assigned',
                             self.msConfig['interval'], self.logger))
        self.logger.debug("### Running %s thread %s", thname,
                          self.transfThread.running())

        thname = 'MSTransferorMonit'
        self.monitThread = start_new_thread(
            thname, daemon, (self.monitor, 'staging',
                             self.msConfig['interval'] * 2, self.logger))
        self.logger.debug("+++ Running %s thread %s", thname,
                          self.monitThread.running())
Example #3
0
    def setUp(self):
        "init test class"
        self.msConfig = {
            'verbose': False,
            'group': 'DataOps',
            'interval': 1 * 60,
            'enableStatusTransition': True,
            'reqmgrUrl': 'https://cmsweb-testbed.cern.ch/reqmgr2',
            'reqmgrCacheUrl':
            'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
            'phedexUrl':
            'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
            'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'
        }

        self.msTransferor = MSTransferor(self.msConfig)

        self.taskChainTempl = getTestFile(
            'data/ReqMgr/requests/Integration/TaskChain_Prod.json')
        self.stepChainTempl = getTestFile(
            'data/ReqMgr/requests/Integration/SC_LumiMask_PhEDEx.json')
        super(TransferorTest, self).setUp()
Example #4
0
class TransferorTest(EmulatedUnitTestCase):
    "Unit test for Transferor module"

    def setUp(self):
        "init test class"
        self.msConfig = {
            'verbose': False,
            'group': 'DataOps',
            'interval': 1 * 60,
            'enableStatusTransition': True,
            'reqmgrUrl': 'https://cmsweb-testbed.cern.ch/reqmgr2',
            'reqmgrCacheUrl':
            'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
            'phedexUrl':
            'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
            'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'
        }

        self.msTransferor = MSTransferor(self.msConfig)

        self.taskChainTempl = getTestFile(
            'data/ReqMgr/requests/Integration/TaskChain_Prod.json')
        self.stepChainTempl = getTestFile(
            'data/ReqMgr/requests/Integration/SC_LumiMask_PhEDEx.json')
        super(TransferorTest, self).setUp()

    def notestRequestRecord(self):
        """
        Test the requestRecord method
        """
        default = {
            'name': '',
            'reqStatus': None,
            'SiteWhiteList': [],
            'SiteBlackList': [],
            'datasets': [],
            'campaign': []
        }
        self.assertItemsEqual(self.msTransferor.requestRecord({}), default)

        with open(self.taskChainTempl) as jo:
            reqData = json.load(jo)['createRequest']
        expectedRes = [{
            'type':
            'MCPileup',
            'name':
            '/Neutrino_E-10_gun/RunIISummer17PrePremix-PUAutumn18_102X_upgrade2018_realistic_v15-v1/GEN-SIM-DIGI-RAW'
        }, {
            'type':
            'MCPileup',
            'name':
            '/Neutrino_E-10_gun/RunIISummer17PrePremix-PUAutumn18_102X_upgrade2018_realistic_v15-v1/GEN-SIM-DIGI-RAW'
        }]
        resp = self.msTransferor.requestRecord(reqData)['datasets']
        self.assertEqual(len(resp), 2)
        for idx in range(len(resp)):
            self.assertItemsEqual(resp[idx], expectedRes[idx])

        with open(self.stepChainTempl) as jo:
            reqData = json.load(jo)['createRequest']
        expectedRes = [{
            'type':
            'InputDataset',
            'name':
            '/RelValH125GGgluonfusion_14/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'
        }, {
            'type':
            'MCPileup',
            'name':
            '/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'
        }, {
            'type':
            'MCPileup',
            'name':
            '/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'
        }]
        resp = self.msTransferor.requestRecord(reqData)['datasets']
        self.assertEqual(len(resp), 3)
        for idx in range(len(resp)):
            self.assertItemsEqual(resp[idx], expectedRes[idx])
Example #5
0
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        self.statusTrans = {}
        self.statusMon = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.debug("### Running %s thread %s", thname,
                              self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.debug("+++ Running %s thread %s", thname,
                              self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s",
                         config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)

        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs",
                         res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total monitor execution time: %d secs",
                         res['execution_time'])
        self.statusMon = res

    def stop(self):
        "Stop MSManager"
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request
        """
        data = {"request": reqName, "transferDoc": None}
        if reqName:
            # obtain the transfer information for a given request records from couchdb for given request
            if 'monitor' in self.services:
                transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
            elif 'transferor' in self.services:
                transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(
                    reqName)
            if transferDoc:
                # it's always a single document in Couch
                data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information
        :param reportDict: summary dictionary
        :param startT: epoch start time for a given service
        :param endT: epoch end time for a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()
Example #6
0
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname,
                             self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname,
                             self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput
            requestNamesCached = deque(
                maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon, (self.outputConsumer, reqStatus,
                                 consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon, (self.outputProducer, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon, (self.ruleCleaner, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.ruleCleanerThread.running())
Example #7
0
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname,
                             self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname,
                             self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput
            requestNamesCached = deque(
                maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon, (self.outputConsumer, reqStatus,
                                 consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon, (self.outputProducer, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon, (self.ruleCleaner, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.ruleCleanerThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s",
                         config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)

        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs",
                         res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total monitor execution time: %d secs",
                         res['execution_time'])
        self.statusMon = res

    def outputConsumer(self, reqStatus):
        """
        MSManager Output Data Placement function.
        It subscribes the output datasets to the Data Management System.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputConsumer thread...")
        res = self.msOutputConsumer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total outputConsumer execution time: %d secs",
                         res['execution_time'])
        self.statusOutput = res

    def outputProducer(self, reqStatus):
        """
        MSManager MongoDB Uploader function.
        It uploads the documents describing a workflow output Data subscription
        into MongoDb. For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputProducer thread...")
        res = self.msOutputProducer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total outputProducer execution time: %d secs",
                         res['execution_time'])
        self.statusOutput = res

    def ruleCleaner(self, reqStatus):
        """
        MSManager ruleCleaner function.
        It cleans the block level Rucio rules created by WMAgent and
        performs request status transition from ['announced', 'aborted-completed', 'rejected'] to
        '{normal, aborted, rejected}-archived' state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-RuleCleaner
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the ruleCleaner thread...")
        res = self.msRuleCleaner.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total ruleCleaner execution time: %d secs",
                         res['execution_time'])
        self.statusRuleCleaner = res

    def stop(self):
        "Stop MSManager"
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        # stop MSOutput threads
        if 'output' in self.services and hasattr(self, 'outputConsumerThread'):
            self.outputConsumerThread.stop()
            status = self.outputConsumerThread.running()
        if 'output' in self.services and hasattr(self, 'outputProducerThread'):
            self.outputProducerThread.stop()
            status = self.outputProducerThread.running()
        # stop MSRuleCleaner thread
        if 'ruleCleaner' in self.services and hasattr(self,
                                                      'ruleCleanerThread'):
            self.ruleCleanerThread.stop()
            status = self.ruleCleanerThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request
        """
        data = {"request": reqName, "transferDoc": None}
        if reqName:
            # obtain the transfer information for a given request records from couchdb for given request
            if 'monitor' in self.services:
                transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
            elif 'transferor' in self.services:
                transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(
                    reqName)
            elif 'output' in self.services:
                transferDoc = self.msOutputProducer.getTransferInfo(reqName)
            if transferDoc:
                # it's always a single document in Couch
                data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        elif detail and 'output' in self.services:
            data.update(self.statusOutput)
        elif detail and 'ruleCleaner' in self.services:
            data.update(self.statusRuleCleaner)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information
        :param reportDict: summary dictionary
        :param startT: epoch start time for a given service
        :param endT: epoch end time for a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()
Example #8
0
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitor services/threads.
    """
    def __init__(self, config=None, logger=None):
        """
        Setup a bunch of things, like:
         * logger for this service
         * initialize all the necessary service helpers
         * fetch the unified configuration from central couch
         * update the unified configuration with some deployment and default settings
         * start both transfer and monitor threads
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.uConfig = {}
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)

        self.reqmgr2 = ReqMgr(self.msConfig['reqmgrUrl'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgrUrl'],
                                   httpDict={'cacheduration': 60},
                                   logger=self.logger)

        # transferor has to look at workflows in assigned status
        self.msTransferor = MSTransferor(self.msConfig,
                                         "assigned",
                                         logger=self.logger)

        ### Last but not least, get the threads started
        thname = 'MSTransferor'
        self.transfThread = start_new_thread(
            thname, daemon, (self.transferor, 'assigned',
                             self.msConfig['interval'], self.logger))
        self.logger.debug("### Running %s thread %s", thname,
                          self.transfThread.running())

        thname = 'MSTransferorMonit'
        self.monitThread = start_new_thread(
            thname, daemon, (self.monitor, 'staging',
                             self.msConfig['interval'] * 2, self.logger))
        self.logger.debug("+++ Running %s thread %s", thname,
                          self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following config:\n%s", config)

        self.msConfig = {}
        self.msConfig['verbose'] = getattr(config, 'verbose', False)
        self.msConfig['group'] = getattr(config, 'group', 'DataOps')
        self.msConfig['interval'] = getattr(config, 'interval', 5 * 60)
        self.msConfig['readOnly'] = getattr(config, 'readOnly', True)

        self.msConfig['reqmgrUrl'] = getattr(config, 'reqmgr2Url',
                                             'https://cmsweb.cern.ch/reqmgr2')
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgrUrl'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')
        self.msConfig['phedexUrl'] = getattr(
            config, 'phedexUrl',
            'https://cmsweb.cern.ch/phedex/datasvc/json/prod')
        self.msConfig['dbsUrl'] = getattr(
            config, 'dbsUrl',
            'https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startT = time.time()
        self.logger.info("Starting the transferor thread...")
        self.msTransferor.execute()
        self.logger.info("Total transferor execution time: %.2f secs",
                         time.time() - startT)

    def monitor(self, reqStatus='staging'):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startT = time.time()
        self.logger.info("Starting the monitor thread...")
        # First, fetch/update our unified configuration from reqmgr_aux db
        # Keep our own copy of the unified config to avoid race conditions
        self.uConfig = self.reqmgrAux.getUnifiedConfig(docName="config")
        if not self.uConfig:
            self.logger.warning(
                "Monitor failed to fetch the unified config. Skipping this cycle."
            )
            return
        self.uConfig = self.uConfig[0]

        try:
            # get requests from ReqMgr2 data-service for given statue
            # here with detail=False we get back list of records
            requests = self.reqmgr2.getRequestByStatus([reqStatus],
                                                       detail=False)
            self.logger.debug('+++ monit found %s requests in %s state',
                              len(requests), reqStatus)

            requestStatus = {}  # keep track of request statuses
            for reqName in requests:
                req = {'name': reqName, 'reqStatus': reqStatus}
                # get transfer IDs
                tids = self.getTransferIDs()
                # get transfer status
                transferStatuses = self.getTransferStatuses(tids)
                # get campaing and unified configuration
                campaign = self.requestCampaign(reqName)
                conf = self.requestConfiguration(reqName)
                self.logger.debug("+++ request %s campaing %s conf %s", req,
                                  campaign, conf)

                # if all transfers are completed, move the request status staging -> staged
                # completed = self.checkSubscription(request)
                completed = 100  # TMP
                if completed == 100:  # all data are staged
                    self.logger.debug(
                        "+++ request %s all transfers are completed", req)
                    self.change(req, 'staged', '+++ monit')
                # if pileup transfers are completed AND some input blocks are completed, move the request status staging -> staged
                elif self.pileupTransfersCompleted(tids):
                    self.logger.debug(
                        "+++ request %s pileup transfers are completed", req)
                    self.change(req, 'staged', '+++ monit')
                # transfers not completed, just update the database with their completion
                else:
                    self.logger.debug(
                        "+++ request %s transfers are not completed", req)
                    requestStatus[
                        req] = transferStatuses  # TODO: implement update of transfer ids
            self.updateTransferIDs(requestStatus)
        except Exception as err:  # general error
            self.logger.exception('+++ monit error: %s', str(err))
        self.logger.info("Total monitor execution time: %.2f secs",
                         time.time() - startT)

    def stop(self):
        "Stop MSManager"
        # stop MSTransferorMonit thread
        self.monitThread.stop()
        # stop MSTransferor thread
        self.transfThread.stop()  # stop checkStatus thread
        status = self.transfThread.running()
        return status

    def getTransferIDsDoc(self):
        """
        Get transfer ids document from backend. The document has the following form:
        {
          "wf_A": [record1, record2, ...],
          "wf_B": [....],
        }
        where each record has the following format:
        {"timestamp":000, "dataset":"/a/b/c", "type": "primary", "trainsferIDs": [1,2,3]}
        """
        doc = {}
        return doc

    def updateTransferIDs(self, requestStatus):
        "Update transfer ids in backend"
        # TODO/Wait: https://github.com/dmwm/WMCore/issues/9198
        # doc = self.getTransferIDsDoc()

    def getTransferIDs(self):
        "Get transfer ids from backend"
        # TODO/Wait: https://github.com/dmwm/WMCore/issues/9198
        # meanwhile return transfer ids from internal store
        return []

    def getTransferStatuses(self, tids):
        "get transfer statuses for given transfer IDs from backend"
        # transfer docs on backend has the following form
        # https://gist.github.com/amaltaro/72599f995b37a6e33566f3c749143154
        statuses = {}
        for tid in tids:
            # TODO: I need to find request name from transfer ID
            # status = self.checkSubscription(request)
            status = 100
            statuses[tid] = status
        return statuses

    def requestCampaign(self, req):
        "Return request campaign"
        return 'campaign_TODO'  # TODO

    def requestConfiguration(self, req):
        "Return request configuration"
        return {}

    def pileupTransfersCompleted(self, tids):
        "Check if pileup transfers are completed"
        # TODO: add implementation
        return False

    def checkSubscription(self, req):
        "Send request to Phedex and return status of request subscription"
        sdict = {}
        for dataset in req.get('datasets', []):
            data = self.phedex.subscriptions(dataset=dataset,
                                             group=self.msConfig['group'])
            self.logger.debug("### dataset %s group %s", dataset,
                              self.msConfig['group'])
            self.logger.debug("### subscription %s", data)
            for row in data['phedex']['dataset']:
                if row['name'] != dataset:
                    continue
                nodes = [s['node'] for s in row['subscription']]
                rNodes = req.get('sites')
                self.logger.debug("### nodes %s %s", nodes, rNodes)
                subset = set(nodes) & set(rNodes)
                if subset == set(rNodes):
                    sdict[dataset] = 1
                else:
                    pct = float(len(subset)) / float(len(set(rNodes)))
                    sdict[dataset] = pct
        self.logger.debug("### sdict %s", sdict)
        tot = len(sdict.keys())
        if not tot:
            return -1
        # return percentage of completion
        return round(float(sum(sdict.values())) / float(tot), 2) * 100

    def checkStatus(self, req):
        "Check status of request in local storage"
        self.logger.debug("### checkStatus of request: %s", req['name'])
        # check subscription status of the request
        # completed = self.checkSubscription(req)
        completed = 100
        if completed == 100:  # all data are staged
            self.logger.debug(
                "### request is completed, change its status and remove it from the store"
            )
            self.change(req, 'staged', '### transferor')
        else:
            self.logger.debug("### request %s, completed %s", req, completed)

    def info(self, req):
        "Return info about given request"
        completed = self.checkSubscription(req)
        return {'request': req, 'status': completed}

    def delete(self, request):
        "Delete request in backend"
        pass

    def change(self, req, reqStatus, prefix='###'):
        """
        Change request status, internally it is done via PUT request to ReqMgr2:
        curl -X PUT -H "Content-Type: application/json" \
             -d '{"RequestStatus":"staging", "RequestName":"bla-bla"}' \
             https://xxx.yyy.zz/reqmgr2/data/request
        """
        self.logger.debug('%s updating %s status to %s', prefix, req['name'],
                          reqStatus)
        try:
            if not self.msConfig['readOnly']:
                self.reqmgr2.updateRequestStatus(req['name'], reqStatus)
        except Exception as err:
            self.logger.exception("Failed to change request status. Error: %s",
                                  str(err))