Beispiel #1
0
    def __init__(self, config=None, logger=None):
        """
        Set up the MSManager: keep the configuration, create the logger,
        parse the configuration through _parseConfig and, for each service
        listed in self.services, build its module and start its thread.
        :param config: reqmgr2ms service configuration
        :param logger: logger object handed over to getMSLogger
        """
        self.config = config
        verbose = getattr(config, 'verbose', False)
        self.logger = getMSLogger(verbose, logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # summaries of the last execution of each service
        self.statusTrans = {}
        self.statusMon = {}

        # transferor service: build the module first, then start its thread
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            threadName = 'MSTransferor'
            threadArgs = (self.transferor, 'assigned',
                          self.msConfig['interval'], self.logger)
            self.transfThread = start_new_thread(threadName, daemon,
                                                 threadArgs)
            self.logger.debug("### Running %s thread %s", threadName,
                              self.transfThread.running())

        # monitoring service: build the module first, then start its thread
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            threadName = 'MSMonitor'
            threadArgs = (self.monitor, 'staging',
                          self.msConfig['interval'], self.logger)
            self.monitThread = start_new_thread(threadName, daemon,
                                                threadArgs)
            self.logger.debug("+++ Running %s thread %s", threadName,
                              self.monitThread.running())
Beispiel #2
0
    def setUp(self):
        "Create the MSMonitor under test and plug in a mocked ReqMgrAux"
        settings = {
            'verbose': False,
            'group': 'DataOps',
            'interval': 1 * 60,
            'updateInterval': 0,
            'enableStatusTransition': True,
            'reqmgr2Url': 'https://cmsweb-testbed.cern.ch/reqmgr2',
            'reqmgrCacheUrl': 'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
            'phedexUrl': 'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
            'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader',
        }
        self.msConfig = settings

        # build the monitor, then swap its ReqMgrAux handle for the mock
        monitor = MSMonitor(self.msConfig)
        monitor.reqmgrAux = MockReqMgrAux()
        self.ms = monitor
        super(MSMonitorTest, self).setUp()
Beispiel #3
0
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.
        :param config: reqmgr2ms service configuration; _parseConfig calls
          config.dictionary_() on it, so an actual configuration object
          is needed despite the None default
        :param logger: logger object handed over to getMSLogger
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # summaries of the last execution of each service, see status()
        self.statusTrans = {}
        self.statusMon = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.debug("### Running %s thread %s", thname,
                              self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.debug("+++ Running %s thread %s", thname,
                              self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        Sets self.services and self.msConfig.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s",
                         config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)

        # the cache URL is derived from the ReqMgr2 one, any value
        # provided in the configuration is overwritten
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status passed to MSTransferor.execute
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs",
                         res['execution_time'])
        # keep the summary of this cycle for the status() call
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status passed to MSMonitor.execute
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total monitor execution time: %d secs",
                         res['execution_time'])
        # keep the summary of this cycle for the status() call
        self.statusMon = res

    def stop(self):
        """
        Stop MSManager
        :return: the running() value of the last thread that was stopped,
          or None if no thread was stopped
        """
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request; the
          "transferDoc" value stays None when no request name is given,
          when none of the known services is enabled or when no
          document is found
        """
        data = {"request": reqName, "transferDoc": None}
        if not reqName:
            return data

        # default value, such that a manager running none of the services
        # below does not fail on an unbound local variable
        transferDoc = None
        # obtain the transfer information for a given request records from couchdb for given request
        if 'monitor' in self.services:
            transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
        elif 'transferor' in self.services:
            transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(
                reqName)
        if transferDoc:
            # it's always a single document in Couch
            data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        Only the first enabled service (transferor, then monitor)
        contributes its summary.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report (in place) with human readable timing information
        :param reportDict: summary dictionary
        :param startT: datetime object with the start time of a given service
        :param endT: datetime object with the end time of a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()
Beispiel #4
0
class MSMonitorTest(EmulatedUnitTestCase):
    "Unit test for Monitor module"

    def setUp(self):
        "init test class"
        self.msConfig = {'verbose': False,
                         'group': 'DataOps',
                         'interval': 1 * 60,
                         'updateInterval': 0,
                         'enableStatusTransition': True,
                         'reqmgr2Url': 'https://cmsweb-testbed.cern.ch/reqmgr2',
                         'reqmgrCacheUrl': 'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
                         'phedexUrl': 'https://cmsweb-testbed.cern.ch/phedex/datasvc/json/prod',
                         'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader',
                         'rucioAccount': "wma_test",
                         'rucioUrl': "http://cmsrucio-int.cern.ch",
                         'rucioAuthUrl': "https://cmsrucio-auth-int.cern.ch"}

        self.ms = MSMonitor(self.msConfig)
        # swap the ReqMgrAux handle of the monitor for the mocked one
        self.ms.reqmgrAux = MockReqMgrAux()
        super(MSMonitorTest, self).setUp()

    def testUpdateCaches(self):
        """
        Test the updateCaches method
        """
        campaigns, transfersDocs = self.ms.updateCaches()
        self.assertNotEqual(transfersDocs, [])
        self.assertEqual(len(transfersDocs[0]['transfers']), 1)
        self.assertGreater(time.time(), transfersDocs[0]['lastUpdate'])

        # NOTE(review): campaigns is iterated as a dict below, so this
        # comparison against a list presumably never fails - confirm intent
        self.assertNotEqual(campaigns, [])
        for cname, cdict in campaigns.items():
            self.assertEqual(cname, cdict['CampaignName'])
            self.assertIsInstance(cdict, dict)
            self.assertNotEqual(cdict.get('CampaignName', {}), {})

    def testGetTransferInfo(self):
        """
        Test the getTransferInfo method
        """
        _, transfersDocs = self.ms.updateCaches()
        transfersDocs[0]['transfers'] = []
        # snapshot taken before the call, to compare the number of records
        originalTransfers = deepcopy(transfersDocs)
        self.ms.getTransferInfo(transfersDocs)

        self.assertNotEqual(transfersDocs, [])
        self.assertEqual(len(transfersDocs), len(originalTransfers))
        # every record is expected to carry exactly these keys
        keys = sorted(['workflowName', 'lastUpdate', 'transfers'])
        for rec in transfersDocs:
            self.assertIsInstance(rec, dict)
            self.assertEqual(keys, sorted(rec.keys()))
            self.assertGreaterEqual(time.time(), rec['lastUpdate'])

    def testCompletion(self):
        """
        Test the getCompletedWorkflows method
        """
        campaigns, transfersDocs = self.ms.updateCaches()
        # build three records out of the first one:
        #  workflow_0 has no transfers at all
        #  workflow_1 gets an extra completion value of 100
        #  workflow_2 is an unmodified copy
        transfersDocs.append(deepcopy(transfersDocs[0]))
        transfersDocs.append(deepcopy(transfersDocs[0]))
        transfersDocs[0]['transfers'] = []
        transfersDocs[0]['workflowName'] = 'workflow_0'
        transfersDocs[1]['transfers'][0]['completion'].append(100)
        transfersDocs[1]['workflowName'] = 'workflow_1'
        transfersDocs[2]['workflowName'] = 'workflow_2'
        self.assertEqual(len(transfersDocs), 3)

        completedWfs = self.ms.getCompletedWorkflows(transfersDocs, campaigns)
        self.assertEqual(len(completedWfs), 2)

    def testUpdateTransferInfo(self):
        """
        Test the updateTransferDocs method
        """
        _, transferRecords = self.ms.updateCaches()
        failed = self.ms.updateTransferDocs(transferRecords, workflowsToSkip=[])
        # every record is expected to be reported back as failed
        self.assertEqual(len(failed), len(transferRecords))
Beispiel #5
0
    def __init__(self, config=None, logger=None):
        """
        Set up the MSManager: keep the configuration, create the logger,
        parse the configuration through _parseConfig and, for each service
        listed in self.services (transferor, monitor, output, ruleCleaner),
        build its module(s) and start its thread(s).
        :param config: reqmgr2ms service configuration
        :param logger: logger object handed over to getMSLogger
        """
        self.config = config
        verbose = getattr(config, 'verbose', False)
        self.logger = getMSLogger(verbose, logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # summaries of the last execution of each service
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}

        # transferor service: build the module first, then start its thread
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            threadName = 'MSTransferor'
            threadArgs = (self.transferor, 'assigned',
                          self.msConfig['interval'], self.logger)
            self.transfThread = start_new_thread(threadName, daemon,
                                                 threadArgs)
            self.logger.info("### Running %s thread %s", threadName,
                             self.transfThread.running())

        # monitoring service: build the module first, then start its thread
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            threadName = 'MSMonitor'
            threadArgs = (self.monitor, 'staging',
                          self.msConfig['interval'], self.logger)
            self.monitThread = start_new_thread(threadName, daemon,
                                                threadArgs)
            self.logger.info("+++ Running %s thread %s", threadName,
                             self.monitThread.running())

        # output service: one consumer and one producer, each in its thread
        if 'output' in self.services:
            outputStatuses = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in
            # MSOutput; the same object is given to consumer and producer
            cacheSize = self.msConfig.get("cacheRequestSize", 10000)
            sharedReqCache = deque(maxlen=cacheSize)

            threadName = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=threadName,
                                             reqCache=sharedReqCache,
                                             logger=self.logger)
            # the consumer interval is half of the producer one
            halfInterval = self.msConfig['interval'] // 2
            threadArgs = (self.outputConsumer, outputStatuses,
                          halfInterval, self.logger)
            self.outputConsumerThread = start_new_thread(threadName, daemon,
                                                         threadArgs)
            self.logger.info("=== Running %s thread %s", threadName,
                             self.outputConsumerThread.running())

            threadName = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=threadName,
                                             reqCache=sharedReqCache,
                                             logger=self.logger)
            threadArgs = (self.outputProducer, outputStatuses,
                          self.msConfig['interval'], self.logger)
            self.outputProducerThread = start_new_thread(threadName, daemon,
                                                         threadArgs)
            self.logger.info("=== Running %s thread %s", threadName,
                             self.outputProducerThread.running())

        # rule cleaner service: build the module first, then start its thread
        if 'ruleCleaner' in self.services:
            cleanerStatuses = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            threadName = 'MSRuleCleaner'
            threadArgs = (self.ruleCleaner, cleanerStatuses,
                          self.msConfig['interval'], self.logger)
            self.ruleCleanerThread = start_new_thread(threadName, daemon,
                                                      threadArgs)
            self.logger.info("--- Running %s thread %s", threadName,
                             self.ruleCleanerThread.running())
Beispiel #6
0
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages the transferor, monitoring, output
    and rule cleaner services.
    """
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start the threads of every service listed in the
        configuration (transferor, monitor, output, ruleCleaner).
        :param config: reqmgr2ms service configuration; _parseConfig calls
          config.dictionary_() on it, so an actual configuration object
          is needed despite the None default
        :param logger: logger object handed over to getMSLogger
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # summaries of the last execution of each service, see status()
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname,
                             self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname,
                             self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput
            # (the same object is handed to both consumer and producer)
            requestNamesCached = deque(
                maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon, (self.outputConsumer, reqStatus,
                                 consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon, (self.outputProducer, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon, (self.ruleCleaner, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.ruleCleanerThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        Sets self.services and self.msConfig.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s",
                         config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())
        self.msConfig.setdefault("useRucio", False)

        # the cache URL is derived from the ReqMgr2 one, any value
        # provided in the configuration is overwritten
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status passed to MSTransferor.execute
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs",
                         res['execution_time'])
        # keep the summary of this cycle for the status() call
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        :param reqStatus: request status passed to MSMonitor.execute
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total monitor execution time: %d secs",
                         res['execution_time'])
        # keep the summary of this cycle for the status() call
        self.statusMon = res

    def outputConsumer(self, reqStatus):
        """
        MSManager Output Data Placement function.
        It subscribes the output datasets to the Data Management System.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputConsumer thread...")
        res = self.msOutputConsumer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total outputConsumer execution time: %d secs",
                         res['execution_time'])
        # consumer and producer write to the same attribute, hence
        # status() reports whichever of the two finished last
        self.statusOutput = res

    def outputProducer(self, reqStatus):
        """
        MSManager MongoDB Uploader function.
        It uploads the documents describing a workflow output Data subscription
        into MongoDb. For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputProducer thread...")
        res = self.msOutputProducer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total outputProducer execution time: %d secs",
                         res['execution_time'])
        # consumer and producer write to the same attribute, hence
        # status() reports whichever of the two finished last
        self.statusOutput = res

    def ruleCleaner(self, reqStatus):
        """
        MSManager ruleCleaner function.
        It cleans the block level Rucio rules created by WMAgent and
        performs request status transition from ['announced', 'aborted-completed', 'rejected'] to
        '{normal, aborted, rejected}-archived' state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-RuleCleaner
        :param reqStatus: request statuses passed to MSRuleCleaner.execute
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the ruleCleaner thread...")
        res = self.msRuleCleaner.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total ruleCleaner execution time: %d secs",
                         res['execution_time'])
        # keep the summary of this cycle for the status() call
        self.statusRuleCleaner = res

    def stop(self):
        """
        Stop MSManager
        :return: the running() value of the last thread that was stopped,
          or None if no thread was stopped
        """
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        # stop MSOutput threads
        if 'output' in self.services and hasattr(self, 'outputConsumerThread'):
            self.outputConsumerThread.stop()
            status = self.outputConsumerThread.running()
        if 'output' in self.services and hasattr(self, 'outputProducerThread'):
            self.outputProducerThread.stop()
            status = self.outputProducerThread.running()
        # stop MSRuleCleaner thread
        if 'ruleCleaner' in self.services and hasattr(self,
                                                      'ruleCleanerThread'):
            self.ruleCleanerThread.stop()
            status = self.ruleCleanerThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request; the
          "transferDoc" value stays None when no request name is given,
          when none of the services queried here (monitor, transferor,
          output) is enabled or when no document is found
        """
        data = {"request": reqName, "transferDoc": None}
        if not reqName:
            return data

        # default value, such that a manager running none of the services
        # below (e.g. only ruleCleaner) does not fail on an unbound
        # local variable
        transferDoc = None
        # obtain the transfer information for a given request records from couchdb for given request
        if 'monitor' in self.services:
            transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
        elif 'transferor' in self.services:
            transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(
                reqName)
        elif 'output' in self.services:
            transferDoc = self.msOutputProducer.getTransferInfo(reqName)
        if transferDoc:
            # it's always a single document in Couch
            data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        Only the first enabled service (in the order transferor, monitor,
        output, ruleCleaner) contributes its summary.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        elif detail and 'output' in self.services:
            data.update(self.statusOutput)
        elif detail and 'ruleCleaner' in self.services:
            data.update(self.statusRuleCleaner)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report (in place) with human readable timing information
        :param reportDict: summary dictionary
        :param startT: datetime object with the start time of a given service
        :param endT: datetime object with the end time of a given service
        """
        reportDict['start_time'] = startT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['end_time'] = endT.strftime("%a, %d %b %Y %H:%M:%S UTC")
        reportDict['execution_time'] = (endT - startT).total_seconds()