Example no. 1 (score: 0)
    def setUp(self):
        """Create an MSTransferor against the cmsweb testbed and load request templates."""
        testbed = 'https://cmsweb-testbed.cern.ch'
        self.msConfig = {
            'services': ['transferor'],
            'verbose': False,
            'interval': 60,  # polling interval, in seconds
            'enableStatusTransition': True,
            'enableDataTransfer': False,
            'reqmgr2Url': testbed + '/reqmgr2',
            'reqmgrCacheUrl': testbed + '/couchdb/reqmgr_workload_cache',
            'quotaUsage': 0.9,
            # NOTE: production would use the wmcore_transferor account
            'rucioAccount': 'wma_test',
            # production Rucio endpoints would be:
            # 'rucioAuthUrl': 'https://cms-rucio-auth.cern.ch',
            # 'rucioUrl': 'http://cms-rucio.cern.ch',
            'rucioAuthUrl': 'https://cmsrucio-auth-int.cern.ch',
            'rucioUrl': 'http://cmsrucio-int.cern.ch',
            'dbsUrl': testbed + '/dbs/int/global/DBSReader',
        }

        self.msTransferor = MSTransferor(self.msConfig)

        # JSON request templates used as input by the individual test cases
        self.taskChainTempl = getTestFile('data/ReqMgr/requests/Integration/TaskChain_Prod.json')
        self.stepChainTempl = getTestFile('data/ReqMgr/requests/Integration/SC_LumiMask_PhEDEx.json')
        super(TransferorTest, self).setUp()
        # assertItemsEqual was renamed to assertCountEqual in Python 3
        if PY3:
            self.assertItemsEqual = self.assertCountEqual
Example no. 2 (score: 0)
class TransferorTest(EmulatedUnitTestCase):
    """Unit tests for the MSTransferor module."""

    def setUp(self):
        """
        Build the MSTransferor configuration (pointing at the cmsweb testbed
        and the Rucio integration servers), instantiate the service and load
        the request templates used by the tests.
        """
        self.msConfig = {'services': ['transferor'],
                         'verbose': False,
                         'interval': 1 * 60,
                         'enableStatusTransition': True,
                         'enableDataTransfer': False,
                         'reqmgr2Url': 'https://cmsweb-testbed.cern.ch/reqmgr2',
                         'reqmgrCacheUrl': 'https://cmsweb-testbed.cern.ch/couchdb/reqmgr_workload_cache',
                         'quotaUsage': 0.9,
                         'rucioAccount': 'wma_test',  # it should be wmcore_transferor
                         # production Rucio endpoints would be:
                         # 'rucioAuthUrl': 'https://cms-rucio-auth.cern.ch',
                         # 'rucioUrl': 'http://cms-rucio.cern.ch',
                         'rucioAuthUrl': 'https://cms-rucio-auth-int.cern.ch',
                         'rucioUrl': 'http://cms-rucio-int.cern.ch',
                         'dbsUrl': 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'}

        self.msTransferor = MSTransferor(self.msConfig)

        # JSON request templates used as input by the record-parsing tests
        self.taskChainTempl = getTestFile('data/ReqMgr/requests/Integration/TaskChain_Prod.json')
        self.stepChainTempl = getTestFile('data/ReqMgr/requests/Integration/SC_LumiMask_PhEDEx.json')
        super(TransferorTest, self).setUp()
        # assertItemsEqual was renamed to assertCountEqual in Python 3
        if PY3:
            self.assertItemsEqual = self.assertCountEqual

    def testGetPNNsFromPSNs(self):
        """Test MSTransferor private method _getPNNsFromPSNs()"""
        # the PSN->PNN cache starts out empty
        self.assertItemsEqual(self.msTransferor.psn2pnnMap, {})

        # now fill up the cache
        self.msTransferor.psn2pnnMap = self.msTransferor.cric.PSNtoPNNMap()

        self.assertItemsEqual(self.msTransferor._getPNNsFromPSNs([]), set())
        pnns = self.msTransferor._getPNNsFromPSNs(["T1_IT_CNAF", "T1_IT_CNAF_Disk"])
        self.assertItemsEqual(pnns, {"T1_IT_CNAF_Disk"})

        # dropping T3s and CERNBOX
        pnns = self.msTransferor._getPNNsFromPSNs(["T1_US_FNAL", "T2_CH_CERN_HLT"])
        self.assertItemsEqual(pnns, {"T1_US_FNAL_Disk", "T2_CH_CERN"})

        # testing with non-existent PSNs
        pnns = self.msTransferor._getPNNsFromPSNs(["T1_US_FNAL_Disk", "T2_CH_CERNBOX"])
        self.assertItemsEqual(pnns, set())

    def testGetPSNsFromPNNs(self):
        """Test MSTransferor private method _getPSNsFromPNNs()"""
        # the PNN->PSN cache starts out empty
        self.assertItemsEqual(self.msTransferor.pnn2psnMap, {})

        # now fill up the cache
        self.msTransferor.pnn2psnMap = self.msTransferor.cric.PNNtoPSNMap()

        self.assertItemsEqual(self.msTransferor._getPSNsFromPNNs([]), set())
        psns = self.msTransferor._getPSNsFromPNNs(["T1_IT_CNAF", "T1_IT_CNAF_Disk"])
        self.assertItemsEqual(psns, {"T1_IT_CNAF"})

        # test dropping T3s
        psns = self.msTransferor._getPSNsFromPNNs(["T2_UK_SGrid_RALPP"])
        self.assertItemsEqual(psns, {"T2_UK_SGrid_RALPP"})

        # testing with non-existent PNNs
        psns = self.msTransferor._getPSNsFromPNNs(["T1_US_FNAL", "T2_CH_CERN_HLT"])
        self.assertItemsEqual(psns, set())

    def testDiskPNNs(self):
        """Test MSTransferor private method _diskPNNs()"""
        # empty list of pnns
        self.assertItemsEqual(self.msTransferor._diskPNNs([]), set())

        # only PNNs that will be dropped (tape/archive/CERNBOX endpoints)
        pnns = self.msTransferor._diskPNNs(["T1_US_FNAL_Tape", "T1_US_FNAL_MSS",
                                            "T2_CH_CERNBOX", "T0_CH_CERN_Export"])
        self.assertItemsEqual(pnns, set())

        # valid PNNs that can receive data
        pnns = self.msTransferor._diskPNNs(["T1_US_FNAL_Disk", "T2_CH_CERN", "T2_DE_DESY"])
        self.assertItemsEqual(pnns, {"T1_US_FNAL_Disk", "T2_CH_CERN", "T2_DE_DESY"})

        # finally, a mix of valid and invalid PNNs
        pnns = self.msTransferor._diskPNNs(["T1_US_FNAL_Disk", "T1_US_FNAL_MSS", "T1_US_FNAL_Tape",
                                            "T2_CH_CERN", "T2_DE_DESY"])
        self.assertItemsEqual(pnns, {"T1_US_FNAL_Disk", "T2_CH_CERN", "T2_DE_DESY"})

    def notestRequestRecord(self):
        """
        Test the requestRecord method.
        NOTE: prefixed with "no" on purpose, so the test runner skips it.
        """
        default = {'name': '', 'reqStatus': None, 'SiteWhiteList': [],
                   'SiteBlackList': [], 'datasets': [], 'campaign': []}
        self.assertItemsEqual(self.msTransferor.requestRecord({}), default)

        with open(self.taskChainTempl) as jo:
            reqData = json.load(jo)['createRequest']
        expectedRes = [{'type': 'MCPileup',
                        'name': '/Neutrino_E-10_gun/RunIISummer17PrePremix-PUAutumn18_102X_upgrade2018_realistic_v15-v1/GEN-SIM-DIGI-RAW'},
                       {'type': 'MCPileup',
                        'name': '/Neutrino_E-10_gun/RunIISummer17PrePremix-PUAutumn18_102X_upgrade2018_realistic_v15-v1/GEN-SIM-DIGI-RAW'}]
        resp = self.msTransferor.requestRecord(reqData)['datasets']
        self.assertEqual(len(resp), 2)
        for record, expected in zip(resp, expectedRes):
            self.assertItemsEqual(record, expected)

        with open(self.stepChainTempl) as jo:
            reqData = json.load(jo)['createRequest']
        expectedRes = [{'type': 'InputDataset',
                        'name': '/RelValH125GGgluonfusion_14/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'},
                       {'type': 'MCPileup',
                        'name': '/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'},
                       {'type': 'MCPileup',
                        'name': '/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM'}]
        resp = self.msTransferor.requestRecord(reqData)['datasets']
        self.assertEqual(len(resp), 3)
        for record, expected in zip(resp, expectedRes):
            self.assertItemsEqual(record, expected)
Example no. 3 (score: 0)
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.

        Each enabled service gets its own MS* instance plus a daemon
        thread that periodically invokes the matching MSManager wrapper
        method every msConfig['interval'] seconds.
        :param config: reqmgr2ms service configuration
        :param logger: logger instance; a default MS logger is created when None
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # last-cycle summary documents for each service, exposed via status()
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}
        self.statusUnmerged = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            # polls requests in 'assigned' status
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname,
                             self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            # polls requests in 'staging' status
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname,
                             self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput
            requestNamesCached = deque(
                maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            # the request-name cache is shared between consumer and producer
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon, (self.outputConsumer, reqStatus,
                                 consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon, (self.outputProducer, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon, (self.ruleCleaner, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.ruleCleanerThread.running())

        # initialize unmerged module
        if 'unmerged' in self.services:
            self.msUnmerged = MSUnmerged(self.msConfig, logger=self.logger)
            thname = 'MSUnmerged'
            # NOTE(review): runs through daemonOpt (no request-status argument),
            # unlike the other services which use daemon — presumably intentional;
            # confirm against the daemon/daemonOpt definitions
            self.unmergedThread = start_new_thread(
                thname, daemonOpt,
                (self.unmerged, self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.unmergedThread.running())
Example no. 4 (score: 0)
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitoring services.
    """
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.

        Each enabled service gets its own MS* instance plus a daemon
        thread that periodically invokes the matching MSManager wrapper
        method every msConfig['interval'] seconds.
        :param config: reqmgr2ms service configuration
        :param logger: logger instance; a default MS logger is created when None
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # last-cycle summary documents for each service, exposed via status()
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}
        self.statusUnmerged = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            # polls requests in 'assigned' status
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname,
                             self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            # polls requests in 'staging' status
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname,
                             self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput
            requestNamesCached = deque(
                maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            # the request-name cache is shared between consumer and producer
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon, (self.outputConsumer, reqStatus,
                                 consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon, (self.outputProducer, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon, (self.ruleCleaner, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.ruleCleanerThread.running())

        # initialize unmerged module
        if 'unmerged' in self.services:
            self.msUnmerged = MSUnmerged(self.msConfig, logger=self.logger)
            thname = 'MSUnmerged'
            # NOTE(review): runs through daemonOpt (no request-status argument),
            # unlike the other services which use daemon — presumably intentional;
            # confirm against the daemon/daemonOpt definitions
            self.unmergedThread = start_new_thread(
                thname, daemonOpt,
                (self.unmerged, self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.unmergedThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following MicroServices config: %s",
                         config.dictionary_())
        self.services = getattr(config, 'services', [])

        self.msConfig = {}
        self.msConfig.update(config.dictionary_())

        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgr2Url'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the transferor thread...")
        res = self.msTransferor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total transferor execution time: %.2f secs",
                         res['execution_time'])
        self.statusTrans = res

    def monitor(self, reqStatus):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the monitor thread...")
        res = self.msMonitor.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total monitor execution time: %d secs",
                         res['execution_time'])
        self.statusMon = res

    def outputConsumer(self, reqStatus):
        """
        MSManager Output Data Placement function.
        It subscribes the output datasets to the Data Management System.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputConsumer thread...")
        res = self.msOutputConsumer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total outputConsumer execution time: %d secs",
                         res['execution_time'])
        self.statusOutput = res

    def outputProducer(self, reqStatus):
        """
        MSManager MongoDB Uploader function.
        It uploads the documents describing a workflow output Data subscription
        into MongoDb. For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Output
        reqStatus: Status of requests to work on
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the outputProducer thread...")
        res = self.msOutputProducer.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total outputProducer execution time: %d secs",
                         res['execution_time'])
        self.statusOutput = res

    def ruleCleaner(self, reqStatus):
        """
        MSManager ruleCleaner function.
        It cleans the block level Rucio rules created by WMAgent and
        performs request status transition from ['announced', 'aborted-completed', 'rejected'] to
        '{normal, aborted, rejected}-archived' state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-RuleCleaner
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the ruleCleaner thread...")
        res = self.msRuleCleaner.execute(reqStatus)
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total ruleCleaner execution time: %d secs",
                         res['execution_time'])
        self.statusRuleCleaner = res

    def unmerged(self, *args, **kwargs):
        """
        MSManager unmerged function.
        It cleans the Unmerged area of the CMS LFN Namespace
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Unmerged
        """
        startTime = datetime.utcnow()
        self.logger.info("Starting the unmerged thread...")
        res = self.msUnmerged.execute()
        endTime = datetime.utcnow()
        self.updateTimeUTC(res, startTime, endTime)
        self.logger.info("Total Unmerged execution time: %d secs",
                         res['execution_time'])
        self.statusUnmerged = res

    def stop(self):
        "Stop MSManager"
        status = None
        # stop MSMonitor thread
        if 'monitor' in self.services and hasattr(self, 'monitThread'):
            self.monitThread.stop()
            status = self.monitThread.running()
        # stop MSTransferor thread
        if 'transferor' in self.services and hasattr(self, 'transfThread'):
            self.transfThread.stop()  # stop checkStatus thread
            status = self.transfThread.running()
        # stop MSOutput threads
        if 'output' in self.services and hasattr(self, 'outputConsumerThread'):
            self.outputConsumerThread.stop()
            status = self.outputConsumerThread.running()
        if 'output' in self.services and hasattr(self, 'outputProducerThread'):
            self.outputProducerThread.stop()
            status = self.outputProducerThread.running()
        # stop MSRuleCleaner thread
        if 'ruleCleaner' in self.services and hasattr(self,
                                                      'ruleCleanerThread'):
            self.ruleCleanerThread.stop()
            status = self.ruleCleanerThread.running()
        return status

    def info(self, reqName=None):
        """
        Return transfer information for a given request
        :param reqName: request name
        :return: data transfer information for this request
        """
        data = {"request": reqName, "transferDoc": None}
        if reqName:
            # obtain the transfer information for a given request records from couchdb for given request
            if 'monitor' in self.services:
                transferDoc = self.msMonitor.reqmgrAux.getTransferInfo(reqName)
            elif 'transferor' in self.services:
                transferDoc = self.msTransferor.reqmgrAux.getTransferInfo(
                    reqName)
            elif 'output' in self.services:
                transferDoc = self.msOutputProducer.getTransferInfo(reqName)
            if transferDoc:
                # it's always a single document in Couch
                data['transferDoc'] = transferDoc[0]
        return data

    def delete(self, request):
        "Delete request in backend"
        pass

    def status(self, detail):
        """
        Return the current status of a MicroService and a summary
        of its last execution activity.
        :param detail: boolean used to retrieve some extra information
          regarding the service
        :return: a dictionary
        """
        data = {"status": "OK"}
        if detail and 'transferor' in self.services:
            data.update(self.statusTrans)
        elif detail and 'monitor' in self.services:
            data.update(self.statusMon)
        elif detail and 'output' in self.services:
            data.update(self.statusOutput)
        elif detail and 'ruleCleaner' in self.services:
            data.update(self.statusRuleCleaner)
        return data

    def updateTimeUTC(self, reportDict, startT, endT):
        """
        Given a report summary dictionary and start/end time, update
        the report with human readable timing information
        :param reportDict: summary dictionary, updated in place
        :param startT: datetime object for when the service cycle started
        :param endT: datetime object for when the service cycle ended
        """
        stamp = "%a, %d %b %Y %H:%M:%S UTC"
        reportDict['start_time'] = startT.strftime(stamp)
        reportDict['end_time'] = endT.strftime(stamp)
        reportDict['execution_time'] = (endT - startT).total_seconds()