Beispiel #1
0
class MSCore(object):
    """
    This class provides core functionality for
    MSTransferor, MSMonitor, MSOutput. MSRuleCleaner classes.
    """

    def __init__(self, msConfig, **kwargs):
        """
        Provides setup for MSTransferor and MSMonitor classes

        :param config: MS service configuration
        :param kwargs: can be used to skip the initialization of specific services, such as:
            logger: logger object
            skipReqMgr: boolean to skip ReqMgr initialization
            skipReqMgrAux: boolean to skip ReqMgrAux initialization
            skipRucio: boolean to skip Rucio initialization
        """
        self.logger = getMSLogger(getattr(msConfig, 'verbose', False), kwargs.get("logger"))
        self.msConfig = msConfig
        self.logger.info("Configuration including default values:\n%s", self.msConfig)

        if not kwargs.get("skipReqMgr", False):
            self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
        if not kwargs.get("skipReqMgrAux", False):
            self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                       httpDict={'cacheduration': 1.0}, logger=self.logger)

        self.phedex = None
        self.rucio = None
        if not kwargs.get("skipRucio", False):
            self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                               hostUrl=self.msConfig['rucioUrl'],
                               authUrl=self.msConfig['rucioAuthUrl'],
                               configDict={"logger": self.logger, "user_agent": "wmcore-microservices"})

    def unifiedConfig(self):
        """
        Fetches the unified configuration
        :return: unified configuration content
        """
        res = self.reqmgrAux.getUnifiedConfig(docName="config")
        if res:
            if isinstance(res, list):
                return res[0]
            return res
        else:
            return {}

    def change(self, reqName, reqStatus, prefix='###'):
        """
        Update the request status in ReqMgr2
        """
        try:
            if self.msConfig['enableStatusTransition']:
                self.logger.info('%s updating %s status to: %s', prefix, reqName, reqStatus)
                self.reqmgr2.updateRequestStatus(reqName, reqStatus)
            else:
                self.logger.info('DRY-RUN:: %s updating %s status to: %s', prefix, reqName, reqStatus)
        except Exception as err:
            self.logger.exception("Failed to change request status. Error: %s", str(err))

    def updateReportDict(self, reportDict, keyName, value):
        """
        Provided a key name and value, validate the key name
        and update the report dictionary if it passes the validation
        :param reportDict: dictionary with a summary of the service
        :param keyName: string with the key name in the report
        :param value: string/integer value with the content of a metric
        :return: the updated dictionary
        """
        if keyName not in reportDict:
            self.logger.error("Report metric '%s' is not supported", keyName)
        else:
            reportDict[keyName] = value
        return reportDict
Beispiel #2
0
class MSManager(object):
    """
    Entry point for the MicroServices.
    This class manages both transferor and monitor services/threads.
    """
    def __init__(self, config=None, logger=None):
        """
        Setup a bunch of things, like:
         * logger for this service
         * initialize all the necessary service helpers
         * fetch the unified configuration from central couch
         * update the unified configuration with some deployment and default settings
         * start both transfer and monitor threads
        :param config: reqmgr2ms service configuration
        :param logger:
        """
        self.uConfig = {}
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)

        self.reqmgr2 = ReqMgr(self.msConfig['reqmgrUrl'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgrUrl'],
                                   httpDict={'cacheduration': 60},
                                   logger=self.logger)

        # transferor has to look at workflows in assigned status
        self.msTransferor = MSTransferor(self.msConfig,
                                         "assigned",
                                         logger=self.logger)

        ### Last but not least, get the threads started
        thname = 'MSTransferor'
        self.transfThread = start_new_thread(
            thname, daemon, (self.transferor, 'assigned',
                             self.msConfig['interval'], self.logger))
        self.logger.debug("### Running %s thread %s", thname,
                          self.transfThread.running())

        thname = 'MSTransferorMonit'
        self.monitThread = start_new_thread(
            thname, daemon, (self.monitor, 'staging',
                             self.msConfig['interval'] * 2, self.logger))
        self.logger.debug("+++ Running %s thread %s", thname,
                          self.monitThread.running())

    def _parseConfig(self, config):
        """
        __parseConfig_
        Parse the MicroService configuration and set any default values.
        :param config: config as defined in the deployment
        """
        self.logger.info("Using the following config:\n%s", config)

        self.msConfig = {}
        self.msConfig['verbose'] = getattr(config, 'verbose', False)
        self.msConfig['group'] = getattr(config, 'group', 'DataOps')
        self.msConfig['interval'] = getattr(config, 'interval', 5 * 60)
        self.msConfig['readOnly'] = getattr(config, 'readOnly', True)

        self.msConfig['reqmgrUrl'] = getattr(config, 'reqmgr2Url',
                                             'https://cmsweb.cern.ch/reqmgr2')
        self.msConfig['reqmgrCacheUrl'] = self.msConfig['reqmgrUrl'].replace(
            'reqmgr2', 'couchdb/reqmgr_workload_cache')
        self.msConfig['phedexUrl'] = getattr(
            config, 'phedexUrl',
            'https://cmsweb.cern.ch/phedex/datasvc/json/prod')
        self.msConfig['dbsUrl'] = getattr(
            config, 'dbsUrl',
            'https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    def transferor(self, reqStatus):
        """
        MSManager transferor function.
        It performs Unified logic for data subscription and
        transfers requests from assigned to staging/staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startT = time.time()
        self.logger.info("Starting the transferor thread...")
        self.msTransferor.execute()
        self.logger.info("Total transferor execution time: %.2f secs",
                         time.time() - startT)

    def monitor(self, reqStatus='staging'):
        """
        MSManager monitoring function.
        It performs transfer requests from staging to staged state of ReqMgr2.
        For references see
        https://github.com/dmwm/WMCore/wiki/ReqMgr2-MicroService-Transferor
        """
        startT = time.time()
        self.logger.info("Starting the monitor thread...")
        # First, fetch/update our unified configuration from reqmgr_aux db
        # Keep our own copy of the unified config to avoid race conditions
        self.uConfig = self.reqmgrAux.getUnifiedConfig(docName="config")
        if not self.uConfig:
            self.logger.warning(
                "Monitor failed to fetch the unified config. Skipping this cycle."
            )
            return
        self.uConfig = self.uConfig[0]

        try:
            # get requests from ReqMgr2 data-service for given statue
            # here with detail=False we get back list of records
            requests = self.reqmgr2.getRequestByStatus([reqStatus],
                                                       detail=False)
            self.logger.debug('+++ monit found %s requests in %s state',
                              len(requests), reqStatus)

            requestStatus = {}  # keep track of request statuses
            for reqName in requests:
                req = {'name': reqName, 'reqStatus': reqStatus}
                # get transfer IDs
                tids = self.getTransferIDs()
                # get transfer status
                transferStatuses = self.getTransferStatuses(tids)
                # get campaing and unified configuration
                campaign = self.requestCampaign(reqName)
                conf = self.requestConfiguration(reqName)
                self.logger.debug("+++ request %s campaing %s conf %s", req,
                                  campaign, conf)

                # if all transfers are completed, move the request status staging -> staged
                # completed = self.checkSubscription(request)
                completed = 100  # TMP
                if completed == 100:  # all data are staged
                    self.logger.debug(
                        "+++ request %s all transfers are completed", req)
                    self.change(req, 'staged', '+++ monit')
                # if pileup transfers are completed AND some input blocks are completed, move the request status staging -> staged
                elif self.pileupTransfersCompleted(tids):
                    self.logger.debug(
                        "+++ request %s pileup transfers are completed", req)
                    self.change(req, 'staged', '+++ monit')
                # transfers not completed, just update the database with their completion
                else:
                    self.logger.debug(
                        "+++ request %s transfers are not completed", req)
                    requestStatus[
                        req] = transferStatuses  # TODO: implement update of transfer ids
            self.updateTransferIDs(requestStatus)
        except Exception as err:  # general error
            self.logger.exception('+++ monit error: %s', str(err))
        self.logger.info("Total monitor execution time: %.2f secs",
                         time.time() - startT)

    def stop(self):
        "Stop MSManager"
        # stop MSTransferorMonit thread
        self.monitThread.stop()
        # stop MSTransferor thread
        self.transfThread.stop()  # stop checkStatus thread
        status = self.transfThread.running()
        return status

    def getTransferIDsDoc(self):
        """
        Get transfer ids document from backend. The document has the following form:
        {
          "wf_A": [record1, record2, ...],
          "wf_B": [....],
        }
        where each record has the following format:
        {"timestamp":000, "dataset":"/a/b/c", "type": "primary", "trainsferIDs": [1,2,3]}
        """
        doc = {}
        return doc

    def updateTransferIDs(self, requestStatus):
        "Update transfer ids in backend"
        # TODO/Wait: https://github.com/dmwm/WMCore/issues/9198
        # doc = self.getTransferIDsDoc()

    def getTransferIDs(self):
        "Get transfer ids from backend"
        # TODO/Wait: https://github.com/dmwm/WMCore/issues/9198
        # meanwhile return transfer ids from internal store
        return []

    def getTransferStatuses(self, tids):
        "get transfer statuses for given transfer IDs from backend"
        # transfer docs on backend has the following form
        # https://gist.github.com/amaltaro/72599f995b37a6e33566f3c749143154
        statuses = {}
        for tid in tids:
            # TODO: I need to find request name from transfer ID
            # status = self.checkSubscription(request)
            status = 100
            statuses[tid] = status
        return statuses

    def requestCampaign(self, req):
        "Return request campaign"
        return 'campaign_TODO'  # TODO

    def requestConfiguration(self, req):
        "Return request configuration"
        return {}

    def pileupTransfersCompleted(self, tids):
        "Check if pileup transfers are completed"
        # TODO: add implementation
        return False

    def checkSubscription(self, req):
        "Send request to Phedex and return status of request subscription"
        sdict = {}
        for dataset in req.get('datasets', []):
            data = self.phedex.subscriptions(dataset=dataset,
                                             group=self.msConfig['group'])
            self.logger.debug("### dataset %s group %s", dataset,
                              self.msConfig['group'])
            self.logger.debug("### subscription %s", data)
            for row in data['phedex']['dataset']:
                if row['name'] != dataset:
                    continue
                nodes = [s['node'] for s in row['subscription']]
                rNodes = req.get('sites')
                self.logger.debug("### nodes %s %s", nodes, rNodes)
                subset = set(nodes) & set(rNodes)
                if subset == set(rNodes):
                    sdict[dataset] = 1
                else:
                    pct = float(len(subset)) / float(len(set(rNodes)))
                    sdict[dataset] = pct
        self.logger.debug("### sdict %s", sdict)
        tot = len(sdict.keys())
        if not tot:
            return -1
        # return percentage of completion
        return round(float(sum(sdict.values())) / float(tot), 2) * 100

    def checkStatus(self, req):
        "Check status of request in local storage"
        self.logger.debug("### checkStatus of request: %s", req['name'])
        # check subscription status of the request
        # completed = self.checkSubscription(req)
        completed = 100
        if completed == 100:  # all data are staged
            self.logger.debug(
                "### request is completed, change its status and remove it from the store"
            )
            self.change(req, 'staged', '### transferor')
        else:
            self.logger.debug("### request %s, completed %s", req, completed)

    def info(self, req):
        "Return info about given request"
        completed = self.checkSubscription(req)
        return {'request': req, 'status': completed}

    def delete(self, request):
        "Delete request in backend"
        pass

    def change(self, req, reqStatus, prefix='###'):
        """
        Change request status, internally it is done via PUT request to ReqMgr2:
        curl -X PUT -H "Content-Type: application/json" \
             -d '{"RequestStatus":"staging", "RequestName":"bla-bla"}' \
             https://xxx.yyy.zz/reqmgr2/data/request
        """
        self.logger.debug('%s updating %s status to %s', prefix, req['name'],
                          reqStatus)
        try:
            if not self.msConfig['readOnly']:
                self.reqmgr2.updateRequestStatus(req['name'], reqStatus)
        except Exception as err:
            self.logger.exception("Failed to change request status. Error: %s",
                                  str(err))
Beispiel #3
0
class MSTransferor(object):
    def __init__(self, microConfig, status, logger=None):
        """
        Runs the basic setup and initialization for the MS Transferor module
        :param microConfig: microservice configuration
        """
        self.msConfig = microConfig
        self.status = status
        self.uConfig = {}
        self.logger = getMSLogger(microConfig['verbose'], logger=logger)
        self.reqmgr2 = ReqMgr(microConfig['reqmgrUrl'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(microConfig['reqmgrUrl'],
                                   httpDict={'cacheduration': 60},
                                   logger=self.logger)
        # eventually will change it to Rucio
        self.phedex = PhEDEx(httpDict={'cacheduration': 10 * 60},
                             dbsUrl=microConfig['dbsUrl'],
                             logger=self.logger)

    def prep(self):
        """
        Runs any preparation tasks before executing the actual algorithm.
        For now:
         * fetches the unified configuration
        :return: False if it fail to run this action, otherwise True
        """
        self.uConfig = self.reqmgrAux.getUnifiedConfig(docName="config")
        return bool(self.uConfig)

    def execute(self):
        """
        Executes the whole transferor logic
        :param reqStatus: request status to that matters for this module
        :return:
        """
        if not self.prep():
            self.logger.warning(
                "Failed to fetch the latest unified config. Skipping this cycle"
            )
            return
        self.uConfig = self.uConfig[0]

        requestRecords = []
        try:
            # get requests from ReqMgr2 data-service for given status
            requests = self.reqmgr2.getRequestByStatus([self.status],
                                                       detail=True)
            if requests:
                requests = requests[0]
            self.logger.info("### transferor found %s requests in '%s' state",
                             len(requests), self.status)
            if requests:
                for _, wfData in requests.iteritems():
                    requestRecords.append(self.requestRecord(wfData))
        except Exception as err:
            self.logger.exception('### transferor error: %s', str(err))

        if not requestRecords:
            return

        try:
            reqInfo = RequestInfo(self.msConfig, self.uConfig, self.logger)
            for reqSlice in grouper(requestRecords, 50):
                reqResults = reqInfo(reqSlice)
                self.logger.info("%d requests completely processed.",
                                 len(reqResults))
                self.logger.info(
                    "Working on the data subscription and status change...")
                # process all requests
                for req in reqResults:
                    reqName = req['name']
                    # perform transfer
                    tid = self.transferRequest(req)
                    if tid:
                        # Once all transfer requests were successfully made, update: assigned -> staging
                        self.logger.debug(
                            "### transfer request for %s successfull", reqName)
                        self.change(req, 'staging', '### transferor')
                        # if there is nothing to be transferred (no input at all),
                        # then update the request status once again staging -> staged
                        # self.change(req, 'staged', '### transferor')
        except Exception as err:  # general error
            self.logger.exception('### transferor error: %s', str(err))

    def post(self):
        """
        Runs any post tasks before exiting the execution cycle
        :return:
        """
        pass

    def requestRecord(self, wfData):
        """
        Selects only important information for a request dictionary

        Returns: a dictionary
        """
        datasets = []
        if "TaskChain" in wfData or "StepChain" in wfData:
            innerDicts = []
            for i in range(
                    1,
                    wfData.get("TaskChain", wfData.get("StepChain")) + 1):
                innerDicts.append(
                    wfData.get("Task%d" % i, wfData.get("Step%d" % i)))
        else:
            # ReReco and DQMHarvesting
            innerDicts = [wfData]
        for item in innerDicts:
            for key in ['InputDataset', 'MCPileup', 'DataPileup']:
                dataset = item.get(key)
                if dataset:
                    datasets.append({'type': key, 'name': dataset})

        return {
            'name': wfData.get('RequestName'),
            'reqStatus': wfData.get('RequestStatus'),
            'SiteWhiteList': wfData.get('SiteWhitelist', []),
            'SiteBlackList': wfData.get('SiteBlacklist', []),
            'datasets': datasets,
            'campaign': []
        }

    def transferRequest(self, req):
        "Send request to Phedex and return status of request subscription"
        datasets = req.get('datasets', [])
        sites = req.get('sites', [])
        if datasets and sites:
            self.logger.debug("### creating subscription for: %s",
                              pformat(req))
            subscription = PhEDExSubscription(datasets, sites,
                                              self.msConfig['group'])
            # TODO: implement how to get transfer id
            tid = hashlib.md5(str(subscription)).hexdigest()
            # TODO: when ready enable submit subscription step
            # self.phedex.subscribe(subscription)
            return tid

    def change(self, req, reqStatus, prefix='###'):
        """
        Change request status, internally it is done via PUT request to ReqMgr2:
        curl -X PUT -H "Content-Type: application/json" \
             -d '{"RequestStatus":"staging", "RequestName":"bla-bla"}' \
             https://xxx.yyy.zz/reqmgr2/data/request
        """
        self.logger.debug('%s updating %s status to %s', prefix, req['name'],
                          reqStatus)
        try:
            if not self.msConfig['readOnly']:
                self.reqmgr2.updateRequestStatus(req['name'], reqStatus)
        except Exception as err:
            self.logger.exception("Failed to change request status. Error: %s",
                                  str(err))
Beispiel #4
0
class MSCore(object):
    """
    This class provides core functionality for
    MSTransferor, MSMonitor and MSOutput classes.
    """
    def __init__(self, msConfig, logger=None):
        """
        Provides setup for MSTransferor and MSMonitor classes

        :param config: MS service configuration
        :param logger: logger object (optional)
        """
        self.logger = getMSLogger(getattr(msConfig, 'verbose', False), logger)
        self.msConfig = msConfig
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)

        self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                   httpDict={'cacheduration': 1.0},
                                   logger=self.logger)

        # hard code it to production DBS otherwise PhEDEx subscribe API fails to match TMDB data
        dbsUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        if usingRucio():
            # FIXME: we cannot use Rucio in write mode yet
            # self.rucio = Rucio(self.msConfig['rucioAccount'], configDict={"logger": self.logger})
            self.phedex = PhEDEx(httpDict={'cacheduration': 0.5},
                                 dbsUrl=dbsUrl,
                                 logger=self.logger)
        else:
            self.phedex = PhEDEx(httpDict={'cacheduration': 0.5},
                                 dbsUrl=dbsUrl,
                                 logger=self.logger)

    def unifiedConfig(self):
        """
        Fetches the unified configuration
        :return: unified configuration content
        """
        res = self.reqmgrAux.getUnifiedConfig(docName="config")
        if res:
            if isinstance(res, list):
                return res[0]
            return res
        else:
            return {}

    def change(self, reqName, reqStatus, prefix='###'):
        """
        Update the request status in ReqMgr2
        """
        try:
            if self.msConfig['enableStatusTransition']:
                self.logger.info('%s updating %s status to: %s', prefix,
                                 reqName, reqStatus)
                self.reqmgr2.updateRequestStatus(reqName, reqStatus)
            else:
                self.logger.info('DRY-RUN:: %s updating %s status to: %s',
                                 prefix, reqName, reqStatus)
        except Exception as err:
            self.logger.exception("Failed to change request status. Error: %s",
                                  str(err))

    def updateReportDict(self, reportDict, keyName, value):
        """
        Provided a key name and value, validate the key name
        and update the report dictionary if it passes the validation
        :param reportDict: dictionary with a summary of the service
        :param keyName: string with the key name in the report
        :param value: string/integer value with the content of a metric
        :return: the updated dictionary
        """
        if keyName not in reportDict:
            self.logger.error("Report metric '%s' is not supported", keyName)
        else:
            reportDict[keyName] = value
        return reportDict