Example #1
def agentsSites(url):
    "Return list of sites known in CMS WMAgents"
    sites_ready_in_agent = set()
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey(),
                      cert=cert())
    data = json.loads(res)
    agents = {}
    for r in [i['value'] for i in data['rows']]:
        team = r['agent_team']
        if team != 'production':
            continue
        agents.setdefault(team, []).append(r)
    for team, agents in viewitems(agents):
        for agent in agents:
            if agent['status'] != 'ok':
                continue
            for site, sinfo in viewitems(agent['WMBS_INFO']['thresholds']):
                if sinfo['state'] in ['Normal']:
                    sites_ready_in_agent.add(site)
    return sites_ready_in_agent
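A quick note on the helpers used throughout these examples: every snippet calls ckey() and cert() for the X509 credentials. A minimal sketch of what such helpers might look like, assuming the standard grid proxy environment variables (illustrative only, not the WMCore implementation):

import os

def ckey():
    "Hypothetical helper: path to the user key, assuming standard X509 env variables"
    return os.getenv('X509_USER_PROXY') or os.getenv('X509_USER_KEY')

def cert():
    "Hypothetical helper: path to the user certificate, assuming standard X509 env variables"
    return os.getenv('X509_USER_PROXY') or os.getenv('X509_USER_CERT')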
Example #2
def renewRucioToken(rucioAuthUrl, userToken):
    """
    Provided a user Rucio token, check its lifetime and extend it by another hour
    :param rucioAuthUrl: url to the rucio authentication server
    :param userToken: user Rucio token to be checked and renewed
    :return: a datetime.datetime object with the new token lifetime
    """
    params = {}
    headers = {"X-Rucio-Auth-Token": userToken}

    url = '%s/auth/validate' % rucioAuthUrl
    logging.info("Renewing the Rucio token...")
    mgr = RequestHandler()
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey(),
                      cert=cert())
    try:
        # NOTE: the auth/validate endpoint replies with a Python-repr payload
        # (it may contain datetime objects), hence eval() instead of json.loads()
        newExpiration = eval(res)['lifetime']
    except Exception as exc:
        raise RuntimeError(
            "Failed to renew Rucio token. Response: {} Error: {}".format(
                res, str(exc)))
    return newExpiration
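A hedged usage sketch for the function above, assuming a reachable Rucio auth server (the URL and token below are placeholders):

rucioAuthUrl = 'https://rucio-auth.example.com'  # placeholder URL
userToken = 'abc123'  # placeholder token string
newLifetime = renewRucioToken(rucioAuthUrl, userToken)
print("Token now valid until: %s" % newLifetime)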
Example #3
class MicroServiceTest(unittest.TestCase):
    "Unit test for MicroService module"

    def setUp(self):
        "Setup MicroService for testing"
        self.app = ServiceManager()
        config = TestConfig
        manager = 'WMCore_t.Services_t.MicroService_t.MicroService_t.ServiceManager'
        config.views.data.manager = manager
        config.manager = manager
        mount = '/microservice'
        self.mgr = RequestHandler()
        self.port = config.main.port
        self.url = 'http://localhost:%s%s/data' % (self.port, mount)
        cherrypy.config["server.socket_port"] = self.port
        self.server = RestInterface(self.app, config, mount)
        cherrypy.tree.mount(self.server, mount)
        cherrypy.engine.start()

    def tearDown(self):
        "Tear down MicroService"
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def postRequest(self, params):
        "Perform POST request to our MicroService"
        headers = {'Content-type': 'application/json'}
        print("### post call %s params=%s headers=%s" %
              (self.url, params, headers))
        data = self.mgr.getdata(self.url, params=params, headers=headers,
                                verb='POST', cert=cert(), ckey=ckey())
        print("### post call data %s" % data)
        return json.loads(data)

    def test_getState(self):
        "Test function for getting state of the MicroService"
        url = '%s/status' % self.url
        data = self.mgr.getdata(url, params={})
        state = "bla"
        data = {"request": {"state": state}}
        self.postRequest(data)
        data = self.mgr.getdata(url, params={})
        data = json.loads(data)
        print("### url=%s, data=%s" % (url, data))
        for row in data['result']:
            if 'state' in row:
                self.assertEqual(state, row['state'])
Example #4
    def getSpec(self, request, reqSpecs=None):
        "Get request from workload cache"
        if reqSpecs and request['RequestName'] in reqSpecs:
            return reqSpecs[request['RequestName']]
        url = str('%s/%s/spec' % (self.msConfig['reqmgrCacheUrl'], request['RequestName']))
        mgr = RequestHandler()
        data = mgr.getdata(url, params={}, cert=cert(), ckey=ckey())
        return pickle.loads(data)
Example #5
def findParent(dataset):
    "Helper function to find a parent of the dataset"
    url = '%s/datasetparents' % dbsUrl()
    params = {'dataset': dataset}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, cert=cert(), ckey=ckey())
    return [str(i['parent_dataset']) for i in json.loads(data)]
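For illustration, a call with a placeholder CMS dataset path (three-part /Primary/Processed/Tier format):

parents = findParent('/PrimaryDS/ProcessedDS-v1/TIER')  # placeholder dataset
print(parents)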
Example #6
def getSpec(request, reqSpecs=None):
    "Get request from workload cache"
    if reqSpecs and request['RequestName'] in reqSpecs:
        return reqSpecs[request['RequestName']]
    url = str('%s/%s/spec' % (reqmgrCacheUrl(), request['RequestName']))
    mgr = RequestHandler()
    data = mgr.getdata(url, params={}, cert=cert(), ckey=ckey())
    return pickle.loads(data)
Example #7
class MicroServiceTest(unittest.TestCase):
    "Unit test for MicroService module"
    def setUp(self):
        "Setup MicroService for testing"
        self.app = ServiceManager()
        config = TestConfig
        manager = 'WMCore_t.Services_t.MicroService_t.MicroService_t.ServiceManager'
        config.views.data.manager = manager
        config.manager = manager
        mount = '/microservice'
        self.mgr = RequestHandler()
        self.port = config.main.port
        self.url = 'http://localhost:%s%s/data' % (self.port, mount)
        cherrypy.config["server.socket_port"] = self.port
        self.server = RestApiHub(self.app, config, mount)
        cherrypy.tree.mount(self.server, mount)
        cherrypy.engine.start()

    def tearDown(self):
        "Tear down MicroService"
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def postRequest(self, params):
        "Perform POST request to our MicroService"
        headers = {'Content-type': 'application/json'}
        print("### post call %s params=%s headers=%s" % (self.url, params, headers))
        data = self.mgr.getdata(self.url, params=params, headers=headers,
                                verb='POST', cert=cert(), ckey=ckey())
        print("### post call data %s" % data)
        return json.loads(data)

    def test_getState(self):
        "Test function for getting state of the MicroService"
        url = '%s/status' % self.url
        data = self.mgr.getdata(url, params={})
        state = "bla"
        data = {"request":{"state": state}}
        self.postRequest(data)
        data = self.mgr.getdata(url, params={})
        data = json.loads(data)
        print("### url=%s, data=%s" % (url, data))
        for row in data['result']:
            if 'state' in row:
                self.assertEqual(state, row['state'])
Example #8
def getNodes(kind):
    "Get list of PhEDEx nodes"
    params = {}
    headers = {'Accept': 'application/json'}
    url = '%s/nodes' % phedexUrl()
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    nodes = json.loads(data)['phedex']['node']
    return [node['name'] for node in nodes if node['kind'] == kind]
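A short usage sketch; 'Disk' is assumed here to be one of the valid PhEDEx node kinds:

diskNodes = getNodes('Disk')  # assumed node kind
print("Found %d disk nodes" % len(diskNodes))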
Example #9
def getNodes(kind):
    "Get list of PhEDEx nodes"
    params = {}
    headers = {'Accept': 'application/json'}
    url = '%s/nodes' % phedexUrl()
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    nodes = json.loads(data)['phedex']['node']
    return [node['name'] for node in nodes if node['kind'] == kind]
Example #10
def getWorkflows(state):
    "Get list of workflows from ReqMgr2 data-service"
    url = '%s/data/request' % reqmgrUrl()
    headers = {'Accept': 'application/json'}
    params = {'status': state}
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    return data.get('result', [])
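A usage sketch, assuming 'assignment-approved' is one of the ReqMgr2 request statuses:

workflows = getWorkflows('assignment-approved')
print("Retrieved %d result rows" % len(workflows))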
Example #11
def getWorkflow(requestName):
    "Get list of workflow info from ReqMgr2 data-service for given request name"
    headers = {'Accept': 'application/json'}
    params = {}
    url = '%s/data/request/%s' % (reqmgrUrl(), requestName)
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    return data.get('result', [])
Example #12
def getNodesForId(phedexid):
    "Helper function to get nodes for given phedex id"
    url = '%s/requestlist' % phedexUrl()
    params = {'request': str(phedexid)}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert())
    items = json.loads(data)['phedex']['request']
    nodes = [n['name'] for i in items for n in i['node']]
    return list(set(nodes))
Example #13
def getRequest(url, params):
    "Helper function to GET data from given URL"
    mgr = RequestHandler()
    headers = {'Accept': 'application/json'}
    verbose = 0
    if 'verbose' in params:
        verbose = params['verbose']
        del params['verbose']
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert(), verbose=verbose)
    return data
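A usage sketch showing how the optional 'verbose' key is consumed before the request is sent (the URL is a placeholder):

res = getRequest('https://cmsweb.example.com/reqmgr2/data/request',  # placeholder URL
                 {'status': 'running-open', 'verbose': 1})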
Example #14
def getNodesForId(phedexid):
    "Helper function to get nodes for given phedex id"
    url = '%s/requestlist' % phedexUrl()
    params = {'request': str(phedexid)}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert())
    items = json.loads(data)['phedex']['request']
    nodes = [n['name'] for i in items for n in i['node']]
    return list(set(nodes))
Example #15
def getdata(url, params, headers=None):
    "Helper function to get data from the service"
    ckey, cert = getKeyCertFromEnv()
    mgr = RequestHandler()
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey,
                      cert=cert)
    return json.loads(res)
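Unlike the other helpers, this variant resolves the credentials from the environment via getKeyCertFromEnv(). A hedged usage sketch with placeholder values:

result = getdata('https://cmsweb.example.com/some/api',  # placeholder URL
                 params={'key': 'value'},
                 headers={'Accept': 'application/json'})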
Example #16
def postRequest(url, params):
    "Helper function to POST request to given URL"
    mgr = RequestHandler()
    headers = {'Accept': 'application/json'}
    verbose = 0
    if 'verbose' in params:
        verbose = params['verbose']
        del params['verbose']
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert(),
                       verb='POST', verbose=verbose)
    return data
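A usage sketch with placeholder values; note that 'verbose' is stripped from params before the POST is made:

data = postRequest('https://cmsweb.example.com/some/api',  # placeholder URL
                   {'doc': 'payload', 'verbose': 1})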
Example #17
def getDetoxQuota(url):
    "Get list of workflow info from ReqMgr2 data-service for given request name"
    headers = {}
    params = {}
    mgr = RequestHandler()
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey(),
                      cert=cert())
    res = res.split('\n')
    return res
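A usage sketch, assuming the Detox endpoint serves a plain-text report (the URL is a placeholder):

for line in getDetoxQuota('https://detox.example.com/SitesInfo.txt'):  # placeholder URL
    print(line)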
Example #18
def findParent(dataset, dbsUrl):
    "Helper function to find a parent of the dataset"
    url = '%s/datasetparents' % dbsUrl
    params = {'dataset': dataset}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url,
                       params=params,
                       headers=headers,
                       cert=cert(),
                       ckey=ckey())
    return [str(i['parent_dataset']) for i in json.loads(data)]
Example #19
def getNodeQueues():
    "Helper function to fetch nodes usage from PhEDEx data service"
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    url = '%s/nodeusagehistory' % phedexUrl()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    ret = defaultdict(int)
    for node in data['phedex']['node']:
        for usage in node['usage']:
            ret[node['name']] += int(usage['miss_bytes'] / 1024.**4)  # in TB
    return ret
Example #20
def getWorkflows(state):
    "Get list of workflows from ReqMgr2 data-service"
    url = '%s/data/request' % reqmgrUrl()
    headers = {'Accept': 'application/json'}
    params = {'status': state}
    mgr = RequestHandler()
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey(),
                      cert=cert())
    data = json.loads(res)
    return data.get('result', [])
Example #21
def getWorkflow(requestName, reqMgrUrl):
    "Get list of workflow info from ReqMgr2 data-service for given request name"
    headers = {'Accept': 'application/json'}
    params = {}
    url = '%s/data/request/%s' % (reqMgrUrl, requestName)
    mgr = RequestHandler()
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey(),
                      cert=cert())
    data = json.loads(res)
    return data.get('result', [])
Example #22
def getNodeQueues():
    "Helper function to fetch nodes usage from PhEDEx data service"
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    url = '%s/nodeusagehistory' % phedexUrl()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    ret = defaultdict(int)
    for node in data['phedex']['node']:
        for usage in node['usage']:
            ret[node['name']] += int(usage['miss_bytes'] / 1024.**4)  # in TB
    return ret
Example #23
def workqueueRequests(state=None):
    "Helper functions to get requests from WorkQueue"
    url = workqueueView('jobsByRequest')
    if state:
        pass # we may need to use state when we'll query WorkQueue
    params = {}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, cert=cert(), ckey=ckey())
    data = json.loads(data)
    rdict = {}
    for row in data.get('rows', []):
        rdict[row['key']] = row['value']
    return rdict
Example #24
    def _postRequest(self, url, params, verb='POST', verbose=0):
        "Helper function to POST request to given URL"
        mgr = RequestHandler(logger=self.logger)
        headers = copy(self.configDict['headers'])
        headers.update({"Authorization": self._token})

        try:
            data = mgr.getdata(url,
                               params,
                               headers,
                               verb=verb,
                               verbose=verbose)
            return json.loads(data)
        except Exception as exc:
            self.logger.error("Failed to retrieve data from MonIT. Error: %s",
                              str(exc))
            return None
Example #25
def workqueueRequests(state=None):
    "Helper functions to get requests from WorkQueue"
    url = workqueueView('jobsByRequest')
    if state:
        pass  # we may need to use state when we'll query WorkQueue
    params = {}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url,
                       params=params,
                       headers=headers,
                       cert=cert(),
                       ckey=ckey())
    data = json.loads(data)
    rdict = {}
    for row in data.get('rows', []):
        rdict[row['key']] = row['value']
    return rdict
Example #26
class AuxCacheUpdateTasks(CherryPyPeriodicTask):
    """
    Periodically updates the auxiliary DB documents (e.g. TagCollector data)
    """
    def __init__(self, rest, config):

        super(AuxCacheUpdateTasks, self).__init__(config)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        self.mgr = RequestHandler()

    def setConcurrentTasks(self, config):
        """
        sets the list of functions which
        """
        self.concurrentTasks = [{
            'func': self.updateAuxiliarDocs,
            'duration': config.tagCollectDuration
        }]

    def updateAuxiliarDocs(self, config):
        """
        Update the central couch database with auxiliary documents
        that need to be constantly updated whenever an update is
        made at the data source
        """
        self.logger.info("Updating auxiliary couch documents ...")

        self.reqmgrAux.populateCMSSWVersion(config.tagcollect_url,
                                            **config.tagcollect_args)

        try:
            data = self.mgr.getdata(config.unified_url,
                                    params={},
                                    headers={'Accept': 'application/json'})
            data = json.loads(data)
        except Exception as ex:
            msg = "Failed to retrieve unified configuration from github. Error: %s" % str(
                ex)
            msg += "\nRetrying again in the next cycle"
            self.logger.error(msg)
            return

        self.reqmgrAux.updateUnifiedConfig(data, docName="config")
Example #27
def alterSubscription(phedexid, decision, comments, nodes=None):
    "Helper function to alter subscriptions for given phedex id and nodes"
    mgr = RequestHandler()
    headers = {'Accept': 'application/json'}
    nodes = nodes if nodes else getNodesForId(phedexid)
    params = {
        'decision': decision,
        'request': phedexid,
        'node': ','.join(nodes),
        'comments': comments
        }
    url = '%s/updaterequest' % phedexUrl()
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert(), verb='POST')
    result = json.loads(data)
    if not result:
        return False
    if 'already' in result:
        return True
    return result
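A hedged usage sketch with hypothetical values (the request id, decision and comment below are placeholders):

result = alterSubscription(12345, 'approve', 'auto-approved by ops tooling')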
Example #28
def agentsSites(url):
    "Return list of sites known in CMS WMAgents"
    sites_ready_in_agent = set()
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    agents = {}
    for r in [i['value'] for i in data['rows']]:
        team = r['agent_team']
        if team != 'production':
            continue
        agents.setdefault(team, []).append(r)
    for team, agents in agents.items():
        for agent in agents:
            if agent['status'] != 'ok':
                continue
            for site, sinfo in agent['WMBS_INFO']['thresholds'].items():
                if sinfo['state'] in ['Normal']:
                    sites_ready_in_agent.add(site)
    return sites_ready_in_agent
Example #29
def alterSubscription(phedexid, decision, comments, nodes=None):
    "Helper function to alter subscriptions for given phedex id and nodes"
    mgr = RequestHandler()
    headers = {'Accept': 'application/json'}
    nodes = nodes if nodes else getNodesForId(phedexid)
    params = {
        'decision': decision,
        'request': phedexid,
        'node': ','.join(nodes),
        'comments': comments
    }
    url = '%s/updaterequest' % phedexUrl()
    data = mgr.getdata(url,
                       params,
                       headers,
                       ckey=ckey(),
                       cert=cert(),
                       verb='POST')
    result = json.loads(data)
    if not result:
        return False
    if 'already' in result:
        return True
    return result
Example #30
class MSRuleCleaner(MSCore):
    """
    MSRuleCleaner provides the logic used to clean the Rucio
    block level data placement rules created by WMAgent.
    """
    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSRuleCleaner module
        :param msConfig: micro service configuration
        """
        super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        self.msConfig.setdefault("services", ['ruleCleaner'])
        self.msConfig.setdefault("rucioWmaAccount", "wma_test")
        self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor")
        self.msConfig.setdefault('enableRealMode', False)
        self.mode = "RealMode" if self.msConfig[
            'enableRealMode'] else "DryRunMode"
        self.emailAlert = EmailAlert(self.msConfig)
        self.curlMgr = RequestHandler()

        # Building all the Pipelines:
        pName = 'plineMSTrCont'
        self.plineMSTrCont = Pipeline(name=pName,
                                      funcLine=[
                                          Functor(self.setPlineMarker, pName),
                                          Functor(self.cleanRucioRules)
                                      ])
        pName = 'plineMSTrBlock'
        self.plineMSTrBlock = Pipeline(name=pName,
                                       funcLine=[
                                           Functor(self.setPlineMarker, pName),
                                           Functor(self.cleanRucioRules)
                                       ])
        pName = 'plineAgentCont'
        self.plineAgentCont = Pipeline(
            name=pName,
            funcLine=[
                Functor(self.setPlineMarker, pName),
                Functor(self.getRucioRules, 'container',
                        self.msConfig['rucioWmaAccount']),
                Functor(self.cleanRucioRules)
            ])
        pName = 'plineAgentBlock'
        self.plineAgentBlock = Pipeline(
            name=pName,
            funcLine=[
                Functor(self.setPlineMarker, pName),
                Functor(self.getRucioRules, 'block',
                        self.msConfig['rucioWmaAccount']),
                Functor(self.cleanRucioRules)
            ])
        pName = 'plineArchive'
        self.plineArchive = Pipeline(name=pName,
                                     funcLine=[
                                         Functor(self.setPlineMarker, pName),
                                         Functor(self.setClean),
                                         Functor(self.archive)
                                     ])

        # Building the different set of plines we will need later:
        # NOTE: The following are all the functional pipelines which are supposed to include
        #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
        self.cleanuplines = [
            self.plineMSTrCont, self.plineMSTrBlock, self.plineAgentCont,
            self.plineAgentBlock
        ]
        # Building an auxiliary list of cleanup pipeline names only:
        self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

        # Building lists of pipelines related only to Agents or MStransferror
        self.agentlines = [self.plineAgentCont, self.plineAgentBlock]
        self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock]

        # Initialization of the 'cleaned' and 'archived' counters:
        self.wfCounters = {'cleaned': {}, 'archived': 0}

    def resetCounters(self):
        """
        A simple function for zeroing the cleaned and archived counters.
        """
        for pline in self.cleanuplines:
            self.wfCounters['cleaned'][pline.name] = 0
        self.wfCounters['archived'] = 0

    def execute(self, reqStatus):
        """
        Executes the whole ruleCleaner logic
        :return: summary
        """
        # start threads in MSManager which should call this method
        summary = dict(RULECLEANER_REPORT)

        self.currThread = current_thread()
        self.currThreadIdent = self.currThread.name
        self.updateReportDict(summary, "thread_id", self.currThreadIdent)
        self.resetCounters()

        self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode)

        # Build the list of workflows to work on:
        try:
            requestRecords = {}
            for status in reqStatus:
                requestRecords.update(self.getRequestRecords(status))
        except Exception as err:  # general error
            msg = "Unknown exception while fetching requests from ReqMgr2. Error: %s", str(
                err)
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)

        # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords
        try:
            totalNumRequests, cleanNumRequests, archivedNumRequests = self._execute(
                requestRecords)
            msg = "\nNumber of processed workflows: %s."
            msg += "\nNumber of properly cleaned workflows: %s."
            msg += "\nNumber of archived workflows: %s."
            self.logger.info(msg, totalNumRequests, cleanNumRequests,
                             archivedNumRequests)
            self.updateReportDict(summary, "total_num_requests",
                                  totalNumRequests)
            self.updateReportDict(summary, "clean_num_requests",
                                  cleanNumRequests)
            self.updateReportDict(summary, "archived_num_requests",
                                  archivedNumRequests)
        except Exception as ex:
            msg = "Unknown exception while running MSRuleCleaner thread Error: %s"
            self.logger.exception(msg, str(ex))
            self.updateReportDict(summary, "error", msg)

        return summary

    def _execute(self, reqRecords):
        """
        Executes the MSRuleCleaner pipelines based on the workflow status
        :param reqList: A list of RequestRecords to work on
        :return:        a tuple with:
                            number of processed workflows
                            number of properly cleaned requests
                            number of archived workflows
        """
        # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival
        #       Pipelines are executed sequentially in the above order.
        #       This way we assure ourselves that we archive only workflows
        #       that have accomplished the needed cleanup

        cleanNumRequests = 0
        totalNumRequests = 0

        # Call the workflow dispatcher:
        for _, req in reqRecords.items():
            wflow = MSRuleCleanerWflow(req)
            self._dispatchWflow(wflow)
            msg = "\n----------------------------------------------------------"
            msg += "\nMSRuleCleanerWflow: %s"
            msg += "\n----------------------------------------------------------"
            self.logger.debug(msg, pformat(wflow))
            totalNumRequests += 1
            if self._checkClean(wflow):
                cleanNumRequests += 1

        # Report the counters:
        for pline in self.cleanuplines:
            msg = "Workflows cleaned by pipeline: %s: %d"
            self.logger.info(msg, pline.name,
                             self.wfCounters['cleaned'][pline.name])
        archivedNumRequests = self.wfCounters['archived']
        self.logger.info("Workflows archived: %d", self.wfCounters['archived'])
        return totalNumRequests, cleanNumRequests, archivedNumRequests

    def _dispatchWflow(self, wflow):
        """
        A function intended to dispatch a workflow (e.g. based on its status)
        through one or more functional pipelines, in case there is some more
        complicated logic involved in the order we execute them rather than
        just running them sequentially
        """
        self.logger.debug("Dispatching workflow: %s", wflow['RequestName'])
        # NOTE: The following dispatch logic is a subject to be changed at any time

        # Resolve:
        # NOTE: First resolve any preliminary flags that will be needed further
        #       in the logic of the _dispatcher() itself
        if wflow['RequestStatus'] == 'announced':
            self.getMSOutputTransferInfo(wflow)

        # Clean:
        # Do not clean any Resubmission, but still let them be archived
        if wflow['RequestType'] == 'Resubmission':
            wflow['ForceArchive'] = True
            msg = "Skipping cleanup step for workflow: %s - RequestType is %s."
            msg += " Will try to archive it directly."
            self.logger.info(msg, wflow['RequestName'], wflow['RequestType'])
        elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']:
            # NOTE: We do not check the ParentageResolved flag for these
            #       workflows, but we do need to clean output data placement
            #       rules from the agents for them
            for pline in self.agentlines:
                try:
                    pline.run(wflow)
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name,
                                          wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        elif wflow['RequestStatus'] == 'announced' and not wflow['ParentageResolved']:
            # NOTE: We skip workflows which are not having 'ParentageResolved'
            #       flag, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferDone']:
            # NOTE: We skip workflows which have not yet finalised their TransferStatus
            #       in MSOutput, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced':
            for pline in self.cleanuplines:
                try:
                    pline.run(wflow)
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error:  \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name,
                                          wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        else:
            # We shouldn't be here:
            msg = "Skipping workflow: %s - "
            msg += "Does not fall under any of the defined categories."
            self.logger.error(msg, wflow['RequestName'])

        # Archive:
        try:
            self.plineArchive.run(wflow)
            self.wfCounters['archived'] += 1
        except MSRuleCleanerArchival as ex:
            msg = "%s: Archival Error: %s. "
            msg += " Will retry again in the next cycle."
            self.logger.error(msg, wflow['PlineMarkers'][-1], str(ex))
        except Exception as ex:
            msg = "%s General error from pipeline. Workflow: %s. Error: \n%s. "
            msg += "\nWill retry again in the next cycle."
            self.logger.exception(msg, wflow['PlineMarkers'][-1],
                                  wflow['RequestName'], str(ex))

    def setPlineMarker(self, wflow, pName):
        """
        A function intended to mark which is the pipeline currently working
        on the workflow. It is supposed to be called always as a first function
        in the pipeline.
        :param  wflow:   A MSRuleCleaner workflow representation
        :param  pName:   The name of the functional pipeline
        :return wflow:
        """
        # NOTE: The current functional pipeline MUST always be appended at the
        #       end of the 'PlineMarkers' list

        # First get rid of the default:
        if not wflow['PlineMarkers']:
            wflow['PlineMarkers'] = []

        # Then push our current value into the markers list:
        wflow['PlineMarkers'].append(pName)

        # Populate the list of flags to be used later:
        if pName not in wflow['RulesToClean']:
            if pName in self.cleanupPipeNames:
                wflow['RulesToClean'][pName] = []
        if pName not in wflow['CleanupStatus']:
            if pName in self.cleanupPipeNames:
                wflow['CleanupStatus'][pName] = False
        return wflow

    def _checkClean(self, wflow):
        """
        An auxiliary function used to only check the temporary cleanup status.
        It basically takes the pipelines registered in 'PlineMarkers' that have
        already worked on the workflow as a mask and applies this mask over
        the set of flags in the 'CleanupStatus' field and then reduces the
        result to a single bool value
        """
        # NOTE: This is one of the few functions taking a workflow as an argument
        #       but returning a bool, since it is an auxiliary function and is not
        #       supposed to be called as a standalone function in a pipeline.
        # NOTE: `all([]) == True`, ergo all the 'rejected' && 'aborted-completed' workflows
        #       are also counted as properly cleaned and can trigger archival later

        # Build a list of bool flags based on the mask of PlineMarkers
        cleanFlagsList = [
            wflow['CleanupStatus'][key] for key in wflow['PlineMarkers']
            if key in wflow['CleanupStatus'].keys()
        ]

        # If no one has worked on the workflow, set the clean status to false
        if not wflow['PlineMarkers']:
            cleanStatus = False
        # If we have a mask longer than the list of flags avoid false positives
        # because of the behavior explained above - `all([]) == True`
        elif not cleanFlagsList:
            cleanStatus = False
        # Figure out the final value
        else:
            cleanStatus = all(cleanFlagsList)
        return cleanStatus

    def setClean(self, wflow):
        """
        A function to set the 'IsClean' flag based on the status from all the
        pipelines which have worked on the workflow (and have put their markers
        in the 'PlineMarkers' list)
        :param  wflow:      A MSRuleCleaner workflow representation
        :return wflow:
        """
        wflow['IsClean'] = self._checkClean(wflow)
        return wflow

    def archive(self, wflow):
        """
        Move the workflow to the proper archived status after checking
        the full cleanup status
        :param  wflow:      A MSRuleCleaner workflow representation
        :return wflow:
        """
        # NOTE: check allowed status transitions with:
        #       https://github.com/dmwm/WMCore/blob/5961d2229b1e548e58259c06af154f33bce36c68/src/python/WMCore/ReqMgr/DataStructs/RequestStatus.py#L171
        if not (wflow['IsClean'] or wflow['ForceArchive']):
            msg = "Not properly cleaned workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchival(msg)

        # Check the available status transitions before we decide the final status
        targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get(
            wflow['RequestStatus'], [])
        self.logger.info("targetStatusList: %s", targetStatusList)
        return wflow

    def getMSOutputTransferInfo(self, wflow):
        """
        Fetches the transfer information from the MSOutput REST interface for
        the given workflow.
        :param  wflow:   A MSRuleCleaner workflow representation
        :return wflow:
        """
        headers = {'Accept': 'application/json'}
        params = {}
        transferInfo = None  # guard against an unbound reference if the fetch below fails
        url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'],
                                           wflow['RequestName'])
        try:
            res = self.curlMgr.getdata(url,
                                       params=params,
                                       headers=headers,
                                       ckey=ckey(),
                                       cert=cert())
            data = json.loads(res)['result'][0]
            transferInfo = data['transferDoc']
        except Exception as ex:
            msg = "General exception while fetching TransferInfo from MSOutput for %s. "
            msg += "Error: %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))
        if transferInfo is not None and transferInfo['TransferStatus'] == 'done':
            wflow['TransferDone'] = True
        return wflow

    def getRucioRules(self, wflow, gran, rucioAcct):
        """
        Queries Rucio and builds the relevant list of block- or container-level
        rules for the given workflow
        :param  wflow:   A MSRuleCleaner workflow representation
        :param  gran:    Data granularity to search for Rucio rules. Possible values:
                        'block' || 'container'
        :return:        wflow
        """
        currPline = wflow['PlineMarkers'][-1]
        # Find all the output placement rules created by the agents
        for dataCont in wflow['OutputDatasets']:
            if gran == 'container':
                for rule in self.rucio.listDataRules(dataCont,
                                                     account=rucioAcct):
                    wflow['RulesToClean'][currPline].append(rule['id'])
            elif gran == 'block':
                try:
                    blocks = self.rucio.getBlocksInContainer(dataCont)
                    for block in blocks:
                        for rule in self.rucio.listDataRules(
                                block, account=rucioAcct):
                            wflow['RulesToClean'][currPline].append(rule['id'])
                except WMRucioDIDNotFoundException:
                    msg = "Container: %s not found in Rucio for workflow: %s."
                    self.logger.info(msg, dataCont, wflow['RequestName'])
        return wflow

    def cleanRucioRules(self, wflow):
        """
        Cleans all the Rules present in the field 'RulesToClean' in the MSRuleCleaner
        workflow representation. And fills the relevant Cleanup Status.
        :param wflow:   A MSRuleCleaner workflow representation
        :return:        wflow
        """
        # NOTE: The function should be called independently and sequentially from
        #       the Input and the respective BlockLevel pipelines.

        # NOTE: The current functional pipeline is always the last one in the PlineMarkers list
        currPline = wflow['PlineMarkers'][-1]
        delResults = []
        if self.msConfig['enableRealMode']:
            for rule in wflow['RulesToClean'][currPline]:
                self.logger.info("%s: Deleting ruleId: %s ", currPline, rule)
                delResult = self.rucio.deleteRule(rule)
                delResults.append(delResult)
                if not delResult:
                    self.logger.warning("%s: Failed to delete ruleId: %s ",
                                        currPline, rule)
        else:
            for rule in wflow['RulesToClean'][currPline]:
                delResults.append(True)
                self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ",
                                 currPline, rule)

        # Set the cleanup flag:
        wflow['CleanupStatus'][currPline] = all(delResults)
        # ----------------------------------------------------------------------
        # FIXME : To be removed once the plineMSTrBlock && plineMSTrCont are
        #         developed
        if currPline in ['plineMSTrBlock', 'plineMSTrCont']:
            wflow['CleanupStatus'][currPline] = True
        # ----------------------------------------------------------------------
        return wflow

    def getRequestRecords(self, reqStatus):
        """
        Queries ReqMgr2 for requests in a given status.
        :param reqStatus: The status for the requests to be fetched from ReqMgr2
        :return requests: A dictionary with all the workflows in the given status
        """
        self.logger.info("Fetching requests in status: %s", reqStatus)
        result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True)
        if not result:
            requests = {}
        else:
            requests = result[0]
        self.logger.info('  retrieved %s requests in status: %s',
                         len(requests), reqStatus)
        return requests
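The Pipeline/Functor combination used throughout this class is a plain function-composition pattern: each Functor binds extra arguments to a step, and Pipeline.run threads the workflow object through the steps in order. A minimal sketch of the assumed semantics (illustrative, not the WMCore implementation):

class Functor(object):
    "Wrap a callable together with the extra arguments it should be given"
    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, obj):
        # The object travelling down the pipeline is always the first argument
        return self.func(obj, *self.args, **self.kwargs)


class Pipeline(object):
    "Run an object through a list of functors, feeding each result to the next"
    def __init__(self, name=None, funcLine=None):
        self.name = name
        self.funcLine = funcLine or []

    def run(self, obj):
        for functor in self.funcLine:
            obj = functor(obj)
        return obj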
Example #31
class MSRuleCleaner(MSCore):
    """
    MSRuleCleaner provides the logic used to clean the Rucio
    block level data placement rules created by WMAgent.
    """

    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSRuleCleaner module
        :param msConfig: micro service configuration
        """
        super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        self.msConfig.setdefault("services", ['ruleCleaner'])
        self.msConfig.setdefault("rucioWmaAccount", "wma_test")
        self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor")
        self.msConfig.setdefault('enableRealMode', False)

        self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode"
        self.curlMgr = RequestHandler()
        self.targetStatusRegex = re.compile(r'.*archived')
        self.logDB = LogDB(self.msConfig["logDBUrl"],
                           self.msConfig["logDBReporter"],
                           logger=self.logger)
        self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger)

        # Building all the Pipelines:
        pName = 'plineMSTrCont'
        self.plineMSTrCont = Pipeline(name=pName,
                                      funcLine=[Functor(self.setPlineMarker, pName),
                                                Functor(self.setParentDatasets),
                                                Functor(self.getRucioRules, 'container', self.msConfig['rucioMStrAccount']),
                                                Functor(self.cleanRucioRules)])
        pName = 'plineMSTrBlock'
        self.plineMSTrBlock = Pipeline(name=pName,
                                       funcLine=[Functor(self.setPlineMarker, pName),
                                                 Functor(self.setParentDatasets),
                                                 Functor(self.getRucioRules, 'block', self.msConfig['rucioMStrAccount']),
                                                 Functor(self.cleanRucioRules)])
        pName = 'plineAgentCont'
        self.plineAgentCont = Pipeline(name=pName,
                                       funcLine=[Functor(self.setPlineMarker, pName),
                                                 Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']),
                                                 Functor(self.cleanRucioRules)])
        pName = 'plineAgentBlock'
        self.plineAgentBlock = Pipeline(name=pName,
                                        funcLine=[Functor(self.setPlineMarker, pName),
                                                  Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']),
                                                  Functor(self.cleanRucioRules)])
        pName = 'plineArchive'
        self.plineArchive = Pipeline(name=pName,
                                     funcLine=[Functor(self.setPlineMarker, pName),
                                               Functor(self.findTargetStatus),
                                               Functor(self.setClean),
                                               Functor(self.setArchivalDelayExpired),
                                               Functor(self.setLogDBClean),
                                               Functor(self.archive)])

        # Building the different set of plines we will need later:
        # NOTE: The following are all the functional pipelines which are supposed to include
        #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
        self.cleanuplines = [self.plineMSTrCont,
                             self.plineMSTrBlock,
                             self.plineAgentCont,
                             self.plineAgentBlock]
        # Building an auxiliary list of cleanup pipeline names only:
        self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

        # Building lists of pipelines related only to Agents or MStransferror
        self.agentlines = [self.plineAgentCont,
                           self.plineAgentBlock]
        self.mstrlines = [self.plineMSTrCont,
                          self.plineMSTrBlock]

        # Initialization of the 'cleaned' and 'archived' counters:
        self.wfCounters = {'cleaned': {},
                           'archived': {'normalArchived': 0,
                                        'forceArchived': 0}}
        self.globalLocks = set()

    def getGlobalLocks(self):
        """
        Fetches the list of 'globalLocks' from wmstats server and the list of
        'parentLocks' from request manager. Stores/updates the unified set
        (a union of the 'globalLocks' and the 'parentLocks' lists) in the
        'globalLocks' instance variable.
        """
        self.logger.info("Fetching globalLocks list from wmstats server.")
        try:
            globalLocks = set(self.wmstatsSvc.getGlobalLocks())
        except Exception as ex:
            msg = "Failed to refresh global locks list for the current polling cycle. Error: %s "
            msg += "Skipping this polling cycle."
            self.logger.error(msg, str(ex))
            raise ex
        self.logger.info("Fetching parentLocks list from reqmgr2 server.")
        try:
            parentLocks = set(self.reqmgr2.getParentLocks())
        except Exception as ex:
            msg = "Failed to refresh parent locks list for the current poling cycle. Error: %s "
            msg += "Skipping this polling cycle."
            self.logger.error(msg, str(ex))
            raise ex
        self.globalLocks = globalLocks | parentLocks

    def resetCounters(self):
        """
        A simple function for zeroing the cleaned and archived counters.
        """
        for pline in self.cleanuplines:
            self.wfCounters['cleaned'][pline.name] = 0
        self.wfCounters['archived']['normalArchived'] = 0
        self.wfCounters['archived']['forceArchived'] = 0

    def execute(self, reqStatus):
        """
        Executes the whole ruleCleaner logic
        :return: summary
        """
        # start threads in MSManager which should call this method
        summary = dict(RULECLEANER_REPORT)

        self.currThread = current_thread()
        self.currThreadIdent = self.currThread.name
        self.updateReportDict(summary, "thread_id", self.currThreadIdent)
        self.resetCounters()
        self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode)

        # Build the list of workflows to work on:
        try:
            requestRecords = {}
            for status in reqStatus:
                requestRecords.update(self.getRequestRecords(status))
        except Exception as err:  # general error
            msg = "Unknown exception while fetching requests from ReqMgr2. Error: %s", str(err)
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)

        # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords
        try:
            self.getGlobalLocks()
            totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests = self._execute(requestRecords)
            msg = "\nNumber of processed workflows: %s."
            msg += "\nNumber of properly cleaned workflows: %s."
            msg += "\nNumber of normally archived workflows: %s."
            msg += "\nNumber of force archived workflows: %s."
            self.logger.info(msg,
                             totalNumRequests,
                             cleanNumRequests,
                             normalArchivedNumRequests,
                             forceArchivedNumRequests)
            self.updateReportDict(summary, "total_num_requests", totalNumRequests)
            self.updateReportDict(summary, "clean_num_requests", cleanNumRequests)
            self.updateReportDict(summary, "normal_archived_num_requests", normalArchivedNumRequests)
            self.updateReportDict(summary, "force_archived_num_requests", forceArchivedNumRequests)
        except Exception as ex:
            msg = "Unknown exception while running MSRuleCleaner thread Error: %s"
            self.logger.exception(msg, str(ex))
            self.updateReportDict(summary, "error", msg)

        return summary

    def _execute(self, reqRecords):
        """
        Executes the MSRuleCleaner pipelines based on the workflow status
        :param reqList: A list of RequestRecords to work on
        :return:        a tuple with:
                            number of processed workflows
                            number of properly cleaned requests
                            number of normally archived workflows
                            number of force archived workflows
        """
        # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival
        #       Pipelines are executed sequentially in the above order.
        #       This way we assure ourselves that we archive only workflows
        #       that have accomplished the needed cleanup

        cleanNumRequests = 0
        totalNumRequests = 0

        # Call the workflow dispatcher:
        for req in viewvalues(reqRecords):
            wflow = MSRuleCleanerWflow(req)
            self._dispatchWflow(wflow)
            msg = "\n----------------------------------------------------------"
            msg += "\nMSRuleCleanerWflow: %s"
            msg += "\n----------------------------------------------------------"
            self.logger.debug(msg, pformat(wflow))
            totalNumRequests += 1
            if self._checkClean(wflow):
                cleanNumRequests += 1

        # Report the counters:
        for pline in self.cleanuplines:
            msg = "Workflows cleaned by pipeline: %s: %d"
            self.logger.info(msg, pline.name, self.wfCounters['cleaned'][pline.name])
        normalArchivedNumRequests = self.wfCounters['archived']['normalArchived']
        forceArchivedNumRequests = self.wfCounters['archived']['forceArchived']
        self.logger.info("Workflows normally archived: %d", self.wfCounters['archived']['normalArchived'])
        self.logger.info("Workflows force archived: %d", self.wfCounters['archived']['forceArchived'])
        return totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests

    def _dispatchWflow(self, wflow):
        """
        A function intended to dispatch a workflow (e.g. based on its status)
        through one or more functional pipelines, in case there is some more
        complicated logic involved in the order we execute them rather than
        just running them sequentially
        """
        self.logger.debug("Dispatching workflow: %s", wflow['RequestName'])
        # NOTE: The following dispatch logic is a subject to be changed at any time

        # Resolve:
        # NOTE: First resolve any preliminary flags that will be needed further
        #       in the logic of the _dispatcher() itself
        if wflow['RequestStatus'] == 'announced':
            self.getMSOutputTransferInfo(wflow)

        # Clean:
        # Do not clean any Resubmission, but still let them be archived
        if wflow['RequestType'] == 'Resubmission':
            wflow['ForceArchive'] = True
            msg = "Skipping cleanup step for workflow: %s - RequestType is %s."
            msg += " Will try to archive it directly."
            self.logger.info(msg, wflow['RequestName'], wflow['RequestType'])
        elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']:
            # NOTE: We do not check the ParentageResolved flag for these
            #       workflows, but we do need to clean output data placement
            #       rules from the agents for them
            for pline in self.agentlines:
                try:
                    pline.run(wflow)
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        elif wflow['RequestStatus'] == 'announced' and not wflow['ParentageResolved']:
            # NOTE: We skip workflows which are not having 'ParentageResolved'
            #       flag, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferDone']:
            # NOTE: We skip workflows which have not yet finalised their TransferStatus
            #       in MSOutput, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferTape']:
            # NOTE: We skip workflows which have not yet finalised their tape transfers.
            #       (i.e. even if a single output which is supposed to be covered
            #       by a tape rule is in any of the following transient states:
            #       {REPLICATING, STUCK, SUSPENDED, WAITING_APPROVAL}.)
            #       We still need some proper logging for them.
            msg = "Skipping workflow: %s - tape transfers are not yet completed."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced':
            for pline in self.cleanuplines:
                try:
                    pline.run(wflow)
                except MSRuleCleanerResolveParentError as ex:
                    msg = "%s: Parentage Resolve Error: %s. "
                    msg += "Will retry again in the next cycle."
                    self.logger.error(msg, pline.name, str(ex))
                    continue
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error:  \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        else:
            # We shouldn't be here:
            msg = "Skipping workflow: %s - "
            msg += "Does not fall under any of the defined categories."
            self.logger.error(msg, wflow['RequestName'])

        # Archive:
        try:
            self.plineArchive.run(wflow)
            if wflow['ForceArchive']:
                self.wfCounters['archived']['forceArchived'] += 1
            else:
                self.wfCounters['archived']['normalArchived'] += 1
        except MSRuleCleanerArchivalSkip as ex:
            msg = "%s: Proper conditions not met: %s. "
            msg += "Skipping archival in the current cycle."
            self.logger.info(msg, wflow['PlineMarkers'][-1], str(ex))
        except MSRuleCleanerArchivalError as ex:
            msg = "%s: Archival Error: %s. "
            msg += "Will retry again in the next cycle."
            self.logger.error(msg, wflow['PlineMarkers'][-1], str(ex))
        except Exception as ex:
            msg = "%s General error from pipeline. Workflow: %s. Error: \n%s. "
            msg += "\nWill retry again in the next cycle."
            self.logger.exception(msg, wflow['PlineMarkers'][-1], wflow['RequestName'], str(ex))

    def setPlineMarker(self, wflow, pName):
        """
        A function to mark which pipeline is currently working on the
        workflow. It must always be called as the first function in the
        pipeline.
        :param  wflow:   A MSRuleCleaner workflow representation
        :param  pName:   The name of the functional pipeline
        :return:         The workflow object
        """
        # NOTE: The current functional pipeline MUST always be appended at the
        #       end of the 'PlineMarkers' list

        # First replace the default value (e.g. None) with an empty list:
        if not wflow['PlineMarkers']:
            wflow['PlineMarkers'] = []

        # Then push our current value into the markers list:
        wflow['PlineMarkers'].append(pName)

        # Populate the list of flags to be used later:
        if pName not in wflow['RulesToClean']:
            if pName in self.cleanupPipeNames:
                wflow['RulesToClean'][pName] = []
        if pName not in wflow['CleanupStatus']:
            if pName in self.cleanupPipeNames:
                wflow['CleanupStatus'][pName] = False
        return wflow
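
    # Hedged, illustrative walk-through of the method above (the pipeline name
    # is an assumed example): starting from a fresh workflow, a call such as
    #     self.setPlineMarker(wflow, 'plineAgentCont')
    # would leave:
    #     wflow['PlineMarkers'] == [..., 'plineAgentCont']
    #     wflow['RulesToClean']['plineAgentCont'] == []      # cleanup pipelines only
    #     wflow['CleanupStatus']['plineAgentCont'] == False  # cleanup pipelines only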

    def _checkClean(self, wflow):
        """
        An auxiliary function used only to check the temporary cleanup status.
        It takes the pipelines registered in 'PlineMarkers' that have already
        worked on the workflow as a mask, applies this mask over the set of
        flags in the 'CleanupStatus' field, and then reduces the result to a
        single bool value
        """
        # NOTE: This is one of the few functions taking a workflow as an argument
        #       but returning a bool, since it is an auxiliary function and is not
        #       supposed to be called as a standalone function in a pipeline.
        # NOTE: `all([]) == True`, ergo all the 'rejected' and 'aborted-completed' workflows
        #       are also counted as properly cleaned and can trigger archival later

        # Build a list of bool flags based on the mask of PlineMarkers
        cleanFlagsList = [wflow['CleanupStatus'][key]
                          for key in wflow['PlineMarkers']
                          if key in wflow['CleanupStatus']]

        # If no pipeline has worked on the workflow, set the clean status to False
        if not wflow['PlineMarkers']:
            cleanStatus = False
        # If the mask is longer than the list of flags, avoid false positives
        # caused by the behavior explained above - `all([]) == True`
        elif not cleanFlagsList:
            cleanStatus = False
        # Figure out the final value
        else:
            cleanStatus = all(cleanFlagsList)
        return cleanStatus
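
    # Hedged worked example of the masking logic above (pipeline names assumed):
    #     wflow['PlineMarkers']  == ['plineA', 'plineB']
    #     wflow['CleanupStatus'] == {'plineA': True, 'plineB': False}
    # produces the mask [True, False], and all([True, False]) == False; a
    # workflow with markers but no matching flags produces [] and is forced to
    # False explicitly, precisely to dodge the `all([]) == True` pitfall.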

    def setClean(self, wflow):
        """
        A function to set the 'IsClean' flag based on the status from all the
        pipelines which have worked on the workflow (and have put their markers
        in the 'PlineMarkers' list)
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        wflow['IsClean'] = self._checkClean(wflow)
        return wflow

    def _checkLogDBClean(self, wflow):
        """
        An auxiliary function used only to check the LogDB cleanup status.
        It queries LogDB in order to verify that there are no records for
        the current workflow
        :param wflow:       A MSRuleCleaner workflow representation
        :return:            True if no records were found in LogDB about wflow
        """
        cleanStatus = False
        logDBRecords = self.logDB.get(wflow['RequestName'])
        self.logger.debug("logDBRecords: %s", pformat(logDBRecords))
        if not logDBRecords:
            cleanStatus = True
        return cleanStatus

    def setLogDBClean(self, wflow):
        """
        A function to set the 'IsLogDBClean' flag based on the presence of any
        records in LogDB for the current workflow.
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        wflow['IsLogDBClean'] = self._checkLogDBClean(wflow)
        if not wflow['IsLogDBClean'] and wflow['IsArchivalDelayExpired']:
            wflow['IsLogDBClean'] = self._cleanLogDB(wflow)
        return wflow

    def _cleanLogDB(self, wflow):
        """
        A function to be used for cleaning all the records related to a workflow in LogDB.
        :param wflow:       A MSRuleCleaner workflow representation
        :return:            True if NO errors were encountered while deleting
                            records from LogDB
        """
        cleanStatus = False
        try:
            if self.msConfig['enableRealMode']:
                self.logger.info("Deleting %s records from LogDB WMStats...", wflow['RequestName'])
                res = self.logDB.delete(wflow['RequestName'], agent=False)
                if res == 'delete-error':
                    msg = "Failed to delete logDB docs for wflow: %s" % wflow['RequestName']
                    raise MSRuleCleanerArchivalError(msg)
                cleanStatus = True
            else:
                self.logger.info("DRY-RUN: NOT Deleting %s records from LogDB WMStats...", wflow['RequestName'])
        except Exception as ex:
            msg = "General Exception while cleaning LogDB records for wflow: %s : %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))
        return cleanStatus

    def findTargetStatus(self, wflow):
        """
        Find the proper targeted archival status
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        # Check the available status transitions before we decide the final status
        targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get(wflow['RequestStatus'], [])
        for status in targetStatusList:
            if self.targetStatusRegex.match(status):
                wflow['TargetStatus'] = status
        self.logger.debug("TargetStatus: %s", wflow['TargetStatus'])
        return wflow
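
    # Hedged example: for wflow['RequestStatus'] == 'announced' the allowed
    # transitions include 'normal-archived'; assuming self.targetStatusRegex
    # was compiled from a pattern like '.*archived', that status matches and
    # becomes wflow['TargetStatus'] (the last matching status wins).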

    def _checkArchDelayExpired(self, wflow):
        """
        A function to check Archival Expiration Delay based on the information
        returned by WMStatsServer regarding the time of the last request status transition
        :param wflow:      MSRuleCleaner workflow representation
        :return:           True if the archival delay has expired
        """
        archDelayExpired = False
        currentTime = int(time.time())
        threshold = self.msConfig['archiveDelayHours'] * 3600
        try:
            lastTransitionTime = wflow['RequestTransition'][-1]['UpdateTime']
            if lastTransitionTime and (currentTime - lastTransitionTime) > threshold:
                archDelayExpired = True
        except KeyError:
            self.logger.debug("Could not find status transition history for %s", wflow['RequestName'])
        return archDelayExpired
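
    # Hedged arithmetic example: with msConfig['archiveDelayHours'] == 8 the
    # threshold is 8 * 3600 == 28800 seconds, so the delay counts as expired
    # only if the last 'UpdateTime' is more than 8 hours in the past.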

    def setArchivalDelayExpired(self, wflow):
        """
        A function to set the 'IsArchivalDelayExpired' flag
        """
        wflow['IsArchivalDelayExpired'] = self._checkArchDelayExpired(wflow)
        return wflow

    def archive(self, wflow):
        """
        Move the workflow to the proper archived status after checking
        the full cleanup status
        :param  wflow:      A MSRuleCleaner workflow representation
        :return:            The workflow object
        """
        # Make all the needed checks before trying to archive
        if not (wflow['IsClean'] or wflow['ForceArchive']):
            msg = "Not properly cleaned workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not wflow['TargetStatus']:
            msg = "Could not determine which archival status to target for workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalError(msg)
        if not wflow['IsLogDBClean']:
            msg = "LogDB records have not been cleaned for workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not wflow['IsArchivalDelayExpired']:
            msg = "Archival delay period has not yet expired for workflow: %s." % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not self.msConfig['enableRealMode']:
            msg = "Real Run Mode not enabled."
            raise MSRuleCleanerArchivalSkip(msg)

        # Proceed with the actual archival:
        try:
            self.reqmgr2.updateRequestStatus(wflow['RequestName'], wflow['TargetStatus'])
            msg = "Successful status transition to: %s for workflow: %s"
            self.logger.info(msg, wflow['TargetStatus'], wflow['RequestName'])
        except Exception as ex:
            msg = "General Exception while trying status transition to: %s " % wflow['TargetStatus']
            msg += "for workflow: %s : %s" % (wflow['RequestName'], str(ex))
            raise MSRuleCleanerArchivalError(msg)
        return wflow

    def getMSOutputTransferInfo(self, wflow):
        """
        Fetches the transfer information from the MSOutput REST interface for
        the given workflow.
        :param  wflow:   A MSRuleCleaner workflow representation
        :return:         The workflow object
        """
        headers = {'Accept': 'application/json'}
        params = {}
        url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'],
                                           wflow['RequestName'])
        transferInfo = None
        try:
            res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
            data = json.loads(res)['result'][0]
            transferInfo = data['transferDoc']
        except Exception as ex:
            msg = "General exception while fetching TransferInfo from MSOutput for %s. "
            msg += "Error: %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))

        # Set Transfer status - information fetched from MSOutput only
        if transferInfo is not None and transferInfo['TransferStatus'] == 'done':
            wflow['TransferDone'] = True

        # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput)
        if transferInfo is not None and transferInfo['OutputMap']:
            tapeRulesStatusList = []
            # For setting 'TransferTape' = True we require that either no tape rules
            # have been created for the workflow or that all existing tape rules are
            # in state 'OK'; hence every empty TapeRuleID counts as completed.
            for mapRecord in transferInfo['OutputMap']:
                if not mapRecord['TapeRuleID']:
                    continue
                rucioRule = self.rucio.getRule(mapRecord['TapeRuleID'])
                if not rucioRule:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s not found for workflow: %s "
                    msg += "Possible server side error."
                    self.logger.error(msg, mapRecord['TapeRuleID'], wflow['RequestName'])
                    continue
                if rucioRule['state'] == 'OK':
                    tapeRulesStatusList.append(True)
                    msg = "Tape rule: %s in final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
                else:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s in non final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
            if all(tapeRulesStatusList):
                wflow['TransferTape'] = True

        return wflow
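
    # Hedged note on the reduction above: tapeRulesStatusList only collects
    # flags for non-empty TapeRuleIDs, so a workflow with no tape rules at all
    # reduces via all([]) == True and gets 'TransferTape' = True - exactly the
    # intended "nothing left to wait for" semantics.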

    def setParentDatasets(self, wflow):
        """
        Used to resolve parent datasets for a workflow.
        :param  wflow:   A MSRuleCleaner workflow representation
        :return:         The workflow object
        """
        if wflow['InputDataset'] and wflow['IncludeParents']:
            childDataset = wflow['InputDataset']
            parentDataset = findParent([childDataset], self.msConfig['dbsUrl'])
            # NOTE: If findParent() returned None then the DBS service failed to
            #       resolve the request (it is considered an ERROR outside WMCore)
            if parentDataset.get(childDataset, None) is None:
                msg = "Failed to resolve parent dataset for: %s in workflow: %s" % (childDataset, wflow['RequestName'])
                raise MSRuleCleanerResolveParentError(msg)
            elif parentDataset:
                wflow['ParentDataset'] = [parentDataset[childDataset]]
                msg = "Found parent %s for input dataset %s in workflow: %s "
                self.logger.info(msg, parentDataset, wflow['InputDataset'], wflow['RequestName'])
            else:
                msg = "Could not find parent for input dataset: %s in workflows: %s"
                self.logger.error(msg, wflow['InputDataset'], wflow['RequestName'])
        return wflow

    def getRucioRules(self, wflow, gran, rucioAcct):
        """
        Queries Rucio and builds the list of relevant container- or block-level
        rules for the given workflow
        :param  wflow:     A MSRuleCleaner workflow representation
        :param  gran:      Data granularity to search for Rucio rules. Possible values:
                           'block' or 'container'
        :param  rucioAcct: The Rucio account whose data placement rules are searched
        :return:           The workflow object
        """
        currPline = wflow['PlineMarkers'][-1]

        # Map each Rucio account to the workflow fields holding its containers and set the checkGlobalLocks flag.
        mapRuleType = {self.msConfig['rucioWmaAccount']: ["OutputDatasets"],
                       self.msConfig['rucioMStrAccount']: ["InputDataset", "MCPileup",
                                                           "DataPileup", "ParentDataset"]}
        if rucioAcct == self.msConfig['rucioMStrAccount']:
            checkGlobalLocks = True
        else:
            checkGlobalLocks = False

        # Find all the data placement rules created by the components:
        for dataType in mapRuleType[rucioAcct]:
            dataList = wflow[dataType] if isinstance(wflow[dataType], list) else [wflow[dataType]]
            for dataCont in dataList:
                if dataCont is None:
                    continue
                self.logger.debug("getRucioRules: dataCont: %s", pformat(dataCont))
                if checkGlobalLocks and dataCont in self.globalLocks:
                    msg = "Found dataset: %s in GlobalLocks. NOT considering it for filling the "
                    msg += "RulesToClean list for both container and block level Rules for workflow: %s!"
                    self.logger.info(msg, dataCont, wflow['RequestName'])
                    continue
                if gran == 'container':
                    for rule in self.rucio.listDataRules(dataCont, account=rucioAcct):
                        wflow['RulesToClean'][currPline].append(rule['id'])
                        msg = "Found %s container-level rule to be deleted for container %s"
                        self.logger.info(msg, rule['id'], dataCont)
                elif gran == 'block':
                    try:
                        blocks = self.rucio.getBlocksInContainer(dataCont)
                        for block in blocks:
                            for rule in self.rucio.listDataRules(block, account=rucioAcct):
                                wflow['RulesToClean'][currPline].append(rule['id'])
                                msg = "Found %s block-level rule to be deleted for container %s"
                                self.logger.info(msg, rule['id'], dataCont)
                    except WMRucioDIDNotFoundException:
                        msg = "Container: %s not found in Rucio for workflow: %s."
                        self.logger.info(msg, dataCont, wflow['RequestName'])
        return wflow

    def cleanRucioRules(self, wflow):
        """
        Cleans all the rules present in the field 'RulesToClean' of the MSRuleCleaner
        workflow representation and fills the relevant cleanup status.
        :param wflow:   A MSRuleCleaner workflow representation
        :return:        The workflow object
        """
        # NOTE: This function is expected to be called independently and sequentially
        #       from the Input and the respective Block-level cleanup pipelines.

        # NOTE: The current functional pipeline is always the last one in the PlineMarkers list
        currPline = wflow['PlineMarkers'][-1]
        delResults = []
        if self.msConfig['enableRealMode']:
            for rule in wflow['RulesToClean'][currPline]:
                self.logger.info("%s: Deleting ruleId: %s ", currPline, rule)
                delResult = self.rucio.deleteRule(rule)
                delResults.append(delResult)
                if not delResult:
                    self.logger.warning("%s: Failed to delete ruleId: %s ", currPline, rule)
        else:
            for rule in wflow['RulesToClean'][currPline]:
                delResults.append(True)
                self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ", currPline, rule)

        # Set the cleanup flag:
        wflow['CleanupStatus'][currPline] = all(delResults)
        return wflow
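
    # Hedged example (rule ids assumed): with RulesToClean == ['ruleA', 'ruleB']
    # and deleteRule() returning True then False, the final flag becomes
    # all([True, False]) == False, so this pipeline retries in a later cycle;
    # in DRY-RUN mode every deletion is assumed successful and the flag is True.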

    def getRequestRecords(self, reqStatus):
        """
        Queries ReqMgr2 for requests in a given status.
        :param reqStatus: The status for the requests to be fetched from ReqMgr2
        :return requests: A dictionary with all the workflows in the given status
        """
        self.logger.info("Fetching requests in status: %s", reqStatus)
        result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True)
        if not result:
            requests = {}
        else:
            requests = result[0]
        self.logger.info('  retrieved %s requests in status: %s', len(requests), reqStatus)
        return requests
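
# A minimal, self-contained sketch of the pipeline/marker pattern the methods
# above rely on (hedged: Pipeline and the step names below are simplified,
# assumed stand-ins, not the actual WMCore implementation):
def _sketchPipelinePattern():
    "Hedged sketch: chain step functions over a workflow dict and mark each step."
    class Pipeline(object):
        def __init__(self, name, funcLine):
            self.name = name
            self.funcLine = funcLine

        def run(self, wflow):
            # Apply every step in order; each step returns the (mutated) workflow
            for func in self.funcLine:
                wflow = func(wflow)
            return wflow

    def setMarker(wflow):
        # setPlineMarker analog: register the pipeline and a default clean flag
        wflow.setdefault('PlineMarkers', []).append('plineSketch')
        wflow.setdefault('CleanupStatus', {})['plineSketch'] = False
        return wflow

    def cleanStep(wflow):
        # cleanRucioRules analog: pretend all rules were deleted successfully
        wflow['CleanupStatus'][wflow['PlineMarkers'][-1]] = True
        return wflow

    wflow = {}
    Pipeline('plineSketch', [setMarker, cleanStep]).run(wflow)
    # _checkClean analog: mask 'CleanupStatus' with 'PlineMarkers'
    assert all(wflow['CleanupStatus'][m] for m in wflow['PlineMarkers'])
    return wflow
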
class AlertManagerAPI(object):
    """
    A class used to send alerts via the MONIT AlertManager API
    """
    def __init__(self, alertManagerUrl, logger=None):
        self.alertManagerUrl = alertManagerUrl
        # sender's hostname is added as an annotation
        self.hostname = socket.gethostname()
        self.mgr = RequestHandler()
        self.ltz = LocalTimezone()
        self.headers = {"Content-Type": "application/json"}
        self.validSeverity = ["high", "medium", "low"]
        self.logger = logger if logger else logging.getLogger()

    def sendAlert(self,
                  alertName,
                  severity,
                  summary,
                  description,
                  service,
                  tag="wmcore",
                  endSecs=600,
                  generatorURL=""):
        """
        :param alertName: a unique name for the alert
        :param severity: low, medium, high
        :param summary: a short description of the alert
        :param description: a longer informational message with details about the alert
        :param service: the name of the service firing an alert
        :param tag: a unique tag used to help route the alert
        :param endSecs: number of seconds until the alert expires (used to compute 'endsAt')
        :param generatorURL: this URL will be sent to AlertManager and configured as a clickable "Source" link in the web interface

        AlertManager JSON format reference: https://www.prometheus.io/docs/alerting/latest/clients/
        [
          {
            "labels": {
              "alertname": "<requiredAlertName>",
              "<labelname>": "<labelvalue>",
              ...
            },
            "annotations": {
              "<labelname>": "<labelvalue>",
              ...
            },
            "startsAt": "<rfc3339>",  # optional, will be current time if not present
            "endsAt": "<rfc3339>",
            "generatorURL": "<generator_url>"  # optional
          },
        ]
        """

        if not self._isValidSeverity(severity):
            return False

        request = []
        alert = {}
        labels = {}
        annotations = {}

        # add labels
        labels["alertname"] = alertName
        labels["severity"] = severity
        labels["tag"] = tag
        labels["service"] = service
        alert["labels"] = labels

        # add annotations
        annotations["hostname"] = self.hostname
        annotations["summary"] = summary
        annotations["description"] = description
        alert["annotations"] = annotations

        # In python3 we won't need the LocalTimezone class
        # Will change to d = datetime.now().astimezone() + timedelta(seconds=endSecs)
        d = datetime.now(self.ltz) + timedelta(seconds=endSecs)
        alert["endsAt"] = d.isoformat("T")
        alert["generatorURL"] = generatorURL

        request.append(alert)
        # pycurl_manager only accepts dicts or already-encoded strings, hence the dump
        params = json.dumps(request)

        res = self.mgr.getdata(self.alertManagerUrl,
                               params=params,
                               headers=self.headers,
                               verb='POST')

        return res

    def _isValidSeverity(self, severity):
        """
        Used to check if the severity of the alert matches the valid levels: low, medium, high
        :param severity: severity of the alert
        :return: True or False
        """
        if severity not in self.validSeverity:
            self.logger.critical(
                "Alert submitted to AlertManagerAPI with invalid severity: %s",
                severity)
            return False
        return True
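
# Hedged usage sketch for the class above (the URL and every alert value are
# placeholders for illustration, not a real deployment):
def _sketchSendAlert():
    "Minimal sendAlert() call, assuming a local AlertManager endpoint."
    alertAPI = AlertManagerAPI("http://localhost:30093/api/v1/alerts")
    return alertAPI.sendAlert(alertName="ms-ruleCleaner-archival-blocked",
                              severity="medium",
                              summary="Workflow archival blocked",
                              description="Tape transfers pending for more than 24 hours",
                              service="ms-ruleCleaner",
                              endSecs=1800)
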
class MicroServiceTest(unittest.TestCase):
    "Unit test for MicroService module"

    def setUp(self):
        "Setup MicroService for testing"
        self.managerName = "ServiceManager"
        config = TestConfig
        manager = 'WMCore_t.MicroService_t.MicroService_t.%s' % self.managerName
        config.views.data.manager = manager
        config.manager = manager
        mount = '/microservice/data'
        self.mgr = RequestHandler()
        self.port = config.main.port
        self.url = 'http://localhost:%s%s' % (self.port, mount)
        cherrypy.config["server.socket_port"] = self.port
        self.app = ServiceManager(config)
        self.server = RestApiHub(self.app, config, mount)
        cherrypy.tree.mount(self.server, mount)
        cherrypy.engine.start()

    def tearDown(self):
        "Tear down MicroService"
        cherrypy.engine.stop()
        cherrypy.engine.exit()

    def postRequest(self, apiName, params):
        "Perform POST request to our MicroService"
        headers = {'Content-type': 'application/json'}
        url = self.url + "/%s" % apiName
        data = self.mgr.getdata(url, params=params, headers=headers, \
                                verb='POST', cert=cert(), ckey=ckey(), encode=True, decode=True)
        print("### post call data %s" % data)
        return data

    def testGetStatus(self):
        "Test function for getting state of the MicroService"
        api = "status"
        url = '%s/%s' % (self.url, api)
        params = {}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)

        params = {"service": "transferor"}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)

    def testGetInfo(self):
        "Test function for getting state of the MicroService"
        api = "status"
        url = '%s/%s' % (self.url, api)
        params = {}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)

        params = {"request": "fake_request_name"}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)

    def testPostCall(self):
        "Test function for getting state of the MicroService"
        api = "status"
        data = {"request": "fake_request_name"}
        data = self.postRequest(api, data)
        self.assertDictEqual(data['result'][0], {
            'status': 'OK',
            'api': 'info'
        })
Exemple #34
0
def getdata(url, params, headers=None):
    "Helper function to get data from the service"
    ckey, cert = getKeyCertFromEnv()
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey, cert=cert)
    return json.loads(res)
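
# Hedged usage sketch for the helper above (the URL is a placeholder and
# getKeyCertFromEnv() must resolve a valid key/cert pair from the environment):
def _sketchGetdata():
    "Fetch JSON from a service endpoint via the getdata() helper."
    url = 'https://cmsweb.cern.ch/reqmgr2/data/info'
    return getdata(url, params={}, headers={'Accept': 'application/json'})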