class MSRuleCleaner(MSCore):
    """
    MSRuleCleaner.py class provides the logic used to clean the Rucio
    block level data placement rules created by WMAgent.
    """

    def __init__(self, msConfig, logger=None):
        """
        Runs the basic setup and initialization for the MSRuleCleaner module
        :param msConfig: micro service configuration
        """
        super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 60)
        self.msConfig.setdefault("services", ['ruleCleaner'])
        self.msConfig.setdefault("rucioWmaAccount", "wma_test")
        self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor")
        self.msConfig.setdefault('enableRealMode', False)

        self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode"
        self.curlMgr = RequestHandler()
        # Any status matching '.*archived' is accepted as a final archival target
        self.targetStatusRegex = re.compile(r'.*archived')
        self.logDB = LogDB(self.msConfig["logDBUrl"],
                           self.msConfig["logDBReporter"],
                           logger=self.logger)
        self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger)

        # Building all the Pipelines:
        pName = 'plineMSTrCont'
        self.plineMSTrCont = Pipeline(name=pName,
                                      funcLine=[Functor(self.setPlineMarker, pName),
                                                Functor(self.setParentDatasets),
                                                Functor(self.getRucioRules, 'container', self.msConfig['rucioMStrAccount']),
                                                Functor(self.cleanRucioRules)])
        pName = 'plineMSTrBlock'
        self.plineMSTrBlock = Pipeline(name=pName,
                                       funcLine=[Functor(self.setPlineMarker, pName),
                                                 Functor(self.setParentDatasets),
                                                 Functor(self.getRucioRules, 'block', self.msConfig['rucioMStrAccount']),
                                                 Functor(self.cleanRucioRules)])
        pName = 'plineAgentCont'
        self.plineAgentCont = Pipeline(name=pName,
                                       funcLine=[Functor(self.setPlineMarker, pName),
                                                 Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']),
                                                 Functor(self.cleanRucioRules)])
        pName = 'plineAgentBlock'
        self.plineAgentBlock = Pipeline(name=pName,
                                        funcLine=[Functor(self.setPlineMarker, pName),
                                                  Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']),
                                                  Functor(self.cleanRucioRules)])
        pName = 'plineArchive'
        self.plineArchive = Pipeline(name=pName,
                                     funcLine=[Functor(self.setPlineMarker, pName),
                                               Functor(self.findTargetStatus),
                                               Functor(self.setClean),
                                               Functor(self.setArchivalDelayExpired),
                                               Functor(self.setLogDBClean),
                                               Functor(self.archive)])

        # Building the different set of plines we will need later:
        # NOTE: The following are all the functional pipelines which are supposed to include
        #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
        self.cleanuplines = [self.plineMSTrCont,
                             self.plineMSTrBlock,
                             self.plineAgentCont,
                             self.plineAgentBlock]
        # Building an auxiliary list of cleanup pipeline names only:
        self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

        # Building lists of pipelines related only to Agents or MStransferror
        self.agentlines = [self.plineAgentCont,
                           self.plineAgentBlock]
        self.mstrlines = [self.plineMSTrCont,
                          self.plineMSTrBlock]

        # Initialization of the 'cleaned' and 'archived' counters:
        self.wfCounters = {'cleaned': {},
                           'archived': {'normalArchived': 0, 'forceArchived': 0}}
        self.globalLocks = set()

    def getGlobalLocks(self):
        """
        Fetches the list of 'globalLocks' from wmstats server and the list of
        'parentLocks' from request manager. Stores/updates the unified set in
        the 'globalLocks' instance variable. Returns the resultant unified set.
        :return: A union set of the 'globalLocks' and the 'parentLocks' lists
        """
        self.logger.info("Fetching globalLocks list from wmstats server.")
        try:
            globalLocks = set(self.wmstatsSvc.getGlobalLocks())
        except Exception as ex:
            msg = "Failed to refresh global locks list for the current polling cycle. Error: %s "
            msg += "Skipping this polling cycle."
            self.logger.error(msg, str(ex))
            raise ex
        self.logger.info("Fetching parentLocks list from reqmgr2 server.")
        try:
            parentLocks = set(self.reqmgr2.getParentLocks())
        except Exception as ex:
            # FIXED: typo 'poling' -> 'polling' in the log message
            msg = "Failed to refresh parent locks list for the current polling cycle. Error: %s "
            msg += "Skipping this polling cycle."
            self.logger.error(msg, str(ex))
            raise ex
        self.globalLocks = globalLocks | parentLocks
        # FIXED: return the unified set as promised by the docstring (previously returned None)
        return self.globalLocks

    def resetCounters(self):
        """
        A simple function for zeroing the cleaned and archived counters.
        """
        for pline in self.cleanuplines:
            self.wfCounters['cleaned'][pline.name] = 0
        self.wfCounters['archived']['normalArchived'] = 0
        self.wfCounters['archived']['forceArchived'] = 0

    def execute(self, reqStatus):
        """
        Executes the whole ruleCleaner logic
        :param reqStatus: a list of request statuses to fetch workflows for
        :return: summary
        """
        # start threads in MSManager which should call this method
        summary = dict(RULECLEANER_REPORT)
        self.currThread = current_thread()
        self.currThreadIdent = self.currThread.name
        self.updateReportDict(summary, "thread_id", self.currThreadIdent)
        self.resetCounters()
        self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode)

        # Build the list of workflows to work on:
        try:
            requestRecords = {}
            for status in reqStatus:
                requestRecords.update(self.getRequestRecords(status))
        except Exception as err:  # general error
            # FIXED: the message used to be built as a tuple
            # ("...Error: %s", str(err)) instead of a formatted string,
            # which polluted both the log record and the summary report.
            msg = "Unknown exception while fetching requests from ReqMgr2. Error: %s" % str(err)
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)

        # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords
        try:
            self.getGlobalLocks()
            totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests = self._execute(requestRecords)
            msg = "\nNumber of processed workflows: %s."
            msg += "\nNumber of properly cleaned workflows: %s."
            msg += "\nNumber of normally archived workflows: %s."
            msg += "\nNumber of force archived workflows: %s."
            self.logger.info(msg,
                             totalNumRequests,
                             cleanNumRequests,
                             normalArchivedNumRequests,
                             forceArchivedNumRequests)
            self.updateReportDict(summary, "total_num_requests", totalNumRequests)
            self.updateReportDict(summary, "clean_num_requests", cleanNumRequests)
            self.updateReportDict(summary, "normal_archived_num_requests", normalArchivedNumRequests)
            self.updateReportDict(summary, "force_archived_num_requests", forceArchivedNumRequests)
        except Exception as ex:
            msg = "Unknown exception while running MSRuleCleaner thread Error: %s"
            self.logger.exception(msg, str(ex))
            self.updateReportDict(summary, "error", msg)
        return summary

    def _execute(self, reqRecords):
        """
        Executes the MSRuleCleaner pipelines based on the workflow status
        :param reqRecords: A dictionary of RequestRecords to work on
        :return: a tuple with:
            number of processed workflows
            number of properly cleaned workflows
            number of normally archived workflows
            number of force archived workflows
        """
        # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival
        #       Pipelines are executed sequentially in the above order.
        #       This way we assure ourselves that we archive only workflows
        #       that have accomplished the needed cleanup
        cleanNumRequests = 0
        totalNumRequests = 0

        # Call the workflow dispatcher:
        for req in viewvalues(reqRecords):
            wflow = MSRuleCleanerWflow(req)
            self._dispatchWflow(wflow)
            msg = "\n----------------------------------------------------------"
            msg += "\nMSRuleCleanerWflow: %s"
            msg += "\n----------------------------------------------------------"
            self.logger.debug(msg, pformat(wflow))
            totalNumRequests += 1
            if self._checkClean(wflow):
                cleanNumRequests += 1

        # Report the counters:
        for pline in self.cleanuplines:
            msg = "Workflows cleaned by pipeline: %s: %d"
            self.logger.info(msg, pline.name, self.wfCounters['cleaned'][pline.name])
        normalArchivedNumRequests = self.wfCounters['archived']['normalArchived']
        forceArchivedNumRequests = self.wfCounters['archived']['forceArchived']
        self.logger.info("Workflows normally archived: %d", self.wfCounters['archived']['normalArchived'])
        self.logger.info("Workflows force archived: %d", self.wfCounters['archived']['forceArchived'])
        return totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests

    def _dispatchWflow(self, wflow):
        """
        A function intended to dispatch a workflow (e.g based on its status)
        through one or more functional pipelines in case there is some more
        complicated logic involved in the order we execute them but not just
        a sequentially
        :param wflow: A MSRuleCleaner workflow representation
        """
        self.logger.debug("Dispatching workflow: %s", wflow['RequestName'])
        # NOTE: The following dispatch logic is a subject to be changed at any time

        # Resolve:
        # NOTE: First resolve any preliminary flags that will be needed further
        #       in the logic of the _dispatcher() itself
        if wflow['RequestStatus'] == 'announced':
            self.getMSOutputTransferInfo(wflow)

        # Clean:
        # Do not clean any Resubmission, but still let them be archived
        if wflow['RequestType'] == 'Resubmission':
            wflow['ForceArchive'] = True
            msg = "Skipping cleanup step for workflow: %s - RequestType is %s."
            msg += " Will try to archive it directly."
            self.logger.info(msg, wflow['RequestName'], wflow['RequestType'])
        elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']:
            # NOTE: We do not check the ParentageResolved flag for these
            #       workflows, but we do need to clean output data placement
            #       rules from the agents for them
            for pline in self.agentlines:
                try:
                    pline.run(wflow)
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        elif wflow['RequestStatus'] == 'announced' and not wflow['ParentageResolved']:
            # NOTE: We skip workflows which are not having 'ParentageResolved'
            #       flag, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferDone']:
            # NOTE: We skip workflows which have not yet finalised their TransferStatus
            #       in MSOutput, but we still need some proper logging for them.
            msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced' and not wflow['TransferTape']:
            # NOTE: We skip workflows which have not yet finalised their tape transfers.
            #       (i.e. even if a single output which is supposed to be covered
            #       by a tape rule is in any of the following transient states:
            #       {REPLICATING, STUCK, SUSPENDED, WAITING_APPROVAL}.)
            #       We still need some proper logging for them.
            msg = "Skipping workflow: %s - tape transfers are not yet completed."
            msg += " Will retry again in the next cycle."
            self.logger.info(msg, wflow['RequestName'])
        elif wflow['RequestStatus'] == 'announced':
            for pline in self.cleanuplines:
                try:
                    pline.run(wflow)
                except MSRuleCleanerResolveParentError as ex:
                    msg = "%s: Parentage Resolve Error: %s. "
                    msg += "Will retry again in the next cycle."
                    self.logger.error(msg, pline.name, str(ex))
                    continue
                except Exception as ex:
                    msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. "
                    msg += "\nWill retry again in the next cycle."
                    self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex))
                    continue
                if wflow['CleanupStatus'][pline.name]:
                    self.wfCounters['cleaned'][pline.name] += 1
        else:
            # We shouldn't be here:
            msg = "Skipping workflow: %s - "
            msg += "Does not fall under any of the defined categories."
            self.logger.error(msg, wflow['RequestName'])

        # Archive:
        try:
            self.plineArchive.run(wflow)
            if wflow['ForceArchive']:
                self.wfCounters['archived']['forceArchived'] += 1
            else:
                self.wfCounters['archived']['normalArchived'] += 1
        except MSRuleCleanerArchivalSkip as ex:
            msg = "%s: Proper conditions not met: %s. "
            msg += "Skipping archival in the current cycle."
            self.logger.info(msg, wflow['PlineMarkers'][-1], str(ex))
        except MSRuleCleanerArchivalError as ex:
            msg = "%s: Archival Error: %s. "
            msg += "Will retry again in the next cycle."
            self.logger.error(msg, wflow['PlineMarkers'][-1], str(ex))
        except Exception as ex:
            msg = "%s General error from pipeline. Workflow: %s. Error: \n%s. "
            msg += "\nWill retry again in the next cycle."
            self.logger.exception(msg, wflow['PlineMarkers'][-1], wflow['RequestName'], str(ex))

    def setPlineMarker(self, wflow, pName):
        """
        A function intended to mark which is the pipeline currently working
        on the workflow. It is supposed to be called always as a first function
        in the pipeline.
        :param wflow: A MSRuleCleaner workflow representation
        :param pName: The name of the functional pipeline
        :return: The workflow object
        """
        # NOTE: The current functional pipeline MUST always be appended at the
        #       end of the 'PlineMarkers' list

        # First get rid of the default:
        if not wflow['PlineMarkers']:
            wflow['PlineMarkers'] = []

        # Then push our current value into the markers list:
        wflow['PlineMarkers'].append(pName)

        # Populate the list of flags to be used later:
        if pName not in wflow['RulesToClean']:
            if pName in self.cleanupPipeNames:
                wflow['RulesToClean'][pName] = []
        if pName not in wflow['CleanupStatus']:
            if pName in self.cleanupPipeNames:
                wflow['CleanupStatus'][pName] = False
        return wflow

    def _checkClean(self, wflow):
        """
        An auxiliary function used to only check the temporary cleanup status.
        It basically takes the pipelines registered in 'PlineMarkers' that have
        already worked on the workflow as a mask and applies this mask over
        the set of flags in the 'CleanupStatus' field and then reduces the
        result to a single bool value
        :param wflow: A MSRuleCleaner workflow representation
        :return: True if all cleanup pipelines that worked on the workflow succeeded
        """
        # NOTE: This is one of the few functions taking a workflow as an argument
        #       but returning a bool, since it is an auxiliary function and is not
        #       supposed to be called as a standalone function in a pipeline.
        # NOTE: `all([]) == True`, ergo all the 'rejected' && 'aborted-completed' workflows
        #       are also counted as properly cleaned and can trigger archival later

        # Build a list of bool flags based on the mask of PlineMarkers
        cleanFlagsList = [wflow['CleanupStatus'][key]
                          for key in wflow['PlineMarkers']
                          if key in wflow['CleanupStatus']]

        # If no one have worked on the workflow set the clean status to false
        if not wflow['PlineMarkers']:
            cleanStatus = False
        # If we have a mask longer than the list of flags avoid false positives
        # because of the behavior explained above - `all([]) == True`
        elif not cleanFlagsList:
            cleanStatus = False
        # Figure out the final value
        else:
            cleanStatus = all(cleanFlagsList)
        return cleanStatus

    def setClean(self, wflow):
        """
        A function to set the 'IsClean' flag based on the status from all the
        pipelines which have worked on the workflow (and have put their markers
        in the 'PlineMarkers' list)
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        """
        wflow['IsClean'] = self._checkClean(wflow)
        return wflow

    def _checkLogDBClean(self, wflow):
        """
        An auxiliary function used to only check the LogDB cleanup status.
        It makes a query to LogDB in order to verify there are no any records for
        the current workflow
        :param wflow: A MSRuleCleaner workflow representation
        :return: True if no records were found in LogDB about wflow
        """
        cleanStatus = False
        logDBRecords = self.logDB.get(wflow['RequestName'])
        self.logger.debug("logDBRecords: %s", pformat(logDBRecords))
        if not logDBRecords:
            cleanStatus = True
        return cleanStatus

    def setLogDBClean(self, wflow):
        """
        A function to set the 'IsLogDBClean' flag based on the presence of any
        records in LogDB for the current workflow.
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        """
        wflow['IsLogDBClean'] = self._checkLogDBClean(wflow)
        if not wflow['IsLogDBClean'] and wflow['IsArchivalDelayExpired']:
            wflow['IsLogDBClean'] = self._cleanLogDB(wflow)
        return wflow

    def _cleanLogDB(self, wflow):
        """
        A function to be used for cleaning all the records related to a workflow in logDB.
        :param wflow: A MSRuleCleaner workflow representation
        :return: True if NO errors were encountered while deleting records from LogDB
        """
        cleanStatus = False
        try:
            if self.msConfig['enableRealMode']:
                self.logger.info("Deleting %s records from LogDB WMStats...", wflow['RequestName'])
                res = self.logDB.delete(wflow['RequestName'], agent=False)
                if res == 'delete-error':
                    msg = "Failed to delete logDB docs for wflow: %s" % wflow['RequestName']
                    raise MSRuleCleanerArchivalError(msg)
                cleanStatus = True
            else:
                self.logger.info("DRY-RUN: NOT Deleting %s records from LogDB WMStats...", wflow['RequestName'])
        except Exception as ex:
            msg = "General Exception while cleaning LogDB records for wflow: %s : %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))
        return cleanStatus

    def findTargetStatus(self, wflow):
        """
        Find the proper targeted archival status
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        """
        # Check the available status transitions before we decide the final status
        targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get(wflow['RequestStatus'], [])
        for status in targetStatusList:
            if self.targetStatusRegex.match(status):
                wflow['TargetStatus'] = status
        self.logger.debug("TargetStatus: %s", wflow['TargetStatus'])
        return wflow

    def _checkArchDelayExpired(self, wflow):
        """
        A function to check Archival Expiration Delay based on the information
        returned by WMStatsServer regarding the time of the last request status transition
        :param wflow: MSRuleCleaner workflow representation
        :return: True if the archival delay have been expired
        """
        archDelayExpired = False
        currentTime = int(time.time())
        threshold = self.msConfig['archiveDelayHours'] * 3600
        try:
            lastTransitionTime = wflow['RequestTransition'][-1]['UpdateTime']
            if lastTransitionTime and (currentTime - lastTransitionTime) > threshold:
                archDelayExpired = True
        except KeyError:
            self.logger.debug("Could not find status transition history for %s", wflow['RequestName'])
        return archDelayExpired

    def setArchivalDelayExpired(self, wflow):
        """
        A function to set the 'IsArchivalDelayExpired' flag
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        """
        wflow['IsArchivalDelayExpired'] = self._checkArchDelayExpired(wflow)
        return wflow

    def archive(self, wflow):
        """
        Move the workflow to the proper archived status after checking
        the full cleanup status
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        :raises MSRuleCleanerArchivalSkip: when archival preconditions are not yet met
        :raises MSRuleCleanerArchivalError: when the target status is unknown or the
            status transition through ReqMgr2 fails
        """
        # Make all the needed checks before trying to archive
        if not (wflow['IsClean'] or wflow['ForceArchive']):
            msg = "Not properly cleaned workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not wflow['TargetStatus']:
            msg = "Could not determine which archival status to target for workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalError(msg)
        if not wflow['IsLogDBClean']:
            msg = "LogDB records have not been cleaned for workflow: %s" % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not wflow['IsArchivalDelayExpired']:
            msg = "Archival delay period has not yet expired for workflow: %s." % wflow['RequestName']
            raise MSRuleCleanerArchivalSkip(msg)
        if not self.msConfig['enableRealMode']:
            msg = "Real Run Mode not enabled."
            raise MSRuleCleanerArchivalSkip(msg)

        # Proceed with the actual archival:
        try:
            self.reqmgr2.updateRequestStatus(wflow['RequestName'], wflow['TargetStatus'])
            msg = "Successful status transition to: %s for workflow: %s"
            self.logger.info(msg, wflow['TargetStatus'], wflow['RequestName'])
        except Exception as ex:
            msg = "General Exception while trying status transition to: %s " % wflow['TargetStatus']
            msg += "for workflow: %s : %s" % (wflow['RequestName'], str(ex))
            raise MSRuleCleanerArchivalError(msg)
        return wflow

    def getMSOutputTransferInfo(self, wflow):
        """
        Fetches the transfer information from the MSOutput REST interface for
        the given workflow.
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        """
        headers = {'Accept': 'application/json'}
        params = {}
        url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'],
                                           wflow['RequestName'])
        # FIXED: initialize transferInfo so a failed fetch does not raise
        # UnboundLocalError when it is referenced below
        transferInfo = None
        try:
            res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
            data = json.loads(res)['result'][0]
            transferInfo = data['transferDoc']
        except Exception as ex:
            msg = "General exception while fetching TransferInfo from MSOutput for %s. "
            msg += "Error: %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))

        # Set Transfer status - information fetched from MSOutput only
        if transferInfo is not None and transferInfo['TransferStatus'] == 'done':
            wflow['TransferDone'] = True

        # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput)
        if transferInfo is not None and transferInfo['OutputMap']:
            tapeRulesStatusList = []
            # For setting 'TransferTape' = True we require either no tape rules for the
            # workflow have been created or all existing tape rules to be in status 'OK',
            # so every empty TapeRuleID we consider as completed.
            for mapRecord in transferInfo['OutputMap']:
                if not mapRecord['TapeRuleID']:
                    continue
                rucioRule = self.rucio.getRule(mapRecord['TapeRuleID'])
                if not rucioRule:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s not found for workflow: %s "
                    msg += "Possible server side error."
                    self.logger.error(msg, mapRecord['TapeRuleID'], wflow['RequestName'])
                    continue
                if rucioRule['state'] == 'OK':
                    tapeRulesStatusList.append(True)
                    msg = "Tape rule: %s in final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
                else:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s in non final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
            if all(tapeRulesStatusList):
                wflow['TransferTape'] = True

        return wflow

    def setParentDatasets(self, wflow):
        """
        Used to resolve parent datasets for a workflow.
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        :raises MSRuleCleanerResolveParentError: when DBS fails to resolve the parent
        """
        if wflow['InputDataset'] and wflow['IncludeParents']:
            childDataset = wflow['InputDataset']
            parentDataset = findParent([childDataset], self.msConfig['dbsUrl'])
            # NOTE: If findParent() returned None then the DBS service failed to
            #       resolve the request (it is considered an ERROR outside WMCore)
            if parentDataset.get(childDataset, None) is None:
                msg = "Failed to resolve parent dataset for: %s in workflow: %s" % (childDataset, wflow['RequestName'])
                raise MSRuleCleanerResolveParentError(msg)
            elif parentDataset:
                wflow['ParentDataset'] = [parentDataset[childDataset]]
                msg = "Found parent %s for input dataset %s in workflow: %s "
                self.logger.info(msg, parentDataset, wflow['InputDataset'], wflow['RequestName'])
            else:
                msg = "Could not find parent for input dataset: %s in workflows: %s"
                self.logger.error(msg, wflow['InputDataset'], wflow['RequestName'])
        return wflow

    def getRucioRules(self, wflow, gran, rucioAcct):
        """
        Queries Rucio and builds the relevant list of blocklevel rules for
        the given workflow
        :param wflow: A MSRuleCleaner workflow representation
        :param gran: Data granularity to search for Rucio rules. Possible values:
                     'block' or 'container'
        :param rucioAcct: The Rucio account whose rules should be considered
        :return: The workflow object
        """
        currPline = wflow['PlineMarkers'][-1]

        # Create the container list to the rucio account map and set the checkGlobalLocks flag.
        mapRuleType = {self.msConfig['rucioWmaAccount']: ["OutputDatasets"],
                       self.msConfig['rucioMStrAccount']: ["InputDataset", "MCPileup",
                                                           "DataPileup", "ParentDataset"]}
        if rucioAcct == self.msConfig['rucioMStrAccount']:
            checkGlobalLocks = True
        else:
            checkGlobalLocks = False

        # Find all the data placement rules created by the components:
        for dataType in mapRuleType[rucioAcct]:
            dataList = wflow[dataType] if isinstance(wflow[dataType], list) else [wflow[dataType]]
            for dataCont in dataList:
                if dataCont is None:
                    continue
                self.logger.debug("getRucioRules: dataCont: %s", pformat(dataCont))
                if checkGlobalLocks and dataCont in self.globalLocks:
                    msg = "Found dataset: %s in GlobalLocks. NOT considering it for filling the "
                    msg += "RulesToClean list for both container and block level Rules for workflow: %s!"
                    self.logger.info(msg, dataCont, wflow['RequestName'])
                    continue
                if gran == 'container':
                    for rule in self.rucio.listDataRules(dataCont, account=rucioAcct):
                        wflow['RulesToClean'][currPline].append(rule['id'])
                        msg = "Found %s container-level rule to be deleted for container %s"
                        self.logger.info(msg, rule['id'], dataCont)
                elif gran == 'block':
                    try:
                        blocks = self.rucio.getBlocksInContainer(dataCont)
                        for block in blocks:
                            for rule in self.rucio.listDataRules(block, account=rucioAcct):
                                wflow['RulesToClean'][currPline].append(rule['id'])
                                msg = "Found %s block-level rule to be deleted for container %s"
                                self.logger.info(msg, rule['id'], dataCont)
                    except WMRucioDIDNotFoundException:
                        msg = "Container: %s not found in Rucio for workflow: %s."
                        self.logger.info(msg, dataCont, wflow['RequestName'])
        return wflow

    def cleanRucioRules(self, wflow):
        """
        Cleans all the Rules present in the field 'RulesToClean' in the MSRuleCleaner
        workflow representation. And fills the relevant Cleanup Status.
        :param wflow: A MSRuleCleaner workflow representation
        :return: The workflow object
        """
        # NOTE: The function should be called independently and sequentially from
        #       The Input and the respective BlockLevel pipelines.

        # NOTE: The current functional pipeline is always the last one in the PlineMarkers list
        currPline = wflow['PlineMarkers'][-1]
        delResults = []
        if self.msConfig['enableRealMode']:
            for rule in wflow['RulesToClean'][currPline]:
                self.logger.info("%s: Deleting ruleId: %s ", currPline, rule)
                delResult = self.rucio.deleteRule(rule)
                delResults.append(delResult)
                if not delResult:
                    self.logger.warning("%s: Failed to delete ruleId: %s ", currPline, rule)
        else:
            for rule in wflow['RulesToClean'][currPline]:
                delResults.append(True)
                self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ", currPline, rule)

        # Set the cleanup flag:
        wflow['CleanupStatus'][currPline] = all(delResults)
        return wflow

    def getRequestRecords(self, reqStatus):
        """
        Queries ReqMgr2 for requests in a given status.
        :param reqStatus: The status for the requests to be fetched from ReqMgr2
        :return requests: A dictionary with all the workflows in the given status
        """
        self.logger.info("Fetching requests in status: %s", reqStatus)
        result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True)
        if not result:
            requests = {}
        else:
            requests = result[0]
        self.logger.info('   retrieved %s requests in status: %s', len(requests), reqStatus)
        return requests
class LogDBTest(unittest.TestCase):
    """
    _LogDBTest_

    Unit tests for the LogDB APIs against a local CouchDB instance.
    """

    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.
        """
        logging.basicConfig()
        self.logger = logging.getLogger('LogDBTest')
        # self.logger.setLevel(logging.DEBUG)
        url = 'http://localhost:5984/logdb_t'
        identifier = 'agentname'
        self.agent_inst = LogDB(url, identifier, logger=self.logger, create=True)
        self.agent_inst2 = LogDB(url, identifier, logger=self.logger, create=True, thread_name="Test")
        identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
        self.user_inst = LogDB(url, identifier, logger=self.logger, create=True)
        self.report = LogDBReport(self.agent_inst)

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.agent_inst.backend.deleteDatabase()

    def test_docid(self):
        "Test docid API"
        request = 'abs'
        mtype = self.agent_inst.backend.prefix('info')
        res = self.agent_inst.backend.docid(request, mtype)
        key = '--'.join([request, self.agent_inst.identifier, self.agent_inst.thread_name, mtype])
        khash = gen_hash(key)
        self.assertEqual(res, khash)

    def test_prefix(self):
        "Test prefix LogDBBackend API"
        request = 'abc'
        self.assertEqual(self.agent_inst.agent, 1)
        mtype = self.agent_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'agent-msg')

        self.assertEqual(self.user_inst.agent, 0)
        mtype = self.user_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'msg')

    def test_apis(self):
        "Test LogDB APIs"
        request = 'abc'

        # if we post messages for the same request only last one should survive
        self.agent_inst.post(request, 'msg1')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg2')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg3')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg4')
        # time.sleep(1)
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]['msg'], 'msg4')

        # add message as user, user based messages will grow
        self.user_inst.post(request, 'doc1')
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 2)  # we have msg4 and doc1
        self.user_inst.post(request, 'doc1')  # we have msg2 and two doc1 messages
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 3)

    def test_cleanup(self):
        "Test clean-up LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'comment')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        all_docs = self.agent_inst.get(request)
        self.agent_inst.backend.cleanup(thr=10)  # look into past
        past_docs = self.agent_inst.get(request)
        self.assertEqual(len(all_docs), len(past_docs))
        self.agent_inst.backend.cleanup(thr=-10)  # look into future
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 0)

    def test_get_all_requests(self):
        "Test get_all_requests LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        sum_docs = self.agent_inst.get(request)
        self.assertEqual(len(sum_docs), 3)  # since we have two info records
        request = 'abc2'
        self.agent_inst.post(request, 'msg1', 'info')
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 2)  # we only have two distinct request names

    def test_delete(self):
        "Test delete LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.delete(request)
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 0)

    def test_get(self):
        "Test get LogDB API"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 4)  # there should be 4 docs one for Test thread and 3 for MainThread
        docs = self.agent_inst2.get(request)
        self.assertEqual(len(docs), 4)  # this should result the same as above.
        docs1 = self.agent_inst.get(request, 'info')
        docs2 = self.agent_inst.get(request, 'info')
        req1 = set([r['request'] for r in docs1])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in docs2])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)

    def test_thread_name(self):
        "Test thread name support by LogDB APIs"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        maindocs = self.agent_inst.get(request)
        self.assertEqual(len(maindocs), 4)  # since we have two info records in agent_inst
        thr_docs = self.agent_inst2.get(request)
        self.assertEqual(len(thr_docs), 4)  # number of docs in different thread
        req1 = set([r['request'] for r in maindocs])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in thr_docs])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)
        worker1 = set([r['thr'] for r in maindocs])
        self.assertEqual(worker1, set(["Test", "MainThread"]))
        worker2 = set([r['thr'] for r in thr_docs])
        self.assertEqual(worker2, set(["Test", "MainThread"]))
        docs = set([r['identifier'] for r in maindocs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))
        docs = set([r['identifier'] for r in thr_docs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))

    def test_checks(self):
        "Tests LogDB check/mtype functionality"
        request = 'abc'
        res = self.agent_inst.post(request, 'msg', 'ingo')  # should be silent, i.o. no Exceptions
        self.assertEqual('post-error', res)
        args = (request, 'msg', 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = (request, 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)
        args = ('', 'msg', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = ('', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)

    def test_report(self):
        "Test LogDBReport APIs"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        time.sleep(1)
        self.agent_inst.post(request, 'msg2', 'comment')
        time.sleep(1)
        self.agent_inst.post(request, 'msg3', 'warning')
        time.sleep(1)
        self.agent_inst.post(request, 'msg4', 'error')
        time.sleep(1)
        self.report.to_stdout(request)
        out = self.report.to_txt(request)
        # FIXED: Python 2 'print' statements are a SyntaxError under Python 3;
        # use the print() function (same output on Python 3).
        print("\n### txt report\n", out)
        out = self.report.to_html(request)
        print("\n### html report\n", out)
        docs = self.agent_inst.get(request)
        tdocs = self.report.orderby(docs, order='ts')
        messages = [d['msg'] for d in tdocs]
        self.assertEqual(messages, ['msg4', 'msg3', 'msg2', 'msg1'])
class LogDBTest(unittest.TestCase):
    """
    _LogDBTest_

    Unit tests for the LogDB APIs (post/get/delete/cleanup), the
    LogDBBackend helpers (prefix/docid/check) and the LogDBReport
    formatting utilities, backed by a temporary CouchDB database.
    """
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.
        """
        logging.basicConfig()
        self.logger = logging.getLogger('LogDBTest')
        # self.logger.setLevel(logging.DEBUG)
        self.schema = []
        self.couchApps = ["LogDB"]
        self.testInit = TestInitCouchApp('LogDBTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'logdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        url = "%s/%s" % (self.testInit.couchUrl, dbName)
        # two agent instances sharing one identifier but different threads
        identifier = 'agentname'
        self.agent_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
        self.agent_inst2 = LogDB(url, identifier, logger=self.logger, thread_name="Test")
        # a DN-style identifier makes LogDB treat this instance as a user, not an agent
        identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
        self.user_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
        self.report = LogDBReport(self.agent_inst)

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def test_docid(self):
        "Test docid API"
        request = 'abs'
        mtype = self.agent_inst.backend.prefix('info')
        res = self.agent_inst.backend.docid(request, mtype)
        # docid is a hash of request--identifier--thread--mtype
        key = '--'.join([request, self.agent_inst.identifier, self.agent_inst.thread_name, mtype])
        khash = gen_hash(key)
        self.assertEqual(res, khash)

    def test_prefix(self):
        "Test prefix LogDBBackend API"
        # agent instances get an 'agent-' prefix on message types; users don't
        self.assertEqual(self.agent_inst.agent, 1)
        mtype = self.agent_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'agent-msg')
        self.assertEqual(self.user_inst.agent, 0)
        mtype = self.user_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'msg')

    def test_apis(self):
        "Test LogDB APIs"
        request = 'abc'
        # if we post messages for the same request only last one should survive
        self.agent_inst.post(request, 'msg1')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg2')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg3')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg4')
        # time.sleep(1)
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]['msg'], 'msg4')
        # add message as user, user based messages will grow
        self.user_inst.post(request, 'doc1')
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 2)  # we have msg4 and doc1
        self.user_inst.post(request, 'doc1')  # we have msg2 and two doc1 messages
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 3)

    def test_cleanup(self):
        "Test clean-up LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'comment')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        all_docs = self.agent_inst.get(request)
        # a positive threshold only removes docs older than 'thr', so nothing goes
        self.agent_inst.backend.cleanup(thr=10)  # look into past
        past_docs = self.agent_inst.get(request)
        self.assertEqual(len(all_docs), len(past_docs))
        # a negative threshold places the cutoff in the future, wiping everything
        self.agent_inst.backend.cleanup(thr=-10)  # look into future
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 0)

    def test_get_all_requests(self):
        "Test get_all_requests LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        sum_docs = self.agent_inst.get(request)
        self.assertEqual(len(sum_docs), 3)  # since we have two info records
        request = 'abc2'
        self.agent_inst.post(request, 'msg1', 'info')
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 2)  # we only have two distinct request names

    def test_delete(self):
        "Test delete LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.delete(request)
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 0)

    def test_get(self):
        "Test get LogDB API"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 4)  # there should be 4 docs one for Test thread and 3 for MainThread
        docs = self.agent_inst2.get(request)
        self.assertEqual(len(docs), 4)  # this should result the same as above.
        # filtering by mtype must give one consistent request name from either call
        docs1 = self.agent_inst.get(request, 'info')
        docs2 = self.agent_inst.get(request, 'info')
        req1 = set([r['request'] for r in docs1])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in docs2])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)

    def test_thread_name(self):
        "Test thread name support by LogDB APIs"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        maindocs = self.agent_inst.get(request)
        self.assertEqual(len(maindocs), 4)  # since we have two info records in agent_inst
        thr_docs = self.agent_inst2.get(request)
        self.assertEqual(len(thr_docs), 4)  # number of docs in different thread
        req1 = set([r['request'] for r in maindocs])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in thr_docs])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)
        # both thread names must be visible from either instance
        worker1 = set([r['thr'] for r in maindocs])
        self.assertEqual(worker1, set(["Test", "MainThread"]))
        worker2 = set([r['thr'] for r in thr_docs])
        self.assertEqual(worker2, set(["Test", "MainThread"]))
        docs = set([r['identifier'] for r in maindocs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))
        docs = set([r['identifier'] for r in thr_docs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))

    def test_checks(self):
        "Tests LogDB check/mtype functionality"
        request = 'abc'
        # invalid mtype through post() is swallowed and reported as a string
        res = self.agent_inst.post(request, 'msg', 'ingo')  # should be silent, i.o. no Exceptions
        self.assertEqual('post-error', res)
        args = (request, 'msg', 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = (request, 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)
        args = ('', 'msg', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = ('', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)

    def test_report(self):
        "Test LogDBReport APIs"
        request = 'abc'
        # space the posts one second apart so timestamp ordering is deterministic
        self.agent_inst.post(request, 'msg1', 'info')
        time.sleep(1)
        self.agent_inst.post(request, 'msg2', 'comment')
        time.sleep(1)
        self.agent_inst.post(request, 'msg3', 'warning')
        time.sleep(1)
        self.agent_inst.post(request, 'msg4', 'error')
        time.sleep(1)
        self.report.to_stdout(request)
        out = self.report.to_txt(request)
        print("\n### txt report\n", out)
        out = self.report.to_html(request)
        print("\n### html report\n", out)
        docs = self.agent_inst.get(request)
        # orderby ts must yield newest-first
        tdocs = self.report.orderby(docs, order='ts')
        messages = [d['msg'] for d in tdocs]
        self.assertEqual(messages, ['msg4', 'msg3', 'msg2', 'msg1'])

    def test_heartbeat(self):
        "Test heartbeat_report function"
        threadName = 'MainThread'
        failThread = 'FailThread'
        # an error posted after an info marks the thread as down
        self.agent_inst.post(msg="Test heartbeat", mtype="info")
        time.sleep(1)
        self.agent_inst.post(msg="Test heartbeatError", mtype="error")
        report = self.agent_inst.heartbeat_report()
        self.assertEqual(report[threadName]['type'], 'agent-error')
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [threadName])
        # a fresh info clears the down state
        self.agent_inst.post(msg="Test heartbeat", mtype="info")
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [])
        # a thread with no heartbeat at all is reported down
        report = self.agent_inst.wmstats_down_components_report([threadName, failThread])
        self.assertEqual(report['down_components'], [failThread])
        self.agent_inst.post(msg="Test heartbeatError2", mtype="error")
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [threadName])
        # deleting the error docs for this thread clears the down state again
        self.agent_inst.delete(mtype="error", this_thread=True)
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [])