def __init__(self, msConfig, logger=None):
    """
    Performs the basic setup and initialization for the MSUnmerged module.
    :param msConfig: micro service configuration dictionary
    :param logger: logger instance to be used by the service (optional)
    """
    super(MSUnmerged, self).__init__(msConfig, logger=logger)

    # Fill in every missing configuration option with its default value.
    # The list (and its mutable defaults) is rebuilt on every call, so no
    # state is shared between instances.
    for option, default in [("verbose", True),
                            ("interval", 60),
                            ("limitFilesPerRSE", 200),
                            ("skipRSEs", []),
                            ("rseExpr", "*"),
                            ("enableRealMode", False),
                            ("dumpRSE", False),
                            ("gfalLogLevel", 'normal'),
                            ("dirFilterIncl", []),
                            ("dirFilterExcl", []),
                            ("emulateGfal2", False),
                            ("filesToDeleteSliceSize", 100)]:
        self.msConfig.setdefault(option, default)

    # Refuse to start when gfal2 could not be imported and we are not
    # configured to emulate it.
    if self.msConfig['emulateGfal2'] is False and gfal2 is None:
        msg = "Failed to import gfal2 library while it's not "
        msg += "set to emulate it. Crashing the service!"
        raise ImportError(msg)

    # TODO: Add 'alertManagerUrl' to msConfig'
    # self.alertServiceName = "ms-unmerged"
    # self.alertManagerAPI = AlertManagerAPI(self.msConfig.get("alertManagerUrl", None), logger=logger)

    # Instantiating the Rucio Consistency Monitor Client
    self.rucioConMon = RucioConMon(self.msConfig['rucioConMon'], logger=self.logger)

    self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger)

    # Building all the Pipelines:
    pName = 'plineUnmerged'
    self.plineUnmerged = Pipeline(
        name=pName,
        funcLine=[Functor(self.updateRSETimestamps, start=True, end=False),
                  Functor(self.consRecordAge),
                  Functor(self.getUnmergedFiles),
                  Functor(self.filterUnmergedFiles),
                  Functor(self.getPfn),
                  Functor(self.cleanRSE),
                  Functor(self.updateRSECounters, pName),
                  Functor(self.updateRSETimestamps, start=False, end=True),
                  Functor(self.purgeRseObj, dumpRSE=self.msConfig['dumpRSE'])])

    # Initialization of the deleted files counters and per-RSE bookkeeping:
    self.rseCounters = {}
    self.plineCounters = {}
    self.rseTimestamps = {}
    self.rseConsStats = {}
    self.protectedLFNs = []

    # The basic /store/unmerged regular expressions (LFN and PFN forms):
    self.regStoreUnmergedLfn = re.compile("^/store/unmerged/.*$")
    self.regStoreUnmergedPfn = re.compile("^.+/store/unmerged/.*$")
def msOutputProducer(self, requestRecords):
    """
    A top level function to fetch requests from ReqMgr2, and produce
    the correspondent records for MSOutput in MongoDB.
    :param requestRecords: dict of request dictionaries retrieved from ReqMgr2
                           (iterated with viewvalues, so a mapping is expected)
    :return: number of records pushed through the producer pipeline

    It's implemented as a pipeline, performing the following sequential actions:
       1) document transformer - creates a MSOutputTemplate object from the request dict
       2) document info updater - parses the MSOutputTemplate object and updates the
          necessary data structure mapping output/locations/campaign/etc
       3) document uploader - inserts the MSOutputTemplate object into the correct
          MongoDB collection (ReVal is separated from standard workflows)
       4) document cleaner - releases memory reference to the MSOutputTemplate object
    """
    # NOTE:
    #    Here we should never use docUploader with `update=True`, because
    #    this will erase the latest state of already existing and fully or
    #    partially processed documents by the Consumer pipeline
    self.logger.info("Running the msOutputProducer ...")
    msPipeline = Pipeline(name="MSOutputProducer Pipeline",
                          funcLine=[Functor(self.docTransformer),
                                    Functor(self.docInfoUpdate),
                                    Functor(self.docUploader),
                                    Functor(self.docCleaner)])
    # FIX: the pipeline name is loop-invariant; fetch it once instead of
    # re-fetching it inside the try block on every iteration.
    pipeLineName = msPipeline.getPipelineName()
    counter = 0
    for request in viewvalues(requestRecords):
        if request['RequestName'] in self.requestNamesCached:
            # if it's cached, then it's already in MongoDB, no need to redo this thing!
            continue
        counter += 1
        try:
            msPipeline.run(request)
        except (KeyError, TypeError) as ex:
            msg = "%s Possibly broken read from ReqMgr2 API or other. Err: %s." % (pipeLineName, str(ex))
            msg += " Continue to the next document."
            self.logger.exception(msg)
            continue
        except Exception as ex:
            msg = "%s General Error from pipeline. Err: %s. " % (pipeLineName, str(ex))
            msg += "Giving up Now."
            # FIX: log the fully built message - previously only str(ex) was
            # logged and the constructed msg (with pipeline name and the
            # "Giving up" context) was silently discarded.
            self.logger.exception(msg)
            break
    return counter
def __init__(self, msConfig, logger=None):
    """
    Performs the basic setup and initialization for the MSUnmerged module.
    :param msConfig: micro service configuration dictionary
    :param logger: logger instance to be used by the service (optional)
    """
    super(MSUnmerged, self).__init__(msConfig, logger=logger)

    # Apply the configuration defaults only for options not already set:
    for option, default in [("verbose", True),
                            ("interval", 60),
                            ("rseExpr", "*")]:
        self.msConfig.setdefault(option, default)
    # self.msConfig.setdefault('limitRSEsPerInstance', 100)
    # self.msConfig.setdefault('limitTiersPerInstance', ['T1', 'T2', 'T3'])
    # self.msConfig.setdefault("rucioAccount", "FIXME_RUCIO_ACCT")

    # TODO: Add 'alertManagerUrl' to msConfig'
    # self.alertServiceName = "ms-unmerged"
    # self.alertManagerAPI = AlertManagerAPI(self.msConfig.get("alertManagerUrl", None), logger=logger)

    # Building all the Pipelines:
    pName = 'plineUnmerged'
    self.plineUnmerged = Pipeline(name=pName,
                                  funcLine=[Functor(self.cleanFiles)])

    # Initialization of the deleted files counters:
    self.rseCounters = {}
    self.plineCounters = {}
def __init__(self, msConfig, logger=None):
    """
    Performs the basic setup and initialization for the MSRuleCleaner module.
    :param msConfig: micro service configuration dictionary
    :param logger: logger instance to be used by the service (optional)
    """
    super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

    # Apply the configuration defaults only for options not already set:
    for option, default in [("verbose", True),
                            ("interval", 60),
                            ("services", ['ruleCleaner']),
                            ("rucioWmaAccount", "wma_test"),
                            ("rucioMStrAccount", "wmcore_transferor"),
                            ('enableRealMode', False)]:
        self.msConfig.setdefault(option, default)

    self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode"
    self.emailAlert = EmailAlert(self.msConfig)
    self.curlMgr = RequestHandler()

    # Building all the Pipelines:
    # The two MStransferor pipelines (container and block level) share the
    # very same functional line, so build them in one loop:
    for pName in ['plineMSTrCont', 'plineMSTrBlock']:
        setattr(self, pName,
                Pipeline(name=pName,
                         funcLine=[Functor(self.setPlineMarker, pName),
                                   Functor(self.cleanRucioRules)]))
    # The two agent pipelines differ only in the rule granularity passed
    # to getRucioRules:
    for pName, grain in [('plineAgentCont', 'container'),
                         ('plineAgentBlock', 'block')]:
        setattr(self, pName,
                Pipeline(name=pName,
                         funcLine=[Functor(self.setPlineMarker, pName),
                                   Functor(self.getRucioRules, grain,
                                           self.msConfig['rucioWmaAccount']),
                                   Functor(self.cleanRucioRules)]))
    pName = 'plineArchive'
    self.plineArchive = Pipeline(name=pName,
                                 funcLine=[Functor(self.setPlineMarker, pName),
                                           Functor(self.setClean),
                                           Functor(self.archive)])

    # Building the different set of plines we will need later:
    # NOTE: The following are all the functional pipelines which are supposed to include
    #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
    self.cleanuplines = [self.plineMSTrCont,
                         self.plineMSTrBlock,
                         self.plineAgentCont,
                         self.plineAgentBlock]
    # Building an auxiliary list of cleanup pipeline names only:
    self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

    # Building lists of pipelines related only to Agents or MStransferror
    self.agentlines = [self.plineAgentCont, self.plineAgentBlock]
    self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock]

    # Initialization of the 'cleaned' and 'archived' counters:
    self.wfCounters = {'cleaned': {}, 'archived': 0}
def msOutputProducer(self, requestRecords):
    """
    A top level function to drive the upload of all the documents to MongoDB.
    :param requestRecords: iterable of (key, request) pairs, where request is
                           a request dictionary as retrieved from ReqMgr2
    :return: number of request documents pushed through the producer pipelines

    Implemented as two functional pipelines (RelVal and NonRelVal), each
    performing: document transform -> key update -> info update -> upload to
    the respective MongoDB collection -> cleanup of the in-memory document.
    """
    # NOTE:
    #    Here we should never use docUploader with `update=True`, because
    #    this will erase the latest state of already existing and fully or
    #    partially processed documents by the Consumer pipeline
    self.logger.info("Running the msOutputProducer ...")
    msPipelineRelVal = Pipeline(name="MSOutputProducer PipelineRelVal",
                                funcLine=[Functor(self.docTransformer),
                                          Functor(self.docKeyUpdate, isRelVal=True),
                                          Functor(self.docInfoUpdate, pipeLine='PipelineRelVal'),
                                          Functor(self.docUploader, self.msOutRelValColl),
                                          Functor(self.docCleaner)])
    msPipelineNonRelVal = Pipeline(name="MSOutputProducer PipelineNonRelVal",
                                   funcLine=[Functor(self.docTransformer),
                                             Functor(self.docKeyUpdate, isRelVal=False),
                                             Functor(self.docInfoUpdate, pipeLine='PipelineNonRelVal'),
                                             Functor(self.docUploader, self.msOutNonRelValColl),
                                             Functor(self.docCleaner)])
    counter = 0
    for _, request in requestRecords:
        counter += 1
        # FIX: route the request to the proper pipeline once, instead of
        # duplicating the getPipelineName()/run() code in both branches.
        if request.get('SubRequestType') == 'RelVal':
            pipeLine = msPipelineRelVal
        else:
            pipeLine = msPipelineNonRelVal
        pipeLineName = pipeLine.getPipelineName()
        try:
            pipeLine.run(request)
        except (KeyError, TypeError) as ex:
            # FIX: merged the two previously byte-identical KeyError and
            # TypeError handlers into a single except clause.
            msg = "%s Possibly broken read from Reqmgr2 API or other Err: %s. " % (pipeLineName, str(ex))
            msg += "Continue to the next document."
            self.logger.exception(msg)
            continue
        except Exception as ex:
            msg = "%s General Error from pipeline. Err: %s. " % (pipeLineName, str(ex))
            msg += "Giving up Now."
            self.logger.error(msg)
            self.logger.exception(ex)
            break
    return counter
def msOutputConsumer(self):
    """
    A top level function to drive the creation and book keeping of all the
    subscriptions to the Data Management System.
    :return: total number of workflow documents processed by both pipelines
             (RelVal and NonRelVal) during the current cycle
    """
    # NOTE(review): lastUpdate=int(time()) below is evaluated once at
    # pipeline construction, so every document in a cycle presumably gets
    # the same timestamp - confirm this is the intended behavior.
    msPipelineRelVal = Pipeline(name="MSOutputConsumer PipelineRelVal",
                                funcLine=[Functor(self.docReadfromMongo, self.msOutRelValColl, setTaken=False),
                                          Functor(self.makeSubscriptions),
                                          Functor(self.docKeyUpdate,
                                                  isTaken=False,
                                                  isTakenBy=None,
                                                  lastUpdate=int(time())),
                                          Functor(self.docUploader, self.msOutRelValColl, update=True,
                                                  keys=['isTaken', 'lastUpdate', 'transferStatus', 'transferIDs']),
                                          Functor(self.docDump, pipeLine='PipelineRelVal'),
                                          Functor(self.docCleaner)])
    msPipelineNonRelVal = Pipeline(name="MSOutputConsumer PipelineNonRelVal",
                                   funcLine=[Functor(self.docReadfromMongo, self.msOutNonRelValColl, setTaken=False),
                                             Functor(self.makeSubscriptions),
                                             Functor(self.docKeyUpdate,
                                                     isTaken=False,
                                                     isTakenBy=None,
                                                     lastUpdate=int(time())),
                                             Functor(self.docUploader, self.msOutNonRelValColl, update=True,
                                                     keys=['isTaken', 'lastUpdate', 'transferStatus', 'transferIDs']),
                                             Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                                             Functor(self.docCleaner)])
    # NOTE:
    #    If we actually have any exception that has reached to the top level
    #    exception handlers (eg. here - outside the pipeLine), this means
    #    some function from within the pipeLine has not caught it and the msOutDoc
    #    has left the pipe and died before the relevant document in MongoDB
    #    has been released (its flag 'isTaken' to be set back to False)
    wfCounters = {}
    for pipeLine in [msPipelineRelVal, msPipelineNonRelVal]:
        pipeLineName = pipeLine.getPipelineName()
        wfCounters[pipeLineName] = 0
        while wfCounters[pipeLineName] < self.msConfig['limitRequestsPerCycle']:
            # take only workflows:
            # - which are not already taken or
            # - a transfer subscription have never been done for them and
            # - avoid retrying workflows in the same cycle
            # NOTE:
            #    Once we are running the service not in a dry run mode we may
            #    consider adding an $or condition in mQueryDict for transferStatus:
            #    '$or': [{'transferStatus': None},
            #            {'transferStatus': 'incomplete'}]
            #    So that we can collect also workflows with partially or fully
            #    unsuccessful transfers
            currTime = int(time())
            threshTime = currTime - self.msConfig['interval']  # FIX: typo 'treshTime'
            mQueryDict = {'$and': [{'isTaken': False},
                                   {'$or': [{'transferStatus': None},
                                            {'transferStatus': 'incomplete'}]},
                                   {'$or': [{'lastUpdate': None},
                                            {'lastUpdate': {'$lt': threshTime}}]}]}
            # FIXME:
            #    To redefine those exceptions as MSoutputExceptions and
            #    start using those here so we do not mix with general errors
            try:
                pipeLine.run(mQueryDict)
            except (KeyError, TypeError) as ex:
                # FIX: merged the two previously identical KeyError and
                # TypeError handlers into a single except clause.
                # NOTE(review): 'continue' skips the counter increment, so a
                # persistently malformed record could loop here - verify the
                # query cannot keep returning the same document.
                msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except EmptyResultError:
                # FIX: dropped the unused exception binding.
                msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                msg += "We are done for the current cycle."
                self.logger.info(msg)
                break
            except Exception as ex:
                msg = "%s General Error from pipeline. Err: %s. " % (pipeLineName, str(ex))
                msg += "Giving up Now."
                self.logger.error(msg)
                self.logger.exception(ex)
                break
            wfCounters[pipeLineName] += 1

    wfCounterTotal = sum(wfCounters.values())
    return wfCounterTotal
def msOutputConsumer(self):
    """
    A top level function to drive the creation and book keeping of all the
    subscriptions to the Data Management System.
    :return: total number of workflow documents processed by both pipelines
             (RelVal and NonRelVal) during the current cycle
    """
    msPipelineRelVal = Pipeline(name="MSOutputConsumer PipelineRelVal",
                                funcLine=[Functor(self.makeSubscriptions),
                                          Functor(self.makeTapeSubscriptions),
                                          Functor(self.docUploader, update=True,
                                                  keys=['LastUpdate', 'TransferStatus', 'OutputMap']),
                                          Functor(self.docDump, pipeLine='PipelineRelVal'),
                                          Functor(self.docCleaner)])
    msPipelineNonRelVal = Pipeline(name="MSOutputConsumer PipelineNonRelVal",
                                   funcLine=[Functor(self.makeSubscriptions),
                                             Functor(self.makeTapeSubscriptions),
                                             Functor(self.docUploader, update=True,
                                                     keys=['LastUpdate', 'TransferStatus', 'OutputMap']),
                                             Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                                             Functor(self.docCleaner)])

    wfCounterTotal = 0
    # Only documents still pending a transfer subscription are consumed:
    mQueryDict = {'TransferStatus': 'pending'}
    pipeCollections = [(msPipelineRelVal, self.msOutRelValColl),
                       (msPipelineNonRelVal, self.msOutNonRelValColl)]
    # FIX: unpack the (pipeline, collection) pair directly instead of
    # indexing pipeColl[0]/pipeColl[1].
    for pipeLine, dbColl in pipeCollections:
        wfCounters = 0
        pipeLineName = pipeLine.getPipelineName()
        for docOut in self.getDocsFromMongo(mQueryDict, dbColl,
                                            self.msConfig['limitRequestsPerCycle']):
            # FIXME:
            #    To redefine those exceptions as MSoutputExceptions and
            #    start using those here so we do not mix with general errors
            try:
                # If it's in MongoDB, it can get into our in-memory cache
                self.requestNamesCached.append(docOut['RequestName'])
                pipeLine.run(docOut)
            except (KeyError, TypeError) as ex:
                msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except EmptyResultError:
                # FIX: dropped the unused exception binding.
                msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                msg += "We are done for the current cycle."
                self.logger.info(msg)
                break
            except Exception as ex:
                msg = "%s General error from pipeline. Err: %s. " % (pipeLineName, str(ex))
                msg += "Will retry again in the next cycle."
                self.logger.exception(msg)
                break
            wfCounters += 1
        self.logger.info("Processed %d workflows from pipeline: %s", wfCounters, pipeLineName)
        wfCounterTotal += wfCounters
    return wfCounterTotal