def __init__(self, msConfig, logger=None):
    """
    Performs the basic setup and initialization for the MSUnmerged module.
    :param msConfig: micro service configuration dictionary
    :param logger: logger instance to be used by the service (optional)
    """
    super(MSUnmerged, self).__init__(msConfig, logger=logger)

    # Fill in every missing configuration option with its default value.
    # The list (and its mutable defaults) is rebuilt on every call, so no
    # state is shared between instances.
    for option, default in [("verbose", True),
                            ("interval", 60),
                            ("limitFilesPerRSE", 200),
                            ("skipRSEs", []),
                            ("rseExpr", "*"),
                            ("enableRealMode", False),
                            ("dumpRSE", False),
                            ("gfalLogLevel", 'normal'),
                            ("dirFilterIncl", []),
                            ("dirFilterExcl", []),
                            ("emulateGfal2", False),
                            ("filesToDeleteSliceSize", 100)]:
        self.msConfig.setdefault(option, default)

    # Refuse to start when gfal2 could not be imported and we are not
    # configured to emulate it.
    if self.msConfig['emulateGfal2'] is False and gfal2 is None:
        msg = "Failed to import gfal2 library while it's not "
        msg += "set to emulate it. Crashing the service!"
        raise ImportError(msg)

    # TODO: Add 'alertManagerUrl' to msConfig'
    # self.alertServiceName = "ms-unmerged"
    # self.alertManagerAPI = AlertManagerAPI(self.msConfig.get("alertManagerUrl", None), logger=logger)

    # Instantiating the Rucio Consistency Monitor Client
    self.rucioConMon = RucioConMon(self.msConfig['rucioConMon'], logger=self.logger)

    self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger)

    # Building all the Pipelines:
    pName = 'plineUnmerged'
    self.plineUnmerged = Pipeline(
        name=pName,
        funcLine=[Functor(self.updateRSETimestamps, start=True, end=False),
                  Functor(self.consRecordAge),
                  Functor(self.getUnmergedFiles),
                  Functor(self.filterUnmergedFiles),
                  Functor(self.getPfn),
                  Functor(self.cleanRSE),
                  Functor(self.updateRSECounters, pName),
                  Functor(self.updateRSETimestamps, start=False, end=True),
                  Functor(self.purgeRseObj, dumpRSE=self.msConfig['dumpRSE'])])

    # Initialization of the deleted files counters and per-RSE bookkeeping:
    self.rseCounters = {}
    self.plineCounters = {}
    self.rseTimestamps = {}
    self.rseConsStats = {}
    self.protectedLFNs = []

    # The basic /store/unmerged regular expressions (LFN and PFN forms):
    self.regStoreUnmergedLfn = re.compile("^/store/unmerged/.*$")
    self.regStoreUnmergedPfn = re.compile("^.+/store/unmerged/.*$")
def msOutputProducer(self, requestRecords):
    """
    A top level function to fetch requests from ReqMgr2, and produce
    the correspondent records for MSOutput in MongoDB.
    :param requestRecords: dict of request dictionaries retrieved from ReqMgr2
                           (iterated with viewvalues, so a mapping is expected)
    :return: number of records pushed through the producer pipeline

    It's implemented as a pipeline, performing the following sequential actions:
       1) document transformer - creates a MSOutputTemplate object from the request dict
       2) document info updater - parses the MSOutputTemplate object and updates the
          necessary data structure mapping output/locations/campaign/etc
       3) document uploader - inserts the MSOutputTemplate object into the correct
          MongoDB collection (ReVal is separated from standard workflows)
       4) document cleaner - releases memory reference to the MSOutputTemplate object
    """
    # NOTE:
    #    Here we should never use docUploader with `update=True`, because
    #    this will erase the latest state of already existing and fully or
    #    partially processed documents by the Consumer pipeline
    self.logger.info("Running the msOutputProducer ...")
    msPipeline = Pipeline(name="MSOutputProducer Pipeline",
                          funcLine=[Functor(self.docTransformer),
                                    Functor(self.docInfoUpdate),
                                    Functor(self.docUploader),
                                    Functor(self.docCleaner)])
    # FIX: the pipeline name is loop-invariant; fetch it once instead of
    # re-fetching it inside the try block on every iteration.
    pipeLineName = msPipeline.getPipelineName()
    counter = 0
    for request in viewvalues(requestRecords):
        if request['RequestName'] in self.requestNamesCached:
            # if it's cached, then it's already in MongoDB, no need to redo this thing!
            continue
        counter += 1
        try:
            msPipeline.run(request)
        except (KeyError, TypeError) as ex:
            msg = "%s Possibly broken read from ReqMgr2 API or other. Err: %s." % (pipeLineName, str(ex))
            msg += " Continue to the next document."
            self.logger.exception(msg)
            continue
        except Exception as ex:
            msg = "%s General Error from pipeline. Err: %s. " % (pipeLineName, str(ex))
            msg += "Giving up Now."
            # FIX: log the fully built message - previously only str(ex) was
            # logged and the constructed msg (with pipeline name and the
            # "Giving up" context) was silently discarded.
            self.logger.exception(msg)
            break
    return counter
def __init__(self, msConfig, logger=None):
    """
    Performs the basic setup and initialization for the MSUnmerged module.
    :param msConfig: micro service configuration dictionary
    :param logger: logger instance to be used by the service (optional)
    """
    super(MSUnmerged, self).__init__(msConfig, logger=logger)

    # Apply the configuration defaults only for options not already set:
    for option, default in [("verbose", True),
                            ("interval", 60),
                            ("rseExpr", "*")]:
        self.msConfig.setdefault(option, default)
    # self.msConfig.setdefault('limitRSEsPerInstance', 100)
    # self.msConfig.setdefault('limitTiersPerInstance', ['T1', 'T2', 'T3'])
    # self.msConfig.setdefault("rucioAccount", "FIXME_RUCIO_ACCT")

    # TODO: Add 'alertManagerUrl' to msConfig'
    # self.alertServiceName = "ms-unmerged"
    # self.alertManagerAPI = AlertManagerAPI(self.msConfig.get("alertManagerUrl", None), logger=logger)

    # Building all the Pipelines:
    pName = 'plineUnmerged'
    self.plineUnmerged = Pipeline(name=pName,
                                  funcLine=[Functor(self.cleanFiles)])

    # Initialization of the deleted files counters:
    self.rseCounters = {}
    self.plineCounters = {}
def __init__(self, msConfig, logger=None):
    """
    Performs the basic setup and initialization for the MSRuleCleaner module.
    :param msConfig: micro service configuration dictionary
    :param logger: logger instance to be used by the service (optional)
    """
    super(MSRuleCleaner, self).__init__(msConfig, logger=logger)

    # Apply the configuration defaults only for options not already set:
    for option, default in [("verbose", True),
                            ("interval", 60),
                            ("services", ['ruleCleaner']),
                            ("rucioWmaAccount", "wma_test"),
                            ("rucioMStrAccount", "wmcore_transferor"),
                            ('enableRealMode', False)]:
        self.msConfig.setdefault(option, default)

    self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode"
    self.emailAlert = EmailAlert(self.msConfig)
    self.curlMgr = RequestHandler()

    # Building all the Pipelines:
    # The two MStransferor pipelines (container and block level) share the
    # very same functional line, so build them in one loop:
    for pName in ['plineMSTrCont', 'plineMSTrBlock']:
        setattr(self, pName,
                Pipeline(name=pName,
                         funcLine=[Functor(self.setPlineMarker, pName),
                                   Functor(self.cleanRucioRules)]))
    # The two agent pipelines differ only in the rule granularity passed
    # to getRucioRules:
    for pName, grain in [('plineAgentCont', 'container'),
                         ('plineAgentBlock', 'block')]:
        setattr(self, pName,
                Pipeline(name=pName,
                         funcLine=[Functor(self.setPlineMarker, pName),
                                   Functor(self.getRucioRules, grain,
                                           self.msConfig['rucioWmaAccount']),
                                   Functor(self.cleanRucioRules)]))
    pName = 'plineArchive'
    self.plineArchive = Pipeline(name=pName,
                                 funcLine=[Functor(self.setPlineMarker, pName),
                                           Functor(self.setClean),
                                           Functor(self.archive)])

    # Building the different set of plines we will need later:
    # NOTE: The following are all the functional pipelines which are supposed to include
    #       a cleanup function and report cleanup status in the MSRuleCleanerWflow object
    self.cleanuplines = [self.plineMSTrCont,
                         self.plineMSTrBlock,
                         self.plineAgentCont,
                         self.plineAgentBlock]
    # Building an auxiliary list of cleanup pipeline names only:
    self.cleanupPipeNames = [pline.name for pline in self.cleanuplines]

    # Building lists of pipelines related only to Agents or MStransferror
    self.agentlines = [self.plineAgentCont, self.plineAgentBlock]
    self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock]

    # Initialization of the 'cleaned' and 'archived' counters:
    self.wfCounters = {'cleaned': {}, 'archived': 0}
def msOutputProducer(self, requestRecords):
    """
    A top level function to drive the upload of all the documents to MongoDB.
    :param requestRecords: iterable of (key, request) pairs, where request is
                           a request dictionary as retrieved from ReqMgr2
    :return: number of request documents pushed through the producer pipelines

    Implemented as two functional pipelines (RelVal and NonRelVal), each
    performing: document transform -> key update -> info update -> upload to
    the respective MongoDB collection -> cleanup of the in-memory document.
    """
    # NOTE:
    #    Here we should never use docUploader with `update=True`, because
    #    this will erase the latest state of already existing and fully or
    #    partially processed documents by the Consumer pipeline
    self.logger.info("Running the msOutputProducer ...")
    msPipelineRelVal = Pipeline(name="MSOutputProducer PipelineRelVal",
                                funcLine=[Functor(self.docTransformer),
                                          Functor(self.docKeyUpdate, isRelVal=True),
                                          Functor(self.docInfoUpdate, pipeLine='PipelineRelVal'),
                                          Functor(self.docUploader, self.msOutRelValColl),
                                          Functor(self.docCleaner)])
    msPipelineNonRelVal = Pipeline(name="MSOutputProducer PipelineNonRelVal",
                                   funcLine=[Functor(self.docTransformer),
                                             Functor(self.docKeyUpdate, isRelVal=False),
                                             Functor(self.docInfoUpdate, pipeLine='PipelineNonRelVal'),
                                             Functor(self.docUploader, self.msOutNonRelValColl),
                                             Functor(self.docCleaner)])
    counter = 0
    for _, request in requestRecords:
        counter += 1
        # FIX: route the request to the proper pipeline once, instead of
        # duplicating the getPipelineName()/run() code in both branches.
        if request.get('SubRequestType') == 'RelVal':
            pipeLine = msPipelineRelVal
        else:
            pipeLine = msPipelineNonRelVal
        pipeLineName = pipeLine.getPipelineName()
        try:
            pipeLine.run(request)
        except (KeyError, TypeError) as ex:
            # FIX: merged the two previously byte-identical KeyError and
            # TypeError handlers into a single except clause.
            msg = "%s Possibly broken read from Reqmgr2 API or other Err: %s. " % (pipeLineName, str(ex))
            msg += "Continue to the next document."
            self.logger.exception(msg)
            continue
        except Exception as ex:
            msg = "%s General Error from pipeline. Err: %s. " % (pipeLineName, str(ex))
            msg += "Giving up Now."
            self.logger.error(msg)
            self.logger.exception(ex)
            break
    return counter
def msOutputConsumer(self):
    """
    A top level function to drive the creation and book keeping of all the
    subscriptions to the Data Management System.
    :return: total number of workflow documents processed by both pipelines
             (RelVal and NonRelVal) during the current cycle
    """
    # NOTE(review): lastUpdate=int(time()) below is evaluated once at
    # pipeline construction, so every document in a cycle presumably gets
    # the same timestamp - confirm this is the intended behavior.
    msPipelineRelVal = Pipeline(name="MSOutputConsumer PipelineRelVal",
                                funcLine=[Functor(self.docReadfromMongo, self.msOutRelValColl, setTaken=False),
                                          Functor(self.makeSubscriptions),
                                          Functor(self.docKeyUpdate,
                                                  isTaken=False,
                                                  isTakenBy=None,
                                                  lastUpdate=int(time())),
                                          Functor(self.docUploader, self.msOutRelValColl, update=True,
                                                  keys=['isTaken', 'lastUpdate', 'transferStatus', 'transferIDs']),
                                          Functor(self.docDump, pipeLine='PipelineRelVal'),
                                          Functor(self.docCleaner)])
    msPipelineNonRelVal = Pipeline(name="MSOutputConsumer PipelineNonRelVal",
                                   funcLine=[Functor(self.docReadfromMongo, self.msOutNonRelValColl, setTaken=False),
                                             Functor(self.makeSubscriptions),
                                             Functor(self.docKeyUpdate,
                                                     isTaken=False,
                                                     isTakenBy=None,
                                                     lastUpdate=int(time())),
                                             Functor(self.docUploader, self.msOutNonRelValColl, update=True,
                                                     keys=['isTaken', 'lastUpdate', 'transferStatus', 'transferIDs']),
                                             Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                                             Functor(self.docCleaner)])
    # NOTE:
    #    If we actually have any exception that has reached to the top level
    #    exception handlers (eg. here - outside the pipeLine), this means
    #    some function from within the pipeLine has not caught it and the msOutDoc
    #    has left the pipe and died before the relevant document in MongoDB
    #    has been released (its flag 'isTaken' to be set back to False)
    wfCounters = {}
    for pipeLine in [msPipelineRelVal, msPipelineNonRelVal]:
        pipeLineName = pipeLine.getPipelineName()
        wfCounters[pipeLineName] = 0
        while wfCounters[pipeLineName] < self.msConfig['limitRequestsPerCycle']:
            # take only workflows:
            # - which are not already taken or
            # - a transfer subscription have never been done for them and
            # - avoid retrying workflows in the same cycle
            # NOTE:
            #    Once we are running the service not in a dry run mode we may
            #    consider adding an $or condition in mQueryDict for transferStatus:
            #    '$or': [{'transferStatus': None},
            #            {'transferStatus': 'incomplete'}]
            #    So that we can collect also workflows with partially or fully
            #    unsuccessful transfers
            currTime = int(time())
            threshTime = currTime - self.msConfig['interval']  # FIX: typo 'treshTime'
            mQueryDict = {'$and': [{'isTaken': False},
                                   {'$or': [{'transferStatus': None},
                                            {'transferStatus': 'incomplete'}]},
                                   {'$or': [{'lastUpdate': None},
                                            {'lastUpdate': {'$lt': threshTime}}]}]}
            # FIXME:
            #    To redefine those exceptions as MSoutputExceptions and
            #    start using those here so we do not mix with general errors
            try:
                pipeLine.run(mQueryDict)
            except (KeyError, TypeError) as ex:
                # FIX: merged the two previously identical KeyError and
                # TypeError handlers into a single except clause.
                # NOTE(review): 'continue' skips the counter increment, so a
                # persistently malformed record could loop here - verify the
                # query cannot keep returning the same document.
                msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except EmptyResultError:
                # FIX: dropped the unused exception binding.
                msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                msg += "We are done for the current cycle."
                self.logger.info(msg)
                break
            except Exception as ex:
                msg = "%s General Error from pipeline. Err: %s. " % (pipeLineName, str(ex))
                msg += "Giving up Now."
                self.logger.error(msg)
                self.logger.exception(ex)
                break
            wfCounters[pipeLineName] += 1

    wfCounterTotal = sum(wfCounters.values())
    return wfCounterTotal
def msOutputConsumer(self):
    """
    A top level function to drive the creation and book keeping of all the
    subscriptions to the Data Management System.
    :return: total number of workflow documents processed by both pipelines
             (RelVal and NonRelVal) during the current cycle
    """
    msPipelineRelVal = Pipeline(name="MSOutputConsumer PipelineRelVal",
                                funcLine=[Functor(self.makeSubscriptions),
                                          Functor(self.makeTapeSubscriptions),
                                          Functor(self.docUploader, update=True,
                                                  keys=['LastUpdate', 'TransferStatus', 'OutputMap']),
                                          Functor(self.docDump, pipeLine='PipelineRelVal'),
                                          Functor(self.docCleaner)])
    msPipelineNonRelVal = Pipeline(name="MSOutputConsumer PipelineNonRelVal",
                                   funcLine=[Functor(self.makeSubscriptions),
                                             Functor(self.makeTapeSubscriptions),
                                             Functor(self.docUploader, update=True,
                                                     keys=['LastUpdate', 'TransferStatus', 'OutputMap']),
                                             Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                                             Functor(self.docCleaner)])

    wfCounterTotal = 0
    # Only documents still pending a transfer subscription are consumed:
    mQueryDict = {'TransferStatus': 'pending'}
    pipeCollections = [(msPipelineRelVal, self.msOutRelValColl),
                       (msPipelineNonRelVal, self.msOutNonRelValColl)]
    # FIX: unpack the (pipeline, collection) pair directly instead of
    # indexing pipeColl[0]/pipeColl[1].
    for pipeLine, dbColl in pipeCollections:
        wfCounters = 0
        pipeLineName = pipeLine.getPipelineName()
        for docOut in self.getDocsFromMongo(mQueryDict, dbColl,
                                            self.msConfig['limitRequestsPerCycle']):
            # FIXME:
            #    To redefine those exceptions as MSoutputExceptions and
            #    start using those here so we do not mix with general errors
            try:
                # If it's in MongoDB, it can get into our in-memory cache
                self.requestNamesCached.append(docOut['RequestName'])
                pipeLine.run(docOut)
            except (KeyError, TypeError) as ex:
                msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except EmptyResultError:
                # FIX: dropped the unused exception binding.
                msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                msg += "We are done for the current cycle."
                self.logger.info(msg)
                break
            except Exception as ex:
                msg = "%s General error from pipeline. Err: %s. " % (pipeLineName, str(ex))
                msg += "Will retry again in the next cycle."
                self.logger.exception(msg)
                break
            wfCounters += 1
        self.logger.info("Processed %d workflows from pipeline: %s", wfCounters, pipeLineName)
        wfCounterTotal += wfCounters
    return wfCounterTotal