Example #1
    def __init__(self, msConfig, mode, logger=None):
        """
        Runs the basic setup and initialization for the MSOutput module
        :msConfig: microservice configuration
        :mode: MSOutput run mode:
            - MSOutputConsumer:
                Reads the workflow and transfer subscriptions from MongoDB and
                makes transfer subscriptions.
            - MSOutputProducer:
                Fetches workflows in a given status from ReqMgr2, then creates
                and uploads the documents to MongoDB.
        """
        super(MSOutput, self).__init__(msConfig, logger)

        self.mode = mode
        self.msConfig.setdefault("limitRequestsPerCycle", 500)
        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 600)
        self.msConfig.setdefault("services", ['output'])
        self.msConfig.setdefault("defaultDataManSys", "DDM")
        self.msConfig.setdefault("defaultGroup", "DataOps")
        self.msConfig.setdefault("enableAggSubscr", True)
        self.msConfig.setdefault("enableDataPlacement", False)
        self.msConfig.setdefault("excludeDataTier", ['NANOAOD', 'NANOAODSIM'])
        self.msConfig.setdefault("rucioAccount", 'wma_test')
        self.msConfig.setdefault("mongoDBUrl", 'mongodb://localhost')
        self.msConfig.setdefault("mongoDBPort", 8230)
        self.msConfig.setdefault("streamerBufferFile", None)
        self.emailAlert = EmailAlert(self.msConfig)

        self.cric = CRIC(logger=self.logger)
        self.uConfig = {}
        self.campaigns = {}
        self.psn2pnnMap = {}

        msOutIndex = IndexModel('RequestName', unique=True)
        msOutDBConfig = {
            'database': 'msOutDB',
            'server': self.msConfig['mongoDBUrl'],
            'port': self.msConfig['mongoDBPort'],
            'logger': self.logger,
            'create': True,
            'collections': [('msOutRelValColl', msOutIndex),
                            ('msOutNonRelValColl', msOutIndex)]
        }

        self.msOutDB = MongoDB(**msOutDBConfig).msOutDB
        self.msOutRelValColl = self.msOutDB['msOutRelValColl']
        self.msOutNonRelValColl = self.msOutDB['msOutNonRelValColl']
        self.ddm = DDM(
            url=self.msConfig['ddmUrl'],
            logger=self.logger,
            enableDataPlacement=self.msConfig['enableDataPlacement'])
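
A minimal driver sketch for the constructor above, using only names that appear in the example. Note that 'ddmUrl' is read without a setdefault() fallback, so it must be supplied; everything else falls back to the defaults assigned in __init__. The logger wiring is an assumption for illustration.

import logging

logger = logging.getLogger("ms-output")
msConfig = {
    "ddmUrl": "https://dynamo.mit.edu/",  # mandatory: read without a default
    "enableDataPlacement": False,         # keep the service in dry run mode
}

# one instance per run mode
producer = MSOutput(msConfig, mode="MSOutputProducer", logger=logger)
consumer = MSOutput(msConfig, mode="MSOutputConsumer", logger=logger)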
Example #2
    def makeSubscriptions(self, workflows=None):
        """
        The common function to make the final subscriptions. It depends on the
        default Data Management System configured through msConfig; based on that,
        the relevant service wrapper is called.
        :return: A list of results from the REST interface of the DMS in question
        """
        workflows = workflows or []

        # NOTE:
        #    This is just an example construction of the function. None of the
        #    data structures used to visualise it are correct. To be updated.
        results = []
        if self.msConfig['defaultDataManSys'] == 'DDM':
            # TODO: Here to put the dryrun mode: True/False
            ddm = DDM(url=self.msConfig['ddmUrl'],
                      logger=self.logger,
                      enableDataPlacement=self.msConfig['enableDataPlacement'])

            ddmReqList = []
            for workflow in workflows:
                for output in workflow['output']:
                    ddmReqList.append(DDMReqTemplate('copy', item=output))

            if self.msConfig['enableAggSubscr']:
                results = ddm.makeAggRequests(ddmReqList, aggKey='item')
            else:
                for ddmReq in ddmReqList:
                    results.append(ddm.makeRequest(ddmReq))

        elif self.msConfig['defaultDataManSys'] == 'PhEDEx':
            pass

        elif self.msConfig['defaultDataManSys'] == 'Rucio':
            pass

        return results
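
A hedged usage sketch for the function above: the workflow shape is inferred solely from the loop over workflow['output'], and msOutput stands in for an already constructed MSOutput instance; both are assumptions for illustration.

# each record carries an 'output' list of dataset names (inferred shape)
workflows = [
    {'output': ['/PrimaryDS1/AcquisitionEra-ProcString-v1/AODSIM']},
    {'output': ['/PrimaryDS2/AcquisitionEra-ProcString-v1/MINIAODSIM']},
]
# one DDMReqTemplate('copy', ...) is built per output dataset
results = msOutput.makeSubscriptions(workflows)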
Example #3
    def testConfig(self):
        """
        Test service attributes and the override mechanism
        """
        self.assertEqual(self.myDDM['endpoint'], 'https://dynamo.mit.edu/')
        self.assertEqual(self.myDDM['cacheduration'], 1)
        self.assertEqual(self.myDDM['accept_type'], 'application/json')
        self.assertEqual(self.myDDM['content_type'], 'application/json')

        newParams = {"cacheduration": 100, "content_type": "application/text"}
        ddm = DDM(url='https://BLAH.cern.ch/',
                  configDict=newParams,
                  enableDataPlacement=False)
        self.assertEqual(ddm['endpoint'], 'https://BLAH.cern.ch/')
        self.assertEqual(ddm['cacheduration'], newParams['cacheduration'])
        self.assertEqual(ddm['content_type'], newParams['content_type'])
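
To make the override mechanism under test concrete, here is a minimal sketch of how a defaults-plus-configDict merge could look. This is not the real DDM constructor, only an illustration; the default values are taken from the assertions above.

def buildServiceConfig(url=None, configDict=None):
    # defaults asserted in testConfig
    config = {'endpoint': 'https://dynamo.mit.edu/',
              'cacheduration': 1,
              'accept_type': 'application/json',
              'content_type': 'application/json'}
    if url:
        config['endpoint'] = url      # an explicit url wins over the default
    config.update(configDict or {})   # caller-supplied overrides win last
    return config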
Example #4
    def setUp(self):
        """
        Setup for unit tests
        """
        super(DDMTest, self).setUp()
        self.myDDM = DDM(enableDataPlacement=False)
Example #5
class DDMTest(EmulatedUnitTestCase):
    """
    Unit tests for DDM Services module
    """
    def __init__(self, methodName='runTest'):
        super(DDMTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(DDMTest, self).setUp()
        self.myDDM = DDM(enableDataPlacement=False)

    def testConfig(self):
        """
        Test service attributes and the override mechanism
        """
        self.assertEqual(self.myDDM['endpoint'], 'https://dynamo.mit.edu/')
        self.assertEqual(self.myDDM['cacheduration'], 1)
        self.assertEqual(self.myDDM['accept_type'], 'application/json')
        self.assertEqual(self.myDDM['content_type'], 'application/json')

        newParams = {"cacheduration": 100, "content_type": "application/text"}
        ddm = DDM(url='https://BLAH.cern.ch/',
                  configDict=newParams,
                  enableDataPlacement=False)
        self.assertEqual(ddm['endpoint'], 'https://BLAH.cern.ch/')
        self.assertEqual(ddm['cacheduration'], newParams['cacheduration'])
        self.assertEqual(ddm['content_type'], newParams['content_type'])

    def testMakeRequest(self):
        expectedResult = {
            'cache': None,
            'group': 'DataOps',
            'item': [
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ],
            'n': None,
            'site': ['T2_*', 'T1_*_Disk']
        }
        ddmReq = DDMReqTemplate(
            'copy',
            item=[
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ])

        result = self.myDDM.makeRequest(ddmReq)
        self.assertEqual(expectedResult, result)

    def testMakeAggRequest(self):
        ddmReqList = [None] * 13
        ddmReqList[0] = DDMReqTemplate(
            'copy',
            item=[
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ])
        ddmReqList[1] = DDMReqTemplate('pollcopy', request_id=46458)
        ddmReqList[2] = DDMReqTemplate(
            'copy',
            item=[
                '/RelValSingleMuPt10Extended/CMSSW_11_1_0_pre5-110X_mcRun4_realistic_v3_2026D48noPU-v1/MINIAODSIM'
            ])
        ddmReqList[3] = DDMReqTemplate(
            'copy',
            item=[
                '/RelValSingleMuPt10Extended/CMSSW_11_1_0_pre5-110X_mcRun4_realistic_v3_2026D48noPU-v1/MINIAODSIM'
            ])
        ddmReqList[4] = DDMReqTemplate(
            'copy',
            item=[
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ],
            site=['T2_CH_CERN', 'T2_US_MIT'])
        ddmReqList[5] = DDMReqTemplate('pollcopy', request_id=46614)
        ddmReqList[6] = DDMReqTemplate(
            'copy',
            item=[
                '/RelValSingleMuPt10Extended/CMSSW_11_1_0_pre5-110X_mcRun4_realistic_v3_2026D48noPU-v1/MINIAODSIM'
            ])
        ddmReqList[7] = DDMReqTemplate(
            'copy',
            item=[
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ],
            site=['T2_CH_CERN', 'T2_US_MIT'])
        ddmReqList[8] = DDMReqTemplate('pollcopy', request_id=46627)
        ddmReqList[9] = DDMReqTemplate('pollcopy', request_id=46628)
        ddmReqList[10] = DDMReqTemplate('cancelcopy', request_id=46628)
        ddmReqList[11] = DDMReqTemplate(
            'copy',
            item=[
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ],
            site=['T2_CH_CERN', 'T2_US_MIT'])
        ddmReqList[12] = DDMReqTemplate('pollcopy', request_id=46633)

        expectedResult = [{
            'item': None,
            'request_id': 46633,
            'site': None,
            'status': None,
            'user': None
        }, {
            'cache': None,
            'group': 'DataOps',
            'item': [
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM',
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM',
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ],
            'n': None,
            'site': ['T2_CH_CERN', 'T2_US_MIT']
        }, {
            'request_id': 46628
        }, {
            'item': None,
            'request_id': 46628,
            'site': None,
            'status': None,
            'user': None
        }, {
            'item': None,
            'request_id': 46627,
            'site': None,
            'status': None,
            'user': None
        }, {
            'cache': None,
            'group': 'DataOps',
            'item': [
                '/RelValSingleMuPt10Extended/CMSSW_11_1_0_pre5-110X_mcRun4_realistic_v3_2026D48noPU-v1/MINIAODSIM',
                '/RelValSingleMuPt10Extended/CMSSW_11_1_0_pre5-110X_mcRun4_realistic_v3_2026D48noPU-v1/MINIAODSIM',
                '/RelValSingleMuPt10Extended/CMSSW_11_1_0_pre5-110X_mcRun4_realistic_v3_2026D48noPU-v1/MINIAODSIM',
                '/LQLQToTopMuTopTau_M-1200_TuneCP5_13TeV_pythia8/RunIIFall17NanoAODv5-PU2017_12Apr2018_Nano1June2019_102X_mc2017_realistic_v7-v1/NANOAODSIM'
            ],
            'n': None,
            'site': ['T2_*', 'T1_*_Disk']
        }, {
            'item': None,
            'request_id': 46614,
            'site': None,
            'status': None,
            'user': None
        }, {
            'item': None,
            'request_id': 46458,
            'site': None,
            'status': None,
            'user': None,
        }]

        result = self.myDDM.makeAggRequests(ddmReqList, aggKey='item')
        self.assertEqual(expectedResult, result)
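
The expected output above implies the aggregation semantics: 'copy' requests whose fields match everywhere except the aggKey are merged by concatenating their aggKey lists, while 'pollcopy' and 'cancelcopy' requests pass through one by one. A standalone sketch of that grouping over plain dicts (an illustration of the semantics, not the DDM implementation) could look like:

def aggregateByKey(requests, aggKey='item'):
    """Merge dicts that are identical except for their aggKey list."""
    merged = []
    for req in requests:
        rest = {k: v for k, v in req.items() if k != aggKey}
        match = None
        for cand in merged:
            if ({k: v for k, v in cand.items() if k != aggKey} == rest
                    and isinstance(cand.get(aggKey), list)
                    and isinstance(req.get(aggKey), list)):
                match = cand
                break
        if match:
            match[aggKey] = match[aggKey] + req[aggKey]
        else:
            merged.append(dict(req))
    return merged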
Example #6
class MSOutput(MSCore):
    """
    The MSOutput class provides the whole logic behind output data placement
    in MicroServices.
    """
    def __init__(self, msConfig, mode, logger=None):
        """
        Runs the basic setup and initialization for the MSOutput module
        :msConfig: microservice configuration
        :mode: MSOutput run mode:
            - MSOutputConsumer:
                Reads the workflow and transfer subscriptions from MongoDB and
                makes transfer subscriptions.
            - MSOutputProducer:
                Fetches workflows in a given status from ReqMgr2, then creates
                and uploads the documents to MongoDB.
        """
        super(MSOutput, self).__init__(msConfig, logger)

        self.mode = mode
        self.msConfig.setdefault("limitRequestsPerCycle", 500)
        self.msConfig.setdefault("verbose", True)
        self.msConfig.setdefault("interval", 600)
        self.msConfig.setdefault("services", ['output'])
        self.msConfig.setdefault("defaultDataManSys", "DDM")
        self.msConfig.setdefault("defaultGroup", "DataOps")
        self.msConfig.setdefault("enableAggSubscr", True)
        self.msConfig.setdefault("enableDataPlacement", False)
        self.msConfig.setdefault("excludeDataTier", ['NANOAOD', 'NANOAODSIM'])
        self.msConfig.setdefault("rucioAccount", 'wma_test')
        self.msConfig.setdefault("mongoDBUrl", 'mongodb://localhost')
        self.msConfig.setdefault("mongoDBPort", 8230)
        self.msConfig.setdefault("streamerBufferFile", None)
        self.emailAlert = EmailAlert(self.msConfig)

        self.cric = CRIC(logger=self.logger)
        self.uConfig = {}
        self.campaigns = {}
        self.psn2pnnMap = {}

        msOutIndex = IndexModel('RequestName', unique=True)
        msOutDBConfig = {
            'database': 'msOutDB',
            'server': self.msConfig['mongoDBUrl'],
            'port': self.msConfig['mongoDBPort'],
            'logger': self.logger,
            'create': True,
            'collections': [('msOutRelValColl', msOutIndex),
                            ('msOutNonRelValColl', msOutIndex)]
        }

        self.msOutDB = MongoDB(**msOutDBConfig).msOutDB
        self.msOutRelValColl = self.msOutDB['msOutRelValColl']
        self.msOutNonRelValColl = self.msOutDB['msOutNonRelValColl']
        self.ddm = DDM(
            url=self.msConfig['ddmUrl'],
            logger=self.logger,
            enableDataPlacement=self.msConfig['enableDataPlacement'])

    @retry(tries=3, delay=2, jitter=2)
    def updateCaches(self):
        """
        Fetch some data required for the output logic, e.g.:
        * unified configuration
        """
        self.logger.info("Updating local cache information.")
        self.uConfig = self.unifiedConfig()
        campaigns = self.reqmgrAux.getCampaignConfig("ALL_DOCS")
        self.psn2pnnMap = self.cric.PSNtoPNNMap()
        if not self.uConfig:
            raise RuntimeWarning("Failed to fetch the unified configuration")
        elif not campaigns:
            raise RuntimeWarning("Failed to fetch the campaign configurations")
        elif not self.psn2pnnMap:
            raise RuntimeWarning("Failed to fetch PSN x PNN map from CRIC")
        else:
            # let's make campaign look-up easier and more efficient
            self.campaigns = {}
            for camp in campaigns:
                self.campaigns[camp['CampaignName']] = camp

    def execute(self, reqStatus):
        """
        Executes the whole output data placement logic
        :return: summary
        """

        # start threads in MSManager which should call this method
        # NOTE:
        #    Here we should make the whole logic - like:
        #    * Calling the system to fetch the workflows from;
        #    * Creating the workflow objects;
        #    * Pushing them into the back end database system we choose for bookkeeping
        #    * Updating their status in that system, both MsStatus (subscribed,
        #      processing, etc.) and also the Reqmgr status
        #    * Associate and keep track of the requestID/subscriptionID/ruleID
        #      returned by the Data Management System and the workflow
        #      object (through the bookkeeping machinery we choose/develop)
        self.currHost = gethostname()
        self.currThread = current_thread()
        self.currThreadIdent = "%s:%s@%s" % (
            self.currThread.name, self.currThread.ident, self.currHost)

        if self.mode == 'MSOutputProducer':
            summary = self._executeProducer(reqStatus)

        elif self.mode == 'MSOutputConsumer':
            summary = self._executeConsumer()

        else:
            msg = "MSOutput is running in unsupported mode: %s\n" % self.mode
            msg += "Skipping the current run!"
            self.logger.warning(msg)
            summary = {}

        return summary

    def _executeProducer(self, reqStatus):
        """
        The function to update caches and to execute the Producer function itself
        """
        summary = dict(OUTPUT_PRODUCER_REPORT)
        self.updateReportDict(summary, "thread_id", self.currThreadIdent)
        msg = "{}: MSOutput is running in mode: {}".format(
            self.currThreadIdent, self.mode)
        self.logger.info(msg)

        try:
            requestRecords = {}
            for status in reqStatus:
                numRequestRecords = len(requestRecords)
                requestRecords.update(self.getRequestRecords(status))
                msg = "{}: Retrieved {} requests in status {} from ReqMgr2. ".format(
                    self.currThreadIdent,
                    len(requestRecords) - numRequestRecords, status)
                self.logger.info(msg)
        except Exception as err:  # general error
            msg = "{}: Unknown exception while fetching requests from ReqMgr2. ".format(
                self.currThreadIdent)
            msg += "Error: {}".format(str(err))
            self.logger.exception(msg)

        try:
            self.updateCaches()
        except RuntimeWarning as ex:
            msg = "{}: All retries exhausted! Last error was: '{}'".format(
                self.currThreadIdent, str(ex))
            msg += "\nRetrying to update caches again in the next cycle."
            self.logger.error(msg)
            self.updateReportDict(summary, "error", msg)
            return summary
        except Exception as ex:
            msg = "{}: Unknown exception updating caches. ".format(
                self.currThreadIdent)
            msg += "Error: {}".format(str(ex))
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)
            return summary

        try:
            streamer = MSOutputStreamer(
                bufferFile=self.msConfig['streamerBufferFile'],
                requestRecords=requestRecords,
                logger=self.logger)
            total_num_requests = self.msOutputProducer(streamer())
            msg = "{}: Total {} requests processed from the streamer. ".format(
                self.currThreadIdent, total_num_requests)
            self.logger.info(msg)
            self.updateReportDict(summary, "total_num_requests",
                                  total_num_requests)
        except Exception as ex:
            msg = "{}: Unknown exception while running the Producer thread. ".format(
                self.currThreadIdent)
            msg += "Error: {}".format(str(ex))
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)

        return summary

    def _executeConsumer(self):
        """
        The function to execute the Consumer function itself
        """

        summary = dict(OUTPUT_CONSUMER_REPORT)
        self.updateReportDict(summary, "thread_id", self.currThreadIdent)
        msg = "{}: MSOutput is running in mode: {} ".format(
            self.currThreadIdent, self.mode)
        self.logger.info(msg)
        msg = "{}: Service set to process up to {} requests ".format(
            self.currThreadIdent, self.msConfig["limitRequestsPerCycle"])
        msg += "per cycle per each type 'RelVal' and 'NonRelval' workflows."
        self.logger.info(msg)

        if not self.msConfig['enableDataPlacement']:
            msg = "{} enableDataPlacement = False. ".format(
                self.currThreadIdent)
            msg += "Running the MSOutput service in dry run mode"
            self.logger.warning(msg)

        try:
            total_num_requests = self.msOutputConsumer()
            msg = "{}: Total {} requests processed. ".format(
                self.currThreadIdent, total_num_requests)
            self.logger.info(msg)
            self.updateReportDict(summary, "total_num_requests",
                                  total_num_requests)
        except Exception as ex:
            msg = "{}: Unknown exception while running Consumer thread. ".format(
                self.currThreadIdent)
            msg += "Error: {}".format(str(ex))
            self.logger.exception(msg)
            self.updateReportDict(summary, "error", msg)

        return summary

    def makeSubscriptions(self, workflow):
        """
        The common function to make the final subscriptions. It depends on the
        default Data Management System configured through msConfig; based on that,
        the relevant service wrapper is called.
        :return: A list of results from the REST interface of the DMS in question
        """

        # NOTE:
        #    This is just an example construction of the function. None of the
        #    data structures used to visualise it are correct. To be updated.

        if self.msConfig['defaultDataManSys'] == 'DDM':
            # NOTE:
            #    We always aggregate per workflow here (regardless of enableAggSubscr)
            #    and then, if we work in strides and enableAggSubscr is True,
            #    we will aggregate all similar subscriptions for all workflows
            #    in a single subscription - then comes the mess of how to map back
            #    which workflow's outputs went to which transfer subscription etc.
            #    (TODO:)
            #
            # NOTE:
            #    Once we move to working in strides of multiple workflows at a time
            #    then the workflow sent to that function should not be a single one
            #    but an iterator of length 'stride' and then we should be doing:
            #    for workflow in workflows:
            if isinstance(workflow, MSOutputTemplate):
                ddmReqList = []
                try:
                    if workflow['isRelVal']:
                        group = 'RelVal'
                    else:
                        group = 'DataOps'

                    for dMap in workflow['destinationOutputMap']:
                        try:
                            ddmRequest = DDMReqTemplate(
                                'copy',
                                item=dMap['datasets'],
                                n=workflow['numberOfCopies'],
                                site=dMap['destination'],
                                group=group)
                        except KeyError as ex:
                            # NOTE:
                            #    If we get to here it is most probably because the 'site'
                            #    mandatory field to the DDM request is missing (due to an
                            #    'ALCARECO' dataset from a Relval workflow or similar).
                            #    Since this is expected to happen a lot, we'd better just
                            #    log a warning and continue
                            msg = "Could not create DDMReq for Workflow: {}".format(
                                workflow['RequestName'])
                            msg += "Error: {}".format(ex)
                            self.logger.warning(msg)
                            continue
                        ddmReqList.append(ddmRequest)

                except Exception as ex:
                    msg = "Could not create DDMReq for Workflow: {}".format(
                        workflow['RequestName'])
                    msg += "Error: {}".format(ex)
                    self.logger.exception(msg)
                    return workflow

                try:
                    # In the message below we may want to put the list of datasets too
                    msg = "Making transfer subscriptions for %s"
                    self.logger.info(msg, workflow['RequestName'])

                    if ddmReqList:
                        ddmResultList = self.ddm.makeAggRequests(ddmReqList,
                                                                 aggKey='item')
                    else:
                        # NOTE:
                        #    Nothing else to be done here. We mark the document as
                        #    done so we do not iterate through it multiple times
                        msg = "Skip submissions for %s. Either all data Tiers were "
                        msg += "excluded or there were no Output Datasets at all."
                        self.logger.warning(msg, workflow['RequestName'])
                        self.docKeyUpdate(workflow, transferStatus='done')
                        return workflow
                except Exception as ex:
                    msg = "Could not make transfer subscription for Workflow: {}".format(
                        workflow['RequestName'])
                    msg += "Error: {}".format(ex)
                    self.logger.exception(msg)
                    return workflow

                ddmStatusList = [
                    'new', 'activated', 'completed', 'rejected', 'cancelled'
                ]
                transferIDs = []
                transferStatusList = []
                for ddmResult in ddmResultList:
                    if 'data' in ddmResult:
                        transferId = ddmResult['data'][0]['request_id']
                        status = ddmResult['data'][0]['status']
                        transferStatusList.append({
                            'transferID': transferId,
                            'status': status
                        })
                        transferIDs.append(transferId)

                if transferStatusList and all(
                        x['status'] in ddmStatusList
                        for x in transferStatusList):
                    self.docKeyUpdate(workflow,
                                      transferStatus='done',
                                      transferIDs=transferIDs)
                    return workflow
                else:
                    self.docKeyUpdate(workflow, transferStatus='incomplete')
                    msg = "No data found in ddmResults for %s. Either dry run mode or " % workflow[
                        'RequestName']
                    msg += "broken transfer submission to DDM. "
                    msg += "ddmResults: \n%s" % pformat(ddmResultList)
                    self.logger.warning(msg)
                return workflow

            elif isinstance(workflow, (list, set, CommandCursor)):
                ddmRequests = {}
                for wflow in workflow:
                    wflowName = wflow['RequestName']
                    ddmRequests[wflowName] = DDMReqTemplate(
                        'copy',
                        item=wflow['OutputDatasets'],
                        n=wflow['numberOfCopies'],
                        site=wflow['destination'])
                if self.msConfig['enableAggSubscr']:
                    # ddmResults = self.ddm.makeAggRequests(ddmRequests.values(), aggKey='item')
                    # TODO:
                    #    Here to deal with the reverse mapping of DDM request_id to workflow
                    pass
                else:
                    # for wflowName, ddmReq in ddmRequests.items():
                    #     ddmResults.append(self.ddm.makeRequests(ddmReq))
                    # TODO:
                    #    Here to deal with making request per workflow and
                    #    reconstructing and returning the same type of object
                    #    as the one that was passed to the current call.
                    pass
            else:
                msg = "Unsupported type %s for workflows!\n" % type(workflow)
                msg += "Skipping this call"
                self.logger.error(msg)

        elif self.msConfig['defaultDataManSys'] == 'PhEDEx':
            pass

        elif self.msConfig['defaultDataManSys'] == 'Rucio':
            pass

        # NOTE:
        #    if we are about to implement this through a pipeline we MUST not
        #    return the result here but the WHOLE document with updated fields
        #    for the transfer as it will be passed to the next function in
        #    the pipeline and uploaded to MongoDB
        return workflow

    def getRequestRecords(self, reqStatus):
        """
        Queries ReqMgr2 for requests in a given status.
        NOTE: to be taken from MSTransferor with minor changes
        """

        # NOTE:
        #    If we are about to use an additional database for book keeping like
        #    MongoDB, we can fetch up to 'limitRequestsPerCycle' and keep track
        #    of their status.

        # The following is taken from MSMonitor, just for an example.
        # get requests from ReqMgr2 data-service for given status
        # here with detail=False we get back list of records
        result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True)
        if not result:
            requests = {}
        else:
            requests = result[0]
        self.logger.info('  retrieved %s requests in status: %s',
                         len(requests), reqStatus)

        return requests

    def msOutputConsumer(self):
        """
        A top level function to drive the creation and book keeping of all the
        subscriptions to the Data Management System
        """
        # DONE:
        #    Done: To check if the 'enableDataPlacement' flag is really taken into account
        #    Done: To make this for both relvals and non relvals
        #    Done: To return the result
        #    Done: To make report document
        #    Done: To build it through a pipe
        #    Done: To write back the updated document to MonogoDB
        msPipelineRelVal = Pipeline(name="MSOutputConsumer PipelineRelVal",
                                    funcLine=[
                                        Functor(self.docReadfromMongo,
                                                self.msOutRelValColl,
                                                setTaken=False),
                                        Functor(self.makeSubscriptions),
                                        Functor(self.docKeyUpdate,
                                                isTaken=False,
                                                isTakenBy=None,
                                                lastUpdate=int(time())),
                                        Functor(self.docUploader,
                                                self.msOutRelValColl,
                                                update=True,
                                                keys=[
                                                    'isTaken', 'lastUpdate',
                                                    'transferStatus',
                                                    'transferIDs'
                                                ]),
                                        Functor(self.docDump,
                                                pipeLine='PipelineRelVal'),
                                        Functor(self.docCleaner)
                                    ])
        msPipelineNonRelVal = Pipeline(
            name="MSOutputConsumer PipelineNonRelVal",
            funcLine=[
                Functor(self.docReadfromMongo,
                        self.msOutNonRelValColl,
                        setTaken=False),
                Functor(self.makeSubscriptions),
                Functor(self.docKeyUpdate,
                        isTaken=False,
                        isTakenBy=None,
                        lastUpdate=int(time())),
                Functor(self.docUploader,
                        self.msOutNonRelValColl,
                        update=True,
                        keys=[
                            'isTaken', 'lastUpdate', 'transferStatus',
                            'transferIDs'
                        ]),
                Functor(self.docDump, pipeLine='PipelineNonRelVal'),
                Functor(self.docCleaner)
            ])

        # NOTE:
        #    If we actually have any exception that has reached to the top level
        #    exception handlers (eg. here - outside the pipeLine), this means
        #    some function from within the pipeLine has not caught it and the msOutDoc
        #    has left the pipe and died before the relevant document in MongoDB
        #    has been released (its flag 'isTaken' to be set back to False)
        wfCounters = {}
        for pipeLine in [msPipelineRelVal, msPipelineNonRelVal]:
            pipeLineName = pipeLine.getPipelineName()
            wfCounters[pipeLineName] = 0
            while wfCounters[pipeLineName] < self.msConfig['limitRequestsPerCycle']:
                # take only workflows:
                # - which are not already taken,
                # - for which a transfer subscription has never been made, and
                # - which have not been retried in the same cycle
                # NOTE:
                #    Once we are running the service not in dry run mode, we may
                #    consider adding an $or condition in mQueryDict for transferStatus:
                #    '$or': [{'transferStatus': None},
                #            {'transferStatus': 'incomplete'}]
                #    So that we can collect also workflows with partially or fully
                #    unsuccessful transfers
                currTime = int(time())
                threshTime = currTime - self.msConfig['interval']
                mQueryDict = {
                    '$and': [
                        {'isTaken': False},
                        {'$or': [{'transferStatus': None},
                                 {'transferStatus': 'incomplete'}]},
                        {'$or': [{'lastUpdate': None},
                                 {'lastUpdate': {'$lt': threshTime}}]}
                    ]
                }

                # FIXME:
                #    To redefine those exceptions as MSoutputExceptions and
                #    start using those here so we do not mix with general errors
                try:
                    pipeLine.run(mQueryDict)
                except (KeyError, TypeError) as ex:
                    msg = "%s Possibly malformed record in MongoDB. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Continue to the next document."
                    self.logger.exception(msg)
                    continue
                except EmptyResultError:
                    msg = "%s All relevant records in MongoDB exhausted. " % pipeLineName
                    msg += "We are done for the current cycle."
                    self.logger.info(msg)
                    break
                except Exception as ex:
                    msg = "%s General Error from pipeline. Err: %s. " % (
                        pipeLineName, str(ex))
                    msg += "Giving up Now."
                    self.logger.error(msg)
                    self.logger.exception(ex)
                    break
                wfCounters[pipeLineName] += 1

        wfCounterTotal = sum(wfCounters.values())
        return wfCounterTotal

    def msOutputProducer(self, requestRecords):
        """
        A top level function to drive the upload of all the documents to MongoDB
        """

        # DONE:
        #    To implement this as a functional pipeline in the following sequence:
        #    1) document streamer - to generate all the records coming from Reqmgr2
        #    2) document stripper - to cut all the keys we do not need
        #    3) MongoDB document creator - to pass it through the MongoDBTemplate
        #    4) document updater - fetch & update all the needed info like campaign config etc.
        #    5) MongoDB upload/update - to upload/update the document in MongoDB

        # DONE:
        #    to have the requestRecords generated through a call to docStreamer,
        #    and the call should happen from inside this function so that all
        #    the objects generated do not leave the scope of this function,
        #    thereby reducing the memory footprint

        # DONE:
        #    to set a destructive function at the end of the pipeline
        # NOTE:
        #    To discuss the collection names
        # NOTE:
        #    Here we should never use docUploader with `update=True`, because
        #    this will erase the latest state of already existing and fully or
        #    partially processed documents by the Consumer pipeline
        self.logger.info("Running the msOutputProducer ...")
        msPipelineRelVal = Pipeline(name="MSOutputProducer PipelineRelVal",
                                    funcLine=[
                                        Functor(self.docTransformer),
                                        Functor(self.docKeyUpdate,
                                                isRelVal=True),
                                        Functor(self.docInfoUpdate,
                                                pipeLine='PipelineRelVal'),
                                        Functor(self.docUploader,
                                                self.msOutRelValColl),
                                        Functor(self.docCleaner)
                                    ])
        msPipelineNonRelVal = Pipeline(
            name="MSOutputProducer PipelineNonRelVal",
            funcLine=[
                Functor(self.docTransformer),
                Functor(self.docKeyUpdate, isRelVal=False),
                Functor(self.docInfoUpdate, pipeLine='PipelineNonRelVal'),
                Functor(self.docUploader, self.msOutNonRelValColl),
                Functor(self.docCleaner)
            ])
        # TODO:
        #    To generate the object from within the Function scope see above.
        counter = 0
        for _, request in requestRecords:
            counter += 1
            if request.get('SubRequestType') == 'RelVal':
                pipeLine = msPipelineRelVal
            else:
                pipeLine = msPipelineNonRelVal
            pipeLineName = pipeLine.getPipelineName()
            try:
                pipeLine.run(request)
            except (KeyError, TypeError) as ex:
                msg = "%s Possibly broken read from Reqmgr2 API or other Err: %s. " % (
                    pipeLineName, str(ex))
                msg += "Continue to the next document."
                self.logger.exception(msg)
                continue
            except Exception as ex:
                msg = "%s General Error from pipeline. Err: %s. " % (
                    pipeLineName, str(ex))
                msg += "Giving up Now."
                self.logger.error(msg)
                self.logger.exception(ex)
                break
        return counter

    def docTransformer(self, doc):
        """
        A function used to transform a request record from reqmgr2 to a document
        suitable for uploading to Mongodb
        """
        # Solution 1: Destructive function - to force a clear of the externally
        #             referenced object and to return a new one (current solution)
        #             NOTE: Leaves an empty dictionary behind (the clear method just
        #                   clears all the keys of the dict, but does not delete it)
        # Solution 2: To work in place (will keep the dynamic structure of the passed dict)
        # Solution 3: To have 2 object buffers for the two diff types outside the function
        try:
            msOutDoc = MSOutputTemplate(doc)
            doc.clear()
        except Exception as ex:
            msg = "ERR: Unable to create MSOutputTemplate for document: \n%s\n" % pformat(
                doc)
            msg += "ERR: %s" % str(ex)
            self.logger.exception(msg)
            raise ex
        return msOutDoc

    def docDump(self, msOutDoc, pipeLine=None):
        """
        Prints document contents
        """
        msg = "{}: {}: Processed 'msOutDoc' with '_id': {}.".format(
            self.currThreadIdent, pipeLine, msOutDoc['_id'])
        self.logger.info(msg)
        self.logger.debug(pformat(msOutDoc))
        return msOutDoc

    def docKeyUpdate(self, msOutDoc, **kwargs):
        """
        A function used to update one or few particular fields in a document
        :**kwargs: the key/value pairs to be updated (will be tested against MSOutputTemplate)
        """
        for key, value in kwargs.items():
            try:
                msOutDoc.setKey(key, value)
                msOutDoc.updateTime()
            except Exception as ex:
                msg = "Cannot update key {} for doc: {}\n".format(
                    key, msOutDoc['_id'])
                msg += "Error: {}".format(str(ex))
                self.logger.warning(msg)
        return msOutDoc

    def docInfoUpdate(self, msOutDoc, pipeLine=None):
        """
        A function intended to fetch and fill into the document all the needed
        additional information like campaignOutputMap etc.
        """

        # Fill the destinationOutputMap first
        destinationOutputMap = []
        wflowDstSet = set()
        updateDict = {}
        for dataset in msOutDoc['OutputDatasets']:
            _, dsn, procString, dataTier = dataset.split('/')
            # NOTE:
            #    Data tiers that have been configured to be excluded will never
            #    enter the destinationOutputMap
            if dataTier in self.msConfig['excludeDataTier']:
                # msg = "%s: %s: "
                # msg += "Data Tier: %s is blacklisted. "
                # msg += "Skipping dataset placement for: %s:%s"
                # self.logger.info(msg,
                #                  self.currThreadIdent,
                #                  pipeLine,
                #                  dataTier,
                #                  msOutDoc['RequestName'],
                #                  dataset)
                continue

            destination = set()

            if msOutDoc['isRelVal']:
                if dataTier != "RECO" and dataTier != "ALCARECO":
                    destination.add('T2_CH_CERN')
                if dataTier == "GEN-SIM":
                    destination.add('T1_US_FNAL_Disk')
                if dataTier == "GEN-SIM-DIGI-RAW":
                    destination.add('T1_US_FNAL_Disk')
                if dataTier == "GEN-SIM-RECO":
                    destination.add('T1_US_FNAL_Disk')
                if "RelValTTBar" in dsn and "TkAlMinBias" in procString and dataTier != "ALCARECO":
                    destination.add('T2_CH_CERN')
                if "MinimumBias" in dsn and "SiStripCalMinBias" in procString and dataTier != "ALCARECO":
                    destination.add('T2_CH_CERN')
            else:
                # FIXME:
                #    Here we need to use the already created campaignMap for
                #    building the destinationOutputMap for nonRelVal workflows.
                #    For the time being it is a fallback to all T1_* and all T2_*.
                #    Once we migrate to Rucio we should change those defaults to
                #    whatever is the format in Rucio (eg. referring a subscription
                #    rule like: "store it at a good site" or "Store in the USA" etc.)
                destination.add('T1_*')
                destination.add('T2_*')

            dMap = {'datasets': [dataset], 'destination': list(destination)}
            destinationOutputMap.append(dMap)
            wflowDstSet |= destination

        # here we try to aggregate the destination map per destination
        aggDstMap = []

        # populate the first element in the aggregated list
        if destinationOutputMap:
            aggDstMap.append(destinationOutputMap.pop())

        # feed the rest
        while destinationOutputMap:
            dMap = destinationOutputMap.pop()
            found = False
            for aggMap in aggDstMap:
                if set(dMap['destination']) == set(aggMap['destination']):
                    # Check if the two objects are not references to one and the
                    # same object. Only then copy the values of the dMap,
                    # otherwise we will enter an endless cycle.
                    if dMap is not aggMap:
                        aggMap['datasets'].extend(dMap['datasets'])
                    found = True
                    del dMap
                    break
            if not found:
                aggDstMap.append(dMap)

        # finally reassign the destination map with the aggregated one
        destinationOutputMap = aggDstMap

        wflowDstList = list(wflowDstSet)
        updateDict['destination'] = wflowDstList
        updateDict['destinationOutputMap'] = destinationOutputMap
        try:
            msOutDoc.updateDoc(updateDict, throw=True)
        except Exception as ex:
            msg = "%s: %s: Could not update the additional information for "
            msg += "'msOutDoc' with '_id': %s \n"
            msg += "Error: %s"
            self.logger.exception(msg, self.currThreadIdent, pipeLine,
                                  msOutDoc['_id'], str(ex))
        return msOutDoc

    def docUploader(self,
                    msOutDoc,
                    dbColl,
                    update=False,
                    keys=None,
                    stride=None):
        """
        A function to upload documents to MongoDB. The session object to the  relevant
        database and Collection must be passed as arguments
        :msOutDocs: A list of documents of type MSOutputTemplate
        :dbColl: an object containing an active connection to a MongoDB Collection
        :stride: the max number of documents we are about to upload at once
        :update: A flag to trigger document update in MongoDB in case of duplicates
        :keys:   A list of keys to update. If missing the whole document will be updated
        """
        # DONE: to determine the collection to which the document belongs based
        #       on 'isRelval' key or some other criteria
        # NOTE: We must return the document(s) at the end so that it can be explicitly
        #       deleted outside the pipeline

        # Skipping documents avoiding index unique property (documents having the
        # same value for the indexed key as an already uploaded document)
        try:
            dbColl.insert_one(msOutDoc)
        except errors.DuplicateKeyError:
            # DONE:
            #    Here we may wish to double check and make document update, so
            #    that a change of the Request on ReqMgr may be reflected here too
            # NOTE:
            #    If we use the 'update' option with a fresh document created from
            #    Reqmgr2 and we overwrite an already existing document in MongoDB
            #    which has already been worked on, we will lose the information
            #    that has been stored in MongoDB - so always use 'update'
            #    with the proper set of keys to be updated
            if not keys:
                keys = []

            # update only the requested keys:
            if update and keys:
                updateDict = {}
                for key in keys:
                    updateDict[key] = msOutDoc[key]
                msOutDoc = dbColl.find_one_and_update(
                    {'_id': msOutDoc['_id']}, {'$set': updateDict},
                    return_document=ReturnDocument.AFTER)
            if update and not keys:
                msOutDoc = dbColl.find_one_and_update(
                    {'_id': msOutDoc['_id']}, {'$set': msOutDoc},
                    return_document=ReturnDocument.AFTER)
        return msOutDoc

    def docReadfromMongo(self, mQueryDict, dbColl, setTaken=False):
        """
        Reads a single Document from MongoDB and if setTaken flag is on then
        Sets the relevant flags (isTaken, isTakenBy) in the document at MongoDB
        """
        # NOTE:
        #    In case the current query returns an empty document from MongoDB
        #    (eg. all workflows have been processed) the MSOutputTemplate
        #    will throw an error. We should catch this one here and interrupt
        #    the pipeLine traversal, otherwise an error either here or in one of the
        #    following stages will most probably occur and the whole run will be broken.
        if setTaken:
            lastUpdate = int(time())
            retrString = self.currThreadIdent
            mongoDoc = dbColl.find_one_and_update(
                mQueryDict, {
                    '$set': {
                        'isTaken': True,
                        'isTakenBy': retrString,
                        'lastUpdate': lastUpdate
                    }
                },
                return_document=ReturnDocument.AFTER)
        else:
            mongoDoc = dbColl.find_one(mQueryDict)
        if mongoDoc:
            try:
                msOutDoc = MSOutputTemplate(mongoDoc)
            except Exception as ex:
                msg = "Unable to create msOutDoc from %s." % mongoDoc
                self.logger.warning(msg)
                raise ex
                # NOTE:
                #    Here if we do not update the isTaken flag in MongoDB back
                #    to False, the document will not be released in MongoDB and
                #    will stay locked. If we are ending up here it means for some
                #    reason we have a malformed document in MongoDB. We should make
                #    a design choice - should we release the document or should
                #    we leave it locked for further investigations or maybe
                #    mark it with another flag eg. 'isMalformed': True
        else:
            raise EmptyResultError
        return msOutDoc

    def docCleaner(self, doc):
        """
        Calls the dictionary internal method clear() and purges all the contents
        of the document
        """
        return doc.clear()
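
For reference, a minimal sketch of the Pipeline/Functor machinery the two driver functions rely on, assuming only what the calls above imply: a Functor pre-binds extra arguments, and a Pipeline threads a single document through its funcLine and exposes getPipelineName(). The real WMCore classes may differ.

class Functor(object):
    """Binds extra positional/keyword arguments to a pipeline step."""
    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, doc):
        # the travelling document always goes first
        return self.func(doc, *self.args, **self.kwargs)


class Pipeline(object):
    """Runs a document through an ordered list of Functor steps."""
    def __init__(self, name="Pipeline", funcLine=None):
        self.name = name
        self.funcLine = funcLine or []

    def getPipelineName(self):
        return self.name

    def run(self, doc):
        for step in self.funcLine:
            doc = step(doc)
        return doc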