Example #1
0
    def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.

        When configDoc is set (neither None nor ""), the document is loaded
        by ID through a ConfigCache built from couchURL and couchDBName, and
        whatever its getOutputModuleInfo() reports is returned.

        Otherwise the modules are derived from the scenario:
          - "promptReco" / "expressProcessing": one module per entry of
            scenarioArgs['outputs'], keyed by the entry's 'moduleLabel' and
            carrying its 'dataTier' and 'filterName' (None when absent).
          - "alcaSkim": one "ALCARECOStream<skim>" module per entry of
            scenarioArgs['skims'], with data tier "ALCARECO" and the skim
            name as the filter name.
        Any other scenarioFunc yields an empty dictionary.

        Returns a dictionary mapping module label to a metadata dictionary.
        """
        # scenarioArgs defaults to None; treat that as "no arguments" so the
        # scenario branches below do not fail on None.get().
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc is not None and configDoc != "":
            configCache = ConfigCache(couchURL, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        elif scenarioFunc in ("promptReco", "expressProcessing"):
            for output in scenarioArgs.get('outputs', []):
                outputModules[output['moduleLabel']] = {
                    'dataTier' : output['dataTier'],
                    'filterName' : output.get('filterName', None)}
        elif scenarioFunc == "alcaSkim":
            for alcaSkim in scenarioArgs.get('skims', []):
                moduleLabel = "ALCARECOStream%s" % alcaSkim
                outputModules[moduleLabel] = {'dataTier' : "ALCARECO",
                                              'filterName' : alcaSkim}

        return outputModules
Example #2
0
    def determineOutputModules(self,
                               scenarioFunc=None,
                               scenarioArgs=None,
                               configDoc=None,
                               couchURL=None,
                               couchDBName=None,
                               configCacheUrl=None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.

        When configDoc is set (neither None nor ""), the document is loaded
        by ID through a ConfigCache pointed at configCacheUrl (falling back
        to couchURL when that is empty) and couchDBName, and the result of
        its getOutputModuleInfo() is returned.

        Otherwise the first matching rule below is applied:
          - 'outputs' present and scenarioFunc is "promptReco",
            "expressProcessing" or "repack": one module per output entry,
            keyed by 'moduleLabel', with its 'dataTier' and, only when
            present in the entry, 'primaryDataset' and 'filterName'.
          - 'writeTiers' present and scenarioFunc is "promptReco": one
            "<tier>output" module per tier.
          - scenarioFunc is "alcaSkim": one "ALCARECOStream<skim>" module per
            entry of 'skims'; skims whose name starts with "PromptCalibProd"
            get data tier "ALCAPROMPT", all others "ALCARECO".

        Returns a dictionary mapping module label to a metadata dictionary.
        """
        # set default scenarioArgs to empty dictionary if it is None.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc is not None and configDoc != "":
            url = configCacheUrl or couchURL
            configCache = ConfigCache(url, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if 'outputs' in scenarioArgs and scenarioFunc in [
                    "promptReco", "expressProcessing", "repack"
            ]:
                for output in scenarioArgs.get('outputs', []):
                    moduleLabel = output['moduleLabel']
                    outputModules[moduleLabel] = {
                        'dataTier': output['dataTier']
                    }
                    # Optional keys are copied only when the entry has them.
                    # Uses "in" because dict.has_key() is gone in Python 3.
                    for optionalKey in ('primaryDataset', 'filterName'):
                        if optionalKey in output:
                            outputModules[moduleLabel][optionalKey] = output[
                                optionalKey]

            elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get('writeTiers'):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = {'dataTier': dataTier}

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get('skims', []):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    if alcaSkim.startswith("PromptCalibProd"):
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = {
                        'dataTier': dataTier,
                        'primaryDataset': scenarioArgs.get('primaryDataset'),
                        'filterName': alcaSkim
                    }

        return outputModules
Example #3
0
    def validateConfigCacheExists(self,
                                  configID,
                                  couchURL,
                                  couchDBName,
                                  getOutputModules=False):
        """
        _validateConfigCacheExists_

        Load the ConfigCache document identified by configID from the given
        couch URL / database and sanity-check its output modules.

        The following conditions are reported through
        self.raiseValidationException:
          - configID is '' or ' ';
          - loadByID raises ConfigCacheException;
          - getOutputModuleInfo raises any Exception;
          - an output module has no (or an empty) 'dataTier';
          - two output modules share the same dataTier/filterName pair.

        Returns the output module info when getOutputModules is true,
        otherwise None.
        """
        if configID in ('', ' '):
            self.raiseValidationException(
                msg="ConfigCacheID is invalid and cannot be loaded")

        cache = ConfigCache(dbURL=couchURL,
                            couchDBName=couchDBName,
                            id=configID)
        try:
            cache.loadByID(configID=configID)
        except ConfigCacheException:
            self.raiseValidationException(
                msg="Failure to load ConfigCache while validating workload")

        try:
            outputModuleInfo = cache.getOutputModuleInfo()
        except Exception:
            # Something's gone wrong with trying to open the configCache
            self.raiseValidationException(
                msg="Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!")

        # Maps each data tier to the list of filter names already seen for it.
        filtersByTier = {}
        for moduleInfo in outputModuleInfo.values():
            tier = moduleInfo.get('dataTier', None)
            filterLabel = moduleInfo.get('filterName', None)
            if not tier:
                self.raiseValidationException(
                    msg="No DataTier in output module.")

            seenForTier = filtersByTier.setdefault(tier, [])
            if filterLabel in seenForTier:
                # Then we've seen this combination before
                self.raiseValidationException(
                    msg="Duplicate dataTier/filterName combination.")
            else:
                seenForTier.append(filterLabel)

        return outputModuleInfo if getOutputModules else None
Example #4
0
    def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None, configCacheUrl = None):
        """
        _determineOutputModules_

        Work out which output modules a config produces, and the metadata
        attached to each of them.

        With a configDoc (neither None nor "") the answer comes from a
        ConfigCache: instances are kept in self.config_cache keyed by
        (url, couchDBName), where url is configCacheUrl or, failing that,
        couchURL. The document is loaded by ID and getOutputModuleInfo() is
        returned.

        Without a configDoc the modules are built from scenarioArgs; the
        first matching rule wins:
          - 'outputs' given and scenarioFunc in "promptReco",
            "expressProcessing", "repack": one entry per output, keyed by
            'moduleLabel', holding 'dataTier' plus 'primaryDataset' and
            'filterName' when the output defines them.
          - 'writeTiers' given and scenarioFunc == "promptReco": one
            "<tier>output" entry per tier.
          - scenarioFunc == "alcaSkim": one "ALCARECOStream<skim>" entry per
            skim, tier "ALCAPROMPT" for names starting with
            "PromptCalibProd", else "ALCARECO".
        """
        # A missing/empty scenarioArgs behaves like an empty dictionary.
        if not scenarioArgs:
            scenarioArgs = {}

        if configDoc not in (None, ""):
            cacheKey = (configCacheUrl or couchURL, couchDBName)
            if cacheKey not in self.config_cache:
                configCache = ConfigCache(cacheKey[0], couchDBName, True)
                self.config_cache[cacheKey] = configCache
            else:
                configCache = self.config_cache[cacheKey]
            #TODO: need to change to DataCache
            #configCache.loadDocument(configDoc)
            configCache.loadByID(configDoc)
            return configCache.getOutputModuleInfo()

        modules = {}
        if 'outputs' in scenarioArgs and scenarioFunc in ("promptReco", "expressProcessing", "repack"):
            for spec in scenarioArgs.get('outputs', []):
                label = spec['moduleLabel']
                info = { 'dataTier' : spec['dataTier'] }
                # Optional metadata is copied over only when the spec has it.
                for optionalKey in ('primaryDataset', 'filterName'):
                    if optionalKey in spec:
                        info[optionalKey] = spec[optionalKey]
                modules[label] = info

        elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
            modules.update(("%soutput" % tier, { 'dataTier' : tier })
                           for tier in scenarioArgs.get('writeTiers'))

        elif scenarioFunc == "alcaSkim":
            for skim in scenarioArgs.get('skims', []):
                isPromptCalib = skim.startswith("PromptCalibProd")
                modules["ALCARECOStream%s" % skim] = {
                    'dataTier' : "ALCAPROMPT" if isPromptCalib else "ALCARECO",
                    'primaryDataset' : scenarioArgs.get('primaryDataset'),
                    'filterName' : skim }

        return modules
Example #5
0
    def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                                  getOutputModules = False):
        """
        _validateConfigCacheExists_

        Try to load the ConfigCache document for configID and verify that
        its output modules are usable.

        Every problem is reported via self.raiseValidationException: a
        configID of '' or ' ', a ConfigCacheException while loading, any
        Exception while fetching the output module info, an output module
        with no 'dataTier', and a dataTier/filterName pair that occurs more
        than once.

        When getOutputModules is true the output module info is returned;
        otherwise the method returns None.
        """
        idIsBlank = configID == '' or configID == ' '
        if idIsBlank:
            self.raiseValidationException(msg = "ConfigCacheID is invalid and cannot be loaded")

        configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName,
                                  id = configID)
        try:
            configCache.loadByID(configID = configID)
        except ConfigCacheException:
            self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")

        try:
            outputModuleInfo = configCache.getOutputModuleInfo()
        except Exception:
            # Something's gone wrong with trying to open the configCache
            msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
            self.raiseValidationException(msg = msg)

        # dataTier -> filter names encountered so far for that tier
        duplicateCheck = {}
        for moduleInfo in outputModuleInfo.values():
            dataTier   = moduleInfo.get('dataTier', None)
            filterName = moduleInfo.get('filterName', None)
            if not dataTier:
                self.raiseValidationException(msg = "No DataTier in output module.")

            # Start an empty list the first time a tier shows up
            if dataTier not in duplicateCheck:
                duplicateCheck[dataTier] = []
            knownFilters = duplicateCheck[dataTier]

            if filterName not in knownFilters:
                knownFilters.append(filterName)
            else:
                # Then we've seen this combination before
                self.raiseValidationException(msg = "Duplicate dataTier/filterName combination.")

        if not getOutputModules:
            return None
        return outputModuleInfo
Example #6
0
    def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None, configCacheUrl = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.

        When configDoc is set (neither None nor ""), the document is loaded
        by ID through a ConfigCache pointed at configCacheUrl (or couchURL
        when that is empty) and couchDBName, and the result of its
        getOutputModuleInfo() is returned.

        Otherwise the first matching rule below is applied:
          - 'outputs' present and scenarioFunc is "promptReco",
            "expressProcessing" or "repack": one module per output entry,
            keyed by 'moduleLabel', with its 'dataTier' and, only when
            present in the entry, 'primaryDataset' and 'filterName'.
          - 'writeTiers' present and scenarioFunc is "promptReco": one
            "<tier>output" module per tier.
          - scenarioFunc is "alcaSkim": one "ALCARECOStream<skim>" module per
            entry of 'skims'; the skim named exactly "PromptCalibProd" gets
            data tier "ALCAPROMPT", every other skim "ALCARECO".

        Returns a dictionary mapping module label to a metadata dictionary.
        """
        # scenarioArgs defaults to None; treat that as "no arguments" so the
        # membership tests below do not raise TypeError.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc is not None and configDoc != "":
            url = configCacheUrl or couchURL
            configCache = ConfigCache(url, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if 'outputs' in scenarioArgs and scenarioFunc in [ "promptReco", "expressProcessing", "repack" ]:
                for output in scenarioArgs.get('outputs', []):
                    moduleLabel = output['moduleLabel']
                    outputModules[moduleLabel] = { 'dataTier' : output['dataTier'] }
                    # "in" replaces dict.has_key(), which Python 3 removed.
                    if 'primaryDataset' in output:
                        outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                    if 'filterName' in output:
                        outputModules[moduleLabel]['filterName'] = output['filterName']

            elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get('writeTiers'):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = { 'dataTier' : dataTier }

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get('skims',[]):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    # Exact-name match only; prefixed variants stay ALCARECO.
                    if alcaSkim == "PromptCalibProd":
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = { 'dataTier' : dataTier,
                                                   'primaryDataset' : scenarioArgs.get('primaryDataset'),
                                                   'filterName' : alcaSkim }

        return outputModules
Example #7
0
    def determineOutputModules(self, scenarioName = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.

        When configDoc is set (neither None nor ""), the document is loaded
        by ID through a ConfigCache built from couchURL and couchDBName, and
        the result of its getOutputModuleInfo() is returned.

        Otherwise one module is produced per entry of
        scenarioArgs["writeTiers"], named "output<tier><tier>", carrying the
        tier as "dataTier" and a "filterName" of None.

        scenarioName is accepted but not used by this method.

        Returns a dictionary mapping module name to a metadata dictionary.
        """
        outputModules = {}
        if configDoc is not None and configDoc != "":
            configCache = ConfigCache(couchURL, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            # scenarioArgs defaults to None; treat that as "no tiers" rather
            # than failing on None.get().
            writeTiers = (scenarioArgs or {}).get("writeTiers", [])
            for dataTier in writeTiers:
                outputModuleName = "output%s%s" % (dataTier, dataTier)
                outputModules[outputModuleName] = {"dataTier": dataTier,
                                                   "filterName": None}

        return outputModules
Example #8
0
    def determineOutputModules(
        self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None
    ):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.

        When configDoc is set (neither None nor ""), the document is loaded
        by ID through a ConfigCache built from couchURL and couchDBName, and
        the result of its getOutputModuleInfo() is returned.

        Otherwise the first matching rule below is applied:
          - "outputs" present and scenarioFunc is "promptReco",
            "expressProcessing" or "repack": one module per output entry,
            keyed by "moduleLabel", with its "dataTier" and, only when
            present in the entry, "primaryDataset" and "filterName".
          - "writeTiers" present and scenarioFunc is "promptReco": one
            "<tier>output" module per tier.
          - scenarioFunc is "alcaSkim": one "ALCARECOStream<skim>" module per
            entry of "skims", always with data tier "ALCARECO".

        Returns a dictionary mapping module label to a metadata dictionary.
        """
        # scenarioArgs defaults to None; treat that as "no arguments" so the
        # membership tests below do not raise TypeError.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc is not None and configDoc != "":
            configCache = ConfigCache(couchURL, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
                for output in scenarioArgs.get("outputs", []):
                    moduleLabel = output["moduleLabel"]
                    outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
                    # "in" replaces dict.has_key(), which Python 3 removed.
                    if "primaryDataset" in output:
                        outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
                    if "filterName" in output:
                        outputModules[moduleLabel]["filterName"] = output["filterName"]

            elif "writeTiers" in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get("writeTiers"):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = {"dataTier": dataTier}

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get("skims", []):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    outputModules[moduleLabel] = {
                        "dataTier": "ALCARECO",
                        "primaryDataset": scenarioArgs.get("primaryDataset"),
                        "filterName": alcaSkim,
                    }

        return outputModules
Example #9
0
    def determineOutputModules(
        self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None, configCacheUrl=None
    ):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.

        When configDoc is set (neither None nor ""), a ConfigCache is taken
        from self.config_cache, keyed by (url, couchDBName) with url being
        configCacheUrl or, when that is empty, couchURL; a new one is created
        and stored on a miss. The document is then loaded by ID and the
        result of getOutputModuleInfo() is returned.

        Otherwise:
          - "outputs" present and scenarioFunc is "promptReco",
            "expressProcessing" or "repack": one module per output entry,
            keyed by "moduleLabel", with its "dataTier" and, only when
            present in the entry, "primaryDataset" and "filterName".
            In addition, one "SKIMStream<skim>" module is added per entry of
            "PhysicsSkims"; its data tier comes from a fixed lookup table and
            falls back to "USER" for skims not listed there.
          - scenarioFunc is "alcaSkim": one "ALCARECOStream<skim>" module per
            entry of "skims"; skims whose name starts with "PromptCalibProd"
            get data tier "ALCAPROMPT", all others "ALCARECO".

        Returns a dictionary mapping module label to a metadata dictionary.
        """
        # set default scenarioArgs to empty dictionary if it is None.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc is not None and configDoc != "":
            url = configCacheUrl or couchURL
            if (url, couchDBName) in self.config_cache:
                configCache = self.config_cache[(url, couchDBName)]
            else:
                configCache = ConfigCache(url, couchDBName, True)
                self.config_cache[(url, couchDBName)] = configCache
            # TODO: need to change to DataCache
            # configCache.loadDocument(configDoc)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:

                for output in scenarioArgs.get("outputs", []):

                    moduleLabel = output["moduleLabel"]
                    outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
                    if "primaryDataset" in output:
                        outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
                    if "filterName" in output:
                        outputModules[moduleLabel]["filterName"] = output["filterName"]

                # Built once, outside the loop: the table does not depend on
                # the skim being processed.
                skimToDataTier = {
                    "LogError": "RAW-RECO",
                    "LogErrorMonitor": "USER",
                    "ZElectron": "RAW-RECO",
                    "ZMu": "RAW-RECO",
                    "MuTau": "RAW-RECO",
                    "TopMuEG": "RAW-RECO",
                    "EcalActivity": "RAW-RECO",
                    "CosmicSP": "RAW-RECO",
                    "CosmicTP": "RAW-RECO",
                    "ZMM": "RAW-RECO",
                    "Onia": "RECO",
                    "HighPtJet": "RAW-RECO",
                    "D0Meson": "RECO",
                    "Photon": "AOD",
                    "ZEE": "AOD",
                    "BJet": "AOD",
                    "OniaCentral": "RECO",
                    "OniaPeripheral": "RECO",
                    "SingleTrack": "AOD",
                    "MinBias": "AOD",
                    "OniaUPC": "RAW-RECO",
                    "HighMET": "RECO",
                    "BPHSkim": "USER",
                }
                for physicsSkim in scenarioArgs.get("PhysicsSkims", []):
                    # Skims missing from the table default to the USER tier.
                    dataTier = skimToDataTier.get(physicsSkim, "USER")
                    moduleLabel = "SKIMStream%s" % physicsSkim
                    outputModules[moduleLabel] = {"dataTier": dataTier, "filterName": physicsSkim}

            elif scenarioFunc == "alcaSkim":

                for alcaSkim in scenarioArgs.get("skims", []):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    if alcaSkim.startswith("PromptCalibProd"):
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = {
                        "dataTier": dataTier,
                        "primaryDataset": scenarioArgs.get("primaryDataset"),
                        "filterName": alcaSkim,
                    }

        return outputModules