Example #1
0
def createAlgoFromInfo(info):
    """
    Create an Algo object from basic information

    """
    
    algo = {'ApplicationName':    info.get('ApplicationName'),
            'ApplicationFamily':  info.get('ApplicationFamily'),
            'ApplicationVersion': info.get('ApplicationVersion'),
            'PSetHash':           info.get('PSetHash'),
            'PSetContent':        None,
            'InDBS':              info.get('AlgoInDBS', None)
            }

    configString = info.get('PSetContent')
    if configString:
        split = configString.split(';;')
        cacheURL = split[0]
        cacheDB  = split[1]
        configID = split[2]
        try:
            configCache = ConfigCache(cacheURL, cacheDB)
            configCache.loadByID(configID)
            algo['PSetContent'] = configCache.getConfig()
        except Exception as ex:
            msg =  "Exception in getting configCache from DB\n"
            msg += "Ignoring this exception and continuing without config.\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            logging.debug("URL: %s,  DB: %s,  ID: %s" % (cacheURL, cacheDB, configID))

    return algo
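Below is a minimal usage sketch for the function above. The info dictionary and the Couch coordinates are made up for illustration; only the URL;;DB;;ID layout of PSetContent (the same format Example #13 assembles with "%s;;%s;;%s") comes from the code itself.

# Hypothetical input values; only the ';;'-separated PSetContent layout is taken from the code above.
info = {'ApplicationName': 'cmsRun',
        'ApplicationFamily': 'output',
        'ApplicationVersion': 'CMSSW_5_3_0',
        'PSetHash': 'abc123hash',
        'AlgoInDBS': 0,
        'PSetContent': 'http://localhost:5984;;config_cache;;someConfigDocID'}

algo = createAlgoFromInfo(info)
# algo['PSetContent'] holds the config text if the ConfigCache lookup succeeded, otherwise None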
Example #2
0
    def createConfig(self, bad = False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache
        """
        
        PSetTweak = {'process': {'outputModules_': ['ThisIsAName'],
                                 'ThisIsAName': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}

        BadTweak  = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                                 'ThisIsAName1': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}},
                                 'ThisIsAName2': {'dataset': {'dataTier': 'RECO',
                                                             'filterName': 'Filter'}}}}

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDBName)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        if bad:
            configCache.setPSetTweaks(PSetTweak = BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak = PSetTweak)
        configCache.save()

        return configCache.getCouchID()
Example #3
0
    def createConfig(self, bad=False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache
        """

        PSetTweak = {
            "process": {
                "outputModules_": ["ThisIsAName"],
                "ThisIsAName": {"dataset": {"dataTier": "RECO", "filterName": "Filter"}},
            }
        }

        BadTweak = {
            "process": {
                "outputModules_": ["ThisIsAName1", "ThisIsAName2"],
                "ThisIsAName1": {"dataset": {"dataTier": "RECO", "filterName": "Filter"}},
                "ThisIsAName2": {"dataset": {"dataTier": "RECO", "filterName": "Filter"}},
            }
        }

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDBName)
        configCache.createUserGroup(groupname="testGroup", username="******")
        if bad:
            configCache.setPSetTweaks(PSetTweak=BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.save()

        return configCache.getCouchID()
Example #4
0
    def __call__(self, wmTask):
        """
        Trip through steps, find CMSSW steps, pull in config files,
        PSet Tweaks etc

        """
        for t in wmTask.steps().nodeIterator():
            t = WMStep.WMStepHelper(t)
            stepPath = "%s/%s" % (self.workingDirectory(), t.name())

            # CMSSW steps are a special case because of their ConfigCache arguments
            if t.stepType() not in ("CMSSW", "MulticoreCMSSW"):
                continue
            if hasattr(t.data.application.configuration, 'retrieveConfigUrl'):
                # main config file
                fileTarget = "%s/%s" % (
                    stepPath,
                    t.data.application.command.configuration)
                urllib.urlretrieve(
                    t.data.application.configuration.retrieveConfigUrl,
                    fileTarget)
                # PSet Tweak
                cacheUrl = t.data.application.configuration.configCacheUrl
                cacheDb  = t.data.application.configuration.cacheName
                configId = t.data.application.configuration.configId
                tweakTarget = t.data.application.command.psetTweak

                configCache = ConfigCache(cacheUrl, cacheDb)
                configCache.loadByID(configId)
                tweak = TweakAPI.makeTweakFromJSON(configCache.getPSetTweaks())
                if tweak:
                    tweakFile = "%s/%s" % (stepPath, tweakTarget)
                    tweak.persist(tweakFile, "json")
Example #5
0
    def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        outputModules = {}
        if configDoc != None and configDoc != "":
            configCache = ConfigCache(couchURL, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if scenarioFunc in [ "promptReco", "expressProcessing" ]:
                for output in scenarioArgs.get('outputs',[]):
                    dataTier = output['dataTier']
                    moduleLabel = output['moduleLabel']
                    filterName = output.get('filterName', None)
                    outputModules[moduleLabel] = {'dataTier' : dataTier,
                                                  'filterName' : filterName}
            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get('skims',[]):
                    dataTier = "ALCARECO"
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    filterName = alcaSkim
                    outputModules[moduleLabel] = {'dataTier' : dataTier,
                                                  'filterName' : filterName}

        return outputModules
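A hedged sketch of calling the method above with a data-processing scenario rather than a ConfigCache document; the scenario arguments, module labels and the 'factory' instance are illustrative only.

# 'factory' stands in for an instance of the class defining determineOutputModules above.
scenarioArgs = {'outputs': [{'dataTier': 'RECO', 'moduleLabel': 'RECOoutput'},
                            {'dataTier': 'AOD', 'moduleLabel': 'AODoutput', 'filterName': None}]}
modules = factory.determineOutputModules(scenarioFunc="promptReco", scenarioArgs=scenarioArgs)
# modules == {'RECOoutput': {'dataTier': 'RECO', 'filterName': None},
#             'AODoutput': {'dataTier': 'AOD', 'filterName': None}}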
Example #6
0
 def showOriginalConfig(self, docId):
     """ Makes a link to the original text of the config """
     configCache = ConfigCache(self.couchUrl, self.configDBName)
     configCache.loadByID(docId)
     configString =  configCache.getConfig()
     if configString == None:
         return "Cannot find document " + str(docId) + " in Couch DB"
     return '<pre>' + configString + '</pre>'
Example #7
0
 def showOriginalConfig(self, docId):
     """ Makes a link to the original text of the config """
     configCache = ConfigCache(self.couchUrl, self.configDBName)
     configCache.loadByID(docId)
     configString =  configCache.getConfig()
     if configString == None:
         return "Cannot find document " + str(docId) + " in Couch DB"
     return '<pre>' + configString + '</pre>'
Example #8
0
    def determineOutputModules(self,
                               scenarioFunc=None,
                               scenarioArgs=None,
                               configDoc=None,
                               couchURL=None,
                               couchDBName=None,
                               configCacheUrl=None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        # set default scenarioArgs to empty dictionary if it is None.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc != None and configDoc != "":
            url = configCacheUrl or couchURL
            configCache = ConfigCache(url, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if 'outputs' in scenarioArgs and scenarioFunc in [
                    "promptReco", "expressProcessing", "repack"
            ]:
                for output in scenarioArgs.get('outputs', []):
                    moduleLabel = output['moduleLabel']
                    outputModules[moduleLabel] = {
                        'dataTier': output['dataTier']
                    }
                    if 'primaryDataset' in output:
                        outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                    if 'filterName' in output:
                        outputModules[moduleLabel]['filterName'] = output['filterName']

            elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get('writeTiers'):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = {'dataTier': dataTier}

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get('skims', []):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    if alcaSkim.startswith("PromptCalibProd"):
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = {
                        'dataTier': dataTier,
                        'primaryDataset': scenarioArgs.get('primaryDataset'),
                        'filterName': alcaSkim
                    }

        return outputModules
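For the alcaSkim branch of the version above, a sketch of the expected mapping; the skim names, the primary dataset and the 'factory' instance are placeholders, while the PromptCalibProd-to-ALCAPROMPT tier choice comes from the code itself.

# Illustrative alcaSkim call; 'factory' stands in for an instance of the class above.
modules = factory.determineOutputModules(
    scenarioFunc="alcaSkim",
    scenarioArgs={'skims': ['TkAlMinBias', 'PromptCalibProd'], 'primaryDataset': 'MinimumBias'})
# modules == {'ALCARECOStreamTkAlMinBias': {'dataTier': 'ALCARECO', 'primaryDataset': 'MinimumBias', 'filterName': 'TkAlMinBias'},
#             'ALCARECOStreamPromptCalibProd': {'dataTier': 'ALCAPROMPT', 'primaryDataset': 'MinimumBias', 'filterName': 'PromptCalibProd'}}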
Example #9
0
def createConfig(couchDBName):
    """
    _createConfig_

    Create a config of some sort that we can load out of ConfigCache
    """

    PSetTweak = {
        'process': {
            'outputModules_': ['RECOoutput', 'ALCARECOoutput'],
            'RECOoutput': {
                'dataset': {
                    'dataTier': 'RECO',
                    'filterName': 'Filter'
                }
            },
            'ALCARECOoutput': {
                'dataset': {
                    'dataTier': 'ALCARECO',
                    'filterName': 'AlcaFilter'
                }
            }
        }
    }

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=couchDBName)
    configCache.createUserGroup(groupname="testGroup", username='******')
    configCache.setPSetTweaks(PSetTweak=PSetTweak)
    configCache.save()

    return configCache.getCouchID()
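A sketch of reading the saved document back with the ID returned above, assuming the same Couch instance; the database name is just an example, and the import path is the one shown in Example #15.

import os
from WMCore.Cache.WMConfigCache import ConfigCache

configID = createConfig("config_test")  # example database name
loaded = ConfigCache(os.environ["COUCHURL"], couchDBName="config_test")
loaded.loadByID(configID)
# getOutputModuleInfo() should now reflect the RECOoutput/ALCARECOoutput tweak saved above
outputModules = loaded.getOutputModuleInfo()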
Example #10
0
    def validateConfigCacheExists(self,
                                  configID,
                                  couchURL,
                                  couchDBName,
                                  getOutputModules=False):
        """
        _validateConfigCacheExists_

        If we have a configCache, we should probably try and load it.
        """

        if configID == '' or configID == ' ':
            self.raiseValidationException(
                msg="ConfigCacheID is invalid and cannot be loaded")

        configCache = ConfigCache(dbURL=couchURL,
                                  couchDBName=couchDBName,
                                  id=configID)
        try:
            configCache.loadByID(configID=configID)
        except ConfigCacheException:
            self.raiseValidationException(
                msg="Failure to load ConfigCache while validating workload")

        duplicateCheck = {}
        try:
            outputModuleInfo = configCache.getOutputModuleInfo()
        except Exception:
            # Something's gone wrong with trying to open the configCache
            msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
            self.raiseValidationException(msg=msg)
        for outputModule in outputModuleInfo.values():
            dataTier = outputModule.get('dataTier', None)
            filterName = outputModule.get('filterName', None)
            if not dataTier:
                self.raiseValidationException(
                    msg="No DataTier in output module.")

            # Add dataTier to duplicate dictionary
            if dataTier not in duplicateCheck:
                duplicateCheck[dataTier] = []
            if filterName in duplicateCheck[dataTier]:
                # Then we've seen this combination before
                self.raiseValidationException(
                    msg="Duplicate dataTier/filterName combination.")
            else:
                duplicateCheck[dataTier].append(filterName)

        if getOutputModules:
            return outputModuleInfo

        return
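A hedged call sketch for the validator above; the config ID and database name are placeholders, and 'self' is whatever spec object defines the method.

# Returns the output module info when getOutputModules is True; raises a validation
# exception on a missing document, a missing dataTier, or a duplicate dataTier/filterName pair.
outputModuleInfo = self.validateConfigCacheExists(configID="someConfigDocID",
                                                  couchURL=os.environ["COUCHURL"],
                                                  couchDBName="config_test",
                                                  getOutputModules=True)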
Example #11
0
    def __init__(self, **options):
        GeneratorInterface.__init__(self, **options)
        self.couchUrl = options.get("CouchUrl")
        self.couchDBName = options.get("CouchDBName")
        self.couchConfigDoc = options.get("ConfigCacheDoc")

        confCache = ConfigCache(dbURL = self.couchUrl, couchDBName = self.couchDBName, id = self.couchConfigDoc)
        confCache.load()
        seeds = confCache.document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService']
        self.seedTable = []
        for k in seeds.keys():
            if k == u"parameters_" : continue
            self.seedTable.append("process.RandomNumberGeneratorService.%s.initialSeed" % k)
Example #12
0
    def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None, configCacheUrl = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        # set default scenarioArgs to empty dictionary if it is None.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc != None and configDoc != "":
            url = configCacheUrl or couchURL
            if  (url, couchDBName) in self.config_cache:
                configCache = self.config_cache[(url, couchDBName)]
            else:
                configCache = ConfigCache(url, couchDBName, True)
                self.config_cache[(url, couchDBName)] = configCache
            #TODO: need to change to DataCache
            #configCache.loadDocument(configDoc)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if 'outputs' in scenarioArgs and scenarioFunc in [ "promptReco", "expressProcessing", "repack" ]:
                for output in scenarioArgs.get('outputs', []):
                    moduleLabel = output['moduleLabel']
                    outputModules[moduleLabel] = { 'dataTier' : output['dataTier'] }
                    if 'primaryDataset' in output:
                        outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                    if 'filterName' in output:
                        outputModules[moduleLabel]['filterName'] = output['filterName']

            elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get('writeTiers'):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = { 'dataTier' : dataTier }

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get('skims',[]):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    if alcaSkim.startswith("PromptCalibProd"):
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = { 'dataTier' : dataTier,
                                                   'primaryDataset' : scenarioArgs.get('primaryDataset'),
                                                   'filterName' : alcaSkim }

        return outputModules
Example #13
0
    def setUp(self):
        """
        _setUp_

        setUp function for unittest
        """
        # Set constants
        self.couchDB = "config_test"
        self.configURL = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'],
            useDefault=False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(
            package="WMComponent.DBSBuffer.Database",
            logger=myThread.logger,
            dbinterface=myThread.dbi)
        self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                         logger=myThread.logger,
                                         dbinterface=myThread.dbi)

        locationAction = self.bufferFactory(
            classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="se1.cern.ch")
        locationAction.execute(siteName="se1.fnal.gov")
        locationAction.execute(siteName="malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName=self.couchDB)
        configCache.createUserGroup(groupname="testGroup", username='******')
        self.testDir = self.testInit.generateWorkDir()

        psetPath = os.path.join(self.testDir, "PSet.txt")
        f = open(psetPath, 'w')
        f.write(self.configString)
        f.close()

        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB,
                                         configCache.getCouchID())
        return
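The configURL assembled above uses the same URL;;DB;;ID convention that Example #1 splits apart; a small sketch of reversing it, with variable names chosen only for illustration.

couchUrl, couchDb, configId = self.configURL.split(";;")
configCache = ConfigCache(couchUrl, couchDb)
configCache.loadByID(configId)
# getConfig() should now return the text written to PSet.txt above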
Example #14
0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a PromptSkimming workload with the given parameters.
        """
        configCouchUrl = arguments.get("ConfigCacheUrl",
                                       None) or arguments["CouchURL"]
        injectIntoConfigCache(arguments["CMSSWVersion"],
                              arguments["ScramArch"], arguments["InitCommand"],
                              arguments["SkimConfig"], workloadName,
                              configCouchUrl, arguments["CouchDBName"],
                              arguments.get("EnvPath", None),
                              arguments.get("BinPath", None))

        try:
            configCache = ConfigCache(configCouchUrl, arguments["CouchDBName"])
            arguments["ConfigCacheID"] = configCache.getIDFromLabel(
                workloadName)
            if not arguments["ConfigCacheID"]:
                logging.error("The configuration was not uploaded to couch")
                raise Exception
        except Exception:
            logging.error(
                "There was an exception loading the config out of the")
            logging.error(
                "ConfigCache.  Check the scramOutput.log file in the")
            logging.error(
                "PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        parsedProcVer = parseT0ProcVer(arguments["ProcessingVersion"],
                                       'PromptSkim')
        arguments["ProcessingString"] = parsedProcVer["ProcString"]
        arguments["ProcessingVersion"] = parsedProcVer["ProcVer"]

        workload = DataProcessingWorkloadFactory.__call__(
            self, workloadName, arguments)

        # We need to strip off "MSS" as that causes all sorts of problems.
        if arguments["CustodialSite"].find("MSS") != -1:
            site = arguments["CustodialSite"][:-4]
        else:
            site = arguments["CustodialSite"]

        workload.setSiteWhitelist(site)
        workload.setBlockWhitelist(arguments["BlockName"])
        return workload
Example #15
0
    def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                                  getOutputModules = False):
        """
        _validateConfigCacheExists_

        If we have a configCache, we should probably try and load it.
        """
        from WMCore.Cache.WMConfigCache import ConfigCache
        configCache = ConfigCache(dbURL = couchURL,
                                  couchDBName = couchDBName,
                                  id = configID)
        try:
            configCache.loadByID(configID = configID)
        except ConfigCacheException as ex:
            self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")
Example #16
0
def createAlgoFromInfo(info):
    """
    Create an Algo object from basic information

    """

    algo = {
        'ApplicationName': info['ApplicationName'],
        'ApplicationFamily': info['ApplicationFamily'],
        'ApplicationVersion': info['ApplicationVersion'],
        'PSetHash': info['PSetHash'],
        'PSetContent': None,
        'InDBS': info['AlgoInDBS']
    }

    configString = info.get('PSetContent')
    if configString:
        try:
            split = configString.split(';;')
            cacheURL = split[0]
            cacheDB = split[1]
            configID = split[2]
        except IndexError:
            msg = "configCache not properly formatted\n"
            msg += "configString\n: %s" % configString
            msg += "Not attempting to put configCache content in DBS for this algo"
            msg += "AlgoInfo: %s" % algo
            logging.error(msg)
            return algo
        if cacheURL == "None" or cacheDB == "None" or configID == "None":
            # No Config for this DB
            logging.debug("No configCache for this algo")
            return algo
        try:
            configCache = ConfigCache(cacheURL, cacheDB)
            configCache.loadByID(configID)
            algo['PSetContent'] = configCache.getConfig()
        except Exception as ex:
            msg = "Exception in getting configCache from DB\n"
            msg += "Ignoring this exception and continuing without config.\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            logging.debug("URL: %s,  DB: %s,  ID: %s" %
                          (cacheURL, cacheDB, configID))

    return algo
Example #17
0
def createAlgoFromInfo(info):
    """
    Create an Algo object from basic information

    """

    algo = {
        "ApplicationName": info["ApplicationName"],
        "ApplicationFamily": info["ApplicationFamily"],
        "ApplicationVersion": info["ApplicationVersion"],
        "PSetHash": info["PSetHash"],
        "PSetContent": None,
        "InDBS": info["AlgoInDBS"],
    }

    configString = info.get("PSetContent")
    if configString:
        try:
            split = configString.split(";;")
            cacheURL = split[0]
            cacheDB = split[1]
            configID = split[2]
        except IndexError:
            msg = "configCache not properly formatted\n"
            msg += "configString\n: %s" % configString
            msg += "Not attempting to put configCache content in DBS for this algo"
            msg += "AlgoInfo: %s" % algo
            logging.error(msg)
            return algo
        if cacheURL == "None" or cacheDB == "None" or configID == "None":
            # No Config for this DB
            logging.debug("No configCache for this algo")
            return algo
        try:
            configCache = ConfigCache(cacheURL, cacheDB)
            configCache.loadByID(configID)
            algo["PSetContent"] = configCache.getConfig()
        except Exception as ex:
            msg = "Exception in getting configCache from DB\n"
            msg += "Ignoring this exception and continuing without config.\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            logging.debug("URL: %s,  DB: %s,  ID: %s" % (cacheURL, cacheDB, configID))

    return algo
Example #18
0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a PromptSkimming workload with the given parameters.
        """
        self.injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                                   arguments["InitCommand"], arguments["SkimConfig"], workloadName,
                                   arguments["CouchURL"], arguments["CouchDBName"])

        configCache = ConfigCache(arguments["CouchURL"], arguments["CouchDBName"])
        arguments["ProcConfigCacheID"] = configCache.getIDFromLabel(workloadName)
        
        workload = DataProcessingWorkloadFactory.__call__(self, workloadName, arguments)
        workload.setSiteWhitelist(arguments["CustodialSite"])
        workload.setBlockWhitelist(arguments["BlockName"])
        return workload
Example #19
0
    def __init__(self, **options):
        GeneratorInterface.__init__(self, **options)
        self.couchUrl = options.get("CouchUrl")
        self.couchDBName = options.get("CouchDBName")
        self.couchConfigDoc = options.get("ConfigCacheDoc")

        confCache = ConfigCache(dbURL=self.couchUrl,
                                couchDBName=self.couchDBName,
                                id=self.couchConfigDoc)
        confCache.load()
        seeds = confCache.document[u'pset_tweak_details'][u'process'][
            u'RandomNumberGeneratorService']
        self.seedTable = []
        for k in seeds:
            if k == u"parameters_": continue
            self.seedTable.append(
                "process.RandomNumberGeneratorService.%s.initialSeed" % k)
Example #20
0
 def _getConfigCache(self, requestName, processMethod):
     try:
         request = Utilities.requestDetails(requestName)
     except Exception as ex:
         msg = "Cannot find request %s, check logs." % requestName
         logging.error("%s, reason: %s" % (msg, ex))
         return msg
     url = request.get("ConfigCacheUrl", None) or self.couchUrl
     try:
         configCache = ConfigCache(url, self.configDBName)
         configDocId = request["ConfigCacheID"]
         configCache.loadByID(configDocId)
     except Exception as ex:
         msg = "Cannot find ConfigCache document %s on %s." % (configDocId, url)
         logging.error("%s, reason: %s" % (msg, ex))
         return msg
     return getattr(configCache, processMethod)()
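A sketch of invoking the helper above; the request name is a placeholder, and processMethod names an existing ConfigCache accessor such as getConfig or getPSetTweaks (both appear elsewhere in these examples).

# 'self' is the web-tools handler that defines _getConfigCache above.
configText = self._getConfigCache("SomeRequestName", "getConfig")      # original config text
tweaks = self._getConfigCache("SomeRequestName", "getPSetTweaks")      # PSet tweak document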
Example #21
0
    def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                                  getOutputModules = False):
        """
        _validateConfigCacheExists_

        If we have a configCache, we should probably try and load it.
        """

        if configID == '' or configID == ' ':
            self.raiseValidationException(msg = "ConfigCacheID is invalid and cannot be loaded")
            
        configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName,
                                  id = configID)
        try:
            configCache.loadByID(configID = configID)
        except ConfigCacheException as ex:
            self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")
Example #22
0
    def __call__(self, wmTask):
        """
        Trip through steps, find CMSSW steps, pull in config files,
        PSet Tweaks etc

        """
        for t in wmTask.steps().nodeIterator():
            t = WMStep.WMStepHelper(t)
            stepPath = "%s/%s" % (self.workingDirectory(), t.name())

            # CMSSW steps are a special case because of their ConfigCache arguments
            if t.stepType() not in ("CMSSW", "MulticoreCMSSW"):
                continue
            if getattr(t.data.application.configuration, 'configCacheUrl', None) is not None:
                # main config file
                fileTarget = "%s/%s" % (
                    stepPath,
                    t.data.application.command.configuration)
                #urllib.urlretrieve(
                #    t.data.application.configuration.retrieveConfigUrl,
                #    fileTarget)
                # PSet Tweak
                cacheUrl = t.data.application.configuration.configCacheUrl
                cacheDb  = t.data.application.configuration.cacheName
                configId = t.data.application.configuration.configId
                tweakTarget = t.data.application.command.psetTweak

                configCache = ConfigCache(cacheUrl, cacheDb)
                configCache.loadByID(configId)
                configCache.saveConfigToDisk(targetFile = fileTarget)
                tweak = TweakAPI.makeTweakFromJSON(configCache.getPSetTweaks())
                if tweak:
                    tweakFile = "%s/%s" % (stepPath, tweakTarget)
                    tweak.persist(tweakFile, "json")
Example #23
0
    def getConfig(self):
        """
        _getConfig_

        Create a test config and put it in the cache
        """
        PSetTweak = {'someKey': "Hello, I am a PSetTweak.  It's nice to meet you."}

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = 'config_test')
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        configCache.setPSetTweaks(PSetTweak = PSetTweak)
        configCache.attachments['configFile'] = 'This Is A Test Config'
        configCache.save()
        return configCache
Example #24
0
    def setUp(self):
        """
        _setUp_
        
        setUp function for unittest

        """
        # Set constants
        self.couchDB      = "config_test"
        self.configURL    = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"
        
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = 
                                ["WMComponent.DBS3Buffer",
                                 'WMCore.Agent.Database'],
                                useDefault = False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")
      
        myThread = threading.currentThread()
        self.bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)

        locationAction = self.bufferFactory(classname = "DBSBufferFiles.AddLocation")
        locationAction.execute(siteName = "se1.cern.ch")
        locationAction.execute(siteName = "se1.fnal.gov")
        locationAction.execute(siteName = "malpaquet") 


        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI  = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDB)
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        self.testDir = self.testInit.generateWorkDir()

        psetPath = os.path.join(self.testDir, "PSet.txt")
        f = open(psetPath, 'w')
        f.write(self.configString)
        f.close()
        
        configCache.addConfig(newConfig = psetPath, psetHash = None)
        configCache.save()
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"],
                                         self.couchDB,
                                         configCache.getCouchID())

        return
Example #25
0
    def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                                  getOutputModules = False):
        """
        _validateConfigCacheExists_

        If we have a configCache, we should probably try and load it.
        """

        if configID == '' or configID == ' ':
            self.raiseValidationException(msg = "ConfigCacheID is invalid and cannot be loaded")

        configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName,
                                  id = configID)
        try:
            configCache.loadByID(configID = configID)
        except ConfigCacheException:
            self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")

        duplicateCheck = {}
        try:
            outputModuleInfo = configCache.getOutputModuleInfo()
        except Exception:
            # Something's gone wrong with trying to open the configCache
            msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
            self.raiseValidationException(msg = msg)
        for outputModule in outputModuleInfo.values():
            dataTier   = outputModule.get('dataTier', None)
            filterName = outputModule.get('filterName', None)
            if not dataTier:
                self.raiseValidationException(msg = "No DataTier in output module.")

            # Add dataTier to duplicate dictionary
            if dataTier not in duplicateCheck:
                duplicateCheck[dataTier] = []
            if filterName in duplicateCheck[dataTier]:
                # Then we've seen this combination before
                self.raiseValidationException(msg = "Duplicate dataTier/filterName combination.")
            else:
                duplicateCheck[dataTier].append(filterName)

        if getOutputModules:
            return outputModuleInfo

        return
Example #26
0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a PromptSkimming workload with the given parameters.
        """
        self.injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                                   arguments["InitCommand"], arguments["SkimConfig"], workloadName,
                                   arguments["CouchURL"], arguments["CouchDBName"])

        try:
            configCache = ConfigCache(arguments["CouchURL"], arguments["CouchDBName"])
            arguments["ProcConfigCacheID"] = configCache.getIDFromLabel(workloadName)
        except Exception as ex:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache.  Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise
Example #27
0
    def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None, configCacheUrl = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        outputModules = {}
        if configDoc != None and configDoc != "":
            url = configCacheUrl or couchURL
            configCache = ConfigCache(url, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if 'outputs' in scenarioArgs and scenarioFunc in [ "promptReco", "expressProcessing", "repack" ]:
                for output in scenarioArgs.get('outputs', []):
                    moduleLabel = output['moduleLabel']
                    outputModules[moduleLabel] = { 'dataTier' : output['dataTier'] }
                    if 'primaryDataset' in output:
                        outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                    if 'filterName' in output:
                        outputModules[moduleLabel]['filterName'] = output['filterName']

            elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get('writeTiers'):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = { 'dataTier' : dataTier }

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get('skims',[]):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    if alcaSkim == "PromptCalibProd":
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = { 'dataTier' : dataTier,
                                                   'primaryDataset' : scenarioArgs.get('primaryDataset'),
                                                   'filterName' : alcaSkim }

        return outputModules
Example #28
0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a PromptSkimming workload with the given parameters.
        """
        configCouchUrl = arguments.get("ConfigCacheUrl", None) or arguments["CouchURL"]
        injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                              arguments["InitCommand"], arguments["SkimConfig"], workloadName,
                              configCouchUrl, arguments["CouchDBName"],
                              arguments.get("EnvPath", None), arguments.get("BinPath", None))

        try:
            configCache = ConfigCache(configCouchUrl, arguments["CouchDBName"])
            arguments["ConfigCacheID"] = configCache.getIDFromLabel(workloadName)
            if not arguments["ConfigCacheID"]:
                logging.error("The configuration was not uploaded to couch")
                raise Exception
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache.  Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        parsedProcVer = parseT0ProcVer(arguments["ProcessingVersion"],
                                       'PromptSkim')
        arguments["ProcessingString"] = parsedProcVer["ProcString"]
        arguments["ProcessingVersion"] = parsedProcVer["ProcVer"]

        workload = DataProcessingWorkloadFactory.__call__(self, workloadName, arguments)

        # We need to strip off "MSS" as that causes all sorts of problems.
        if arguments["CustodialSite"].find("MSS") != -1:
            site = arguments["CustodialSite"][:-4]
        else:
            site = arguments["CustodialSite"]

        workload.setSiteWhitelist(site)
        workload.setBlockWhitelist(arguments["BlockName"])
        return workload
Example #29
0
    def determineOutputModules(self, scenarioName = None, scenarioArgs = None,
                               configDoc = None, couchURL = None,
                               couchDBName = None):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        outputModules = {}
        if configDoc != None and configDoc != "":
            configCache = ConfigCache(couchURL, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            for dataTier in scenarioArgs.get("writeTiers",[]):
                outputModuleName = "output%s%s" % (dataTier, dataTier)
                outputModules[outputModuleName] = {"dataTier": dataTier,
                                                   "filterName": None}

        return outputModules
Example #30
0
    def validateConfigCacheExists(self, configID, couchURL, couchDBName, getOutputModules=True):
        """
        _validateConfigCacheExists_

        If we have a configCache, we should probably try and load it.
        """

        if configID == "" or configID == " ":
            self.raiseValidationException(msg="ConfigCacheID is invalid and cannot be loaded")

        if (couchURL, couchDBName) in self.config_cache:
            configCache = self.config_cache[(couchURL, couchDBName)]
        else:
            configCache = ConfigCache(dbURL=couchURL, couchDBName=couchDBName, detail=getOutputModules)
            self.config_cache[(couchURL, couchDBName)] = configCache

        try:
            # if the detail option is set, return outputModules
            return configCache.validate(configID)
        except ConfigCacheException as ex:
            self.raiseValidationException(ex.message())
Example #31
0
    def determineOutputModules(
        self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None
    ):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        outputModules = {}
        if configDoc != None and configDoc != "":
            configCache = ConfigCache(couchURL, couchDBName)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
                for output in scenarioArgs.get("outputs", []):
                    moduleLabel = output["moduleLabel"]
                    outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
                    if "primaryDataset" in output:
                        outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
                    if "filterName" in output:
                        outputModules[moduleLabel]["filterName"] = output["filterName"]

            elif "writeTiers" in scenarioArgs and scenarioFunc == "promptReco":
                for dataTier in scenarioArgs.get("writeTiers"):
                    moduleLabel = "%soutput" % dataTier
                    outputModules[moduleLabel] = {"dataTier": dataTier}

            elif scenarioFunc == "alcaSkim":
                for alcaSkim in scenarioArgs.get("skims", []):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    outputModules[moduleLabel] = {
                        "dataTier": "ALCARECO",
                        "primaryDataset": scenarioArgs.get("primaryDataset"),
                        "filterName": alcaSkim,
                    }

        return outputModules
Example #32
0
    def validateConfigCacheExists(self,
                                  configID,
                                  couchURL,
                                  couchDBName,
                                  getOutputModules=False):
        """
        _validateConfigCacheExists_

        If we have a configCache, we should probably try and load it.
        """

        if configID == '' or configID == ' ':
            self.raiseValidationException(
                msg="ConfigCacheID is invalid and cannot be loaded")

        configCache = ConfigCache(dbURL=couchURL,
                                  couchDBName=couchDBName,
                                  id=configID)
        try:
            configCache.loadByID(configID=configID)
        except ConfigCacheException as ex:
            self.raiseValidationException(
                msg="Failure to load ConfigCache while validating workload")
Example #33
0
    def testE_SaveConfigFileToDisk(self):
        """
        _SaveConfigFileToDisk_

        Check and see if we can save the config file attachment to disk
        """
        targetFile = os.path.join(self.testDir, 'configCache.test')

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = 'config_test')
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        configCache.attachments['configFile'] = 'ThisIsAConfigFile'
        configCache.saveConfigToDisk(targetFile = targetFile)

        f = open(targetFile, 'r')
        content = f.read()
        f.close()

        self.assertEqual(content, configCache.getConfig())
        return
Example #34
0
def createConfig(couchDBName):
    """
    _createConfig_

    Create a config of some sort that we can load out of ConfigCache
    """
    
    PSetTweak = {'process': {'outputModules_': ['RECOoutput', 'ALCARECOoutput'],
                             'RECOoutput': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}},
                             'ALCARECOoutput': {'dataset': {'dataTier': 'ALCARECO',
                                                            'filterName': 'AlcaFilter'}}}}

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = couchDBName)
    configCache.createUserGroup(groupname = "testGroup", username = '******')
    configCache.setPSetTweaks(PSetTweak = PSetTweak)
    configCache.save()

    return configCache.getCouchID()
Example #35
0
    def testE_SaveConfigFileToDisk(self):
        """
        _SaveConfigFileToDisk_

        Check and see if we can save the config file attachment to disk
        """
        targetFile = os.path.join(self.testDir, 'configCache.test')

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = 'config_test')
        configCache.createUserGroup(groupname = "testGroup", username = '******')
        configCache.attachments['configFile'] = 'ThisIsAConfigFile'
        configCache.saveConfigToDisk(targetFile = targetFile)

        f = open(targetFile, 'r')
        content = f.read()
        f.close()

        self.assertEqual(content, configCache.getConfig())
        return
Example #36
0
    def buildWorkload(self):
        """
        _buildWorkload_

        Build the workload given all of the input parameters.

        Note that there will be LogCollect tasks created for each processing
        task and Cleanup tasks created for each merge task.

        """
        (self.inputPrimaryDataset, self.inputProcessedDataset,
         self.inputDataTier) = self.inputDataset[1:].split("/")

        workload = self.createWorkload()
        workload.setDashboardActivity("tier0")
        self.reportWorkflowToDashboard(workload.getDashboardActivity())
        workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                         self.procJobSplitArgs)

        cmsswStepType = "CMSSW"
        taskType = "Processing"
        if self.multicore:
            taskType = "MultiProcessing"

        recoOutputs = []
        for dataTier in self.writeTiers:
            recoOutputs.append({
                'dataTier': dataTier,
                'eventContent': dataTier,
                'moduleLabel': "write_%s" % dataTier
            })

        recoTask = workload.newTask("Reco")
        recoOutMods = self.setupProcessingTask(recoTask,
                                               taskType,
                                               self.inputDataset,
                                               scenarioName=self.procScenario,
                                               scenarioFunc="promptReco",
                                               scenarioArgs={
                                                   'globalTag': self.globalTag,
                                                   'skims': self.alcaSkims,
                                                   'dqmSeq': self.dqmSequences,
                                                   'outputs': recoOutputs
                                               },
                                               splitAlgo=self.procJobSplitAlgo,
                                               splitArgs=self.procJobSplitArgs,
                                               stepType=cmsswStepType,
                                               forceUnmerged=True)
        if self.doLogCollect:
            self.addLogCollectTask(recoTask)

        recoMergeTasks = {}
        for recoOutLabel, recoOutInfo in recoOutMods.items():
            if recoOutInfo['dataTier'] != "ALCARECO":
                mergeTask = self.addMergeTask(recoTask,
                                              self.procJobSplitAlgo,
                                              recoOutLabel,
                                              doLogCollect=self.doLogCollect)
                recoMergeTasks[recoOutInfo['dataTier']] = mergeTask

            else:
                alcaTask = recoTask.addTask("AlcaSkim")
                alcaOutMods = self.setupProcessingTask(
                    alcaTask,
                    taskType,
                    inputStep=recoTask.getStep("cmsRun1"),
                    inputModule=recoOutLabel,
                    scenarioName=self.procScenario,
                    scenarioFunc="alcaSkim",
                    scenarioArgs={
                        'globalTag': self.globalTag,
                        'skims': self.alcaSkims,
                        'primaryDataset': self.inputPrimaryDataset
                    },
                    splitAlgo="WMBSMergeBySize",
                    splitArgs={
                        "max_merge_size": self.maxMergeSize,
                        "min_merge_size": self.minMergeSize,
                        "max_merge_events": self.maxMergeEvents
                    },
                    stepType=cmsswStepType)
                if self.doLogCollect:
                    self.addLogCollectTask(alcaTask,
                                           taskName="AlcaSkimLogCollect")
                self.addCleanupTask(recoTask, recoOutLabel)

                for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                    self.addMergeTask(alcaTask,
                                      self.procJobSplitAlgo,
                                      alcaOutLabel,
                                      doLogCollect=self.doLogCollect)

        for promptSkim in self.promptSkims:
            if promptSkim.DataTier not in recoMergeTasks:
                error = 'PromptReco output does not have the following output data tier: %s. ' % promptSkim.DataTier
                error += 'Please change the skim input to be one of the following: %s. ' % recoMergeTasks.keys()
                error += 'That should be in the relevant skimConfig in T0AST'
                logging.error(error)
                raise Exception

            mergeTask = recoMergeTasks[promptSkim.DataTier]
            skimTask = mergeTask.addTask(promptSkim.SkimName)
            parentCmsswStep = mergeTask.getStep('cmsRun1')

            parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                           'PromptSkim')
            self.processingString = parsedProcVer["ProcString"]
            self.processingVersion = parsedProcVer["ProcVer"]

            if promptSkim.TwoFileRead:
                self.skimJobSplitArgs['include_parents'] = True
            else:
                self.skimJobSplitArgs['include_parents'] = False

            configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
            configCacheUrl = self.configCacheUrl or self.couchURL
            injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                                  self.initCommand, promptSkim.ConfigURL,
                                  configLabel, configCacheUrl,
                                  self.couchDBName, self.envPath, self.binPath)
            try:
                configCache = ConfigCache(configCacheUrl, self.couchDBName)
                configCacheID = configCache.getIDFromLabel(configLabel)
                if not configCacheID:
                    logging.error("The configuration was not uploaded to couch")
                    raise Exception
            except Exception:
                logging.error(
                    "There was an exception loading the config out of the")
                logging.error(
                    "ConfigCache.  Check the scramOutput.log file in the")
                logging.error(
                    "PromptSkimScheduler directory to find out what went")
                logging.error("wrong.")
                raise

            outputMods = self.setupProcessingTask(
                skimTask,
                "Skim",
                inputStep=parentCmsswStep,
                inputModule="Merged",
                couchURL=self.couchURL,
                couchDBName=self.couchDBName,
                configCacheUrl=self.configCacheUrl,
                configDoc=configCacheID,
                splitAlgo=self.skimJobSplitAlgo,
                splitArgs=self.skimJobSplitArgs)
            if self.doLogCollect:
                self.addLogCollectTask(skimTask,
                                       taskName="%sLogCollect" %
                                       promptSkim.SkimName)

            for outputModuleName in outputMods.keys():
                self.addMergeTask(skimTask,
                                  self.skimJobSplitAlgo,
                                  outputModuleName,
                                  doLogCollect=self.doLogCollect)

        return workload
Example #37
0
    def createConfig(self, bad=False):
        """
        _createConfig_

        Create a config of some sort that we can load out of ConfigCache
        
        """
        PSetTweak = {
            'process': {
                'outputModules_': ['ThisIsAName'],
                'ThisIsAName': {
                    'dataset': {
                        'dataTier': 'RECO',
                        'filterName': 'Filter'
                    }
                }
            }
        }
        BadTweak = {
            'process': {
                'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                'ThisIsAName1': {
                    'dataset': {
                        'dataTier': 'RECO',
                        'filterName': 'Filter'
                    }
                },
                'ThisIsAName2': {
                    'dataset': {
                        'dataTier': 'RECO',
                        'filterName': 'Filter'
                    }
                }
            }
        }
        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName=self.couchDBName)
        configCache.createUserGroup(groupname="testGroup", username='******')
        if bad:
            configCache.setPSetTweaks(PSetTweak=BadTweak)
        else:
            configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.save()
        return configCache.getCouchID()
Example #38
0
 def showTweakFile(self, docId):
     """ Makes a link to the dump of the tweakfile """
     configCache = ConfigCache(self.couchUrl, self.configDBName)
     configCache.loadByID(docId)
     return str(configCache.getPSetTweaks()).replace('\n', '<br>')
Example #39
0
    def determineOutputModules(
        self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None, configCacheUrl=None
    ):
        """
        _determineOutputModules_

        Determine the output module names and associated metadata for the
        given config.
        """
        # set default scenarioArgs to empty dictionary if it is None.
        scenarioArgs = scenarioArgs or {}

        outputModules = {}
        if configDoc != None and configDoc != "":
            url = configCacheUrl or couchURL
            if (url, couchDBName) in self.config_cache:
                configCache = self.config_cache[(url, couchDBName)]
            else:
                configCache = ConfigCache(url, couchDBName, True)
                self.config_cache[(url, couchDBName)] = configCache
            # TODO: need to change to DataCache
            # configCache.loadDocument(configDoc)
            configCache.loadByID(configDoc)
            outputModules = configCache.getOutputModuleInfo()
        else:
            if "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:

                for output in scenarioArgs.get("outputs", []):

                    moduleLabel = output["moduleLabel"]
                    outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
                    if "primaryDataset" in output:
                        outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
                    if "filterName" in output:
                        outputModules[moduleLabel]["filterName"] = output["filterName"]

                for physicsSkim in scenarioArgs.get("PhysicsSkims", []):
                    skimToDataTier = {
                        "LogError": "RAW-RECO",
                        "LogErrorMonitor": "USER",
                        "ZElectron": "RAW-RECO",
                        "ZMu": "RAW-RECO",
                        "MuTau": "RAW-RECO",
                        "TopMuEG": "RAW-RECO",
                        "EcalActivity": "RAW-RECO",
                        "CosmicSP": "RAW-RECO",
                        "CosmicTP": "RAW-RECO",
                        "ZMM": "RAW-RECO",
                        "Onia": "RECO",
                        "HighPtJet": "RAW-RECO",
                        "D0Meson": "RECO",
                        "Photon": "AOD",
                        "ZEE": "AOD",
                        "BJet": "AOD",
                        "OniaCentral": "RECO",
                        "OniaPeripheral": "RECO",
                        "SingleTrack": "AOD",
                        "MinBias": "AOD",
                        "OniaUPC": "RAW-RECO",
                        "HighMET": "RECO",
                        "BPHSkim": "USER",
                    }
                    dataTier = skimToDataTier.get(physicsSkim, "USER")
                    moduleLabel = "SKIMStream%s" % physicsSkim
                    outputModules[moduleLabel] = {"dataTier": dataTier, "filterName": physicsSkim}

            elif scenarioFunc == "alcaSkim":

                for alcaSkim in scenarioArgs.get("skims", []):
                    moduleLabel = "ALCARECOStream%s" % alcaSkim
                    if alcaSkim.startswith("PromptCalibProd"):
                        dataTier = "ALCAPROMPT"
                    else:
                        dataTier = "ALCARECO"
                    outputModules[moduleLabel] = {
                        "dataTier": dataTier,
                        "primaryDataset": scenarioArgs.get("primaryDataset"),
                        "filterName": alcaSkim,
                    }

        return outputModules
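For reference, here is a minimal sketch (not taken from the source) of what the scenario-driven branch above produces when no configDoc is supplied; the module, dataset and skim names are assumptions chosen only to illustrate the mapping:

# Hedged sketch: promptReco-style scenarioArgs and the outputModules dict
# the else-branch above builds from them (all names below are made up).
scenarioArgs = {
    'outputs': [
        {'moduleLabel': 'write_RECO', 'dataTier': 'RECO',
         'primaryDataset': 'MinimumBias', 'filterName': 'PromptReco'},
        {'moduleLabel': 'write_AOD', 'dataTier': 'AOD'},
    ],
    'PhysicsSkims': ['LogError'],
}
# With scenarioFunc == "promptReco" and configDoc left empty, the method
# would return:
# {'write_RECO': {'dataTier': 'RECO', 'primaryDataset': 'MinimumBias',
#                 'filterName': 'PromptReco'},
#  'write_AOD': {'dataTier': 'AOD'},
#  'SKIMStreamLogError': {'dataTier': 'RAW-RECO', 'filterName': 'LogError'}}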
Example #40
0
    def testListAllConfigs(self):
        """
        _testListAllConfigs_

        Verify that the list all configs method works correctly.
        """
        configCacheA = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test')
        configCacheA.createUserGroup(groupname="testGroup", username='******')
        configCacheA.setLabel("labelA")
        configCacheA.save()

        configCacheB = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test')
        configCacheB.createUserGroup(groupname="testGroup", username='******')
        configCacheB.setLabel("labelB")
        configCacheB.save()

        configs = configCacheA.listAllConfigsByLabel()

        self.assertEqual(len(configs.keys()), 2,
                         "Error: There should be two configs")
        self.assertEqual(configs["labelA"], configCacheA.getCouchID(),
                         "Error: Label A is wrong.")
        self.assertEqual(configs["labelB"], configCacheB.getCouchID(),
                         "Error: Label B is wrong.")
        return
Example #41
0
    def testD_LoadConfigCache(self):
        """
        _LoadConfigCache_

        Actually load the config cache using plain .load()
        Tests to make sure that if we pass in an id field it gets used to load configs
        """

        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setLabel("labelA")
        configCache.save()

        configCache2 = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())
        configCache2.load()
        self.assertEqual(configCache2.document['owner'], {
            'group': 'testGroup',
            'user': '******'
        })
        self.assertEqual(configCache2.document['description'], {
            'config_desc': None,
            'config_label': 'labelA'
        })
        return
Example #42
0
 def _configCacheId(self, label):
     """Return config cache id for given config label"""
     key, cert = self.__class__.reqmgr['requests'].getKeyCert()
     configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                               'reqmgr_config_cache',
                               ckey=key,
                               cert=cert)
     try:
         configCacheId = configCache.getIDFromLabel(label)
     except:
         configCacheId = None
     if configCacheId:
         return configCacheId
     # The following will fail if FWCore.ParameterSet not in PYTHONPATH
     from PSetTweaks.WMTweak import makeTweak
     configCache.createUserGroup('test', 'test')
     configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data',
                              'configs')
     configCache.addConfig(os.path.join(configDir, label + '.py'))
     configCache.setLabel(label)
     configCache.setDescription(label)
     modPath = imp.find_module(label, [configDir])
     loadedConfig = imp.load_module(label, modPath[0], modPath[1],
                                    modPath[2])
     configCache.setPSetTweaks(
         makeTweak(loadedConfig.process).jsondictionary())
     configCache.save()
     return configCache.getIDFromLabel(label)
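A hedged usage sketch for the helper above; the 'ReReco' label is hypothetical, but feeding the returned ID into a request's ConfigCacheID field matches how the other examples here consume it:

     # Sketch only: 'ReReco' is a hypothetical config label.
     configCacheId = self._configCacheId('ReReco')
     requestArgs = {'ConfigCacheID': configCacheId}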
Example #43
0
                                 docId=request.get('ConfigCacheID', None),
                                 assignments=request['Assignments'],
                                 adminHtml=adminHtml,
                                 messages=request['RequestMessages'],
                                 updateDictList=request['RequestUpdates'])

    def _getConfigCache(self, requestName, processMethod):
        try:
            request = Utilities.requestDetails(requestName)
        except Exception, ex:
            msg = "Cannot find request %s, check logs." % requestName
            logging.error("%s, reason: %s" % (msg, ex))
            return msg
        url = request.get("ConfigCacheUrl", None) or self.couchUrl
        try:
            configCache = ConfigCache(url, self.configDBName)
            configDocId = request["ConfigCacheID"]
            configCache.loadByID(configDocId)
        except Exception, ex:
            msg = "Cannot find ConfigCache document %s on %s." % (configDocId,
                                                                  url)
            logging.error("%s, reason: %s" % (msg, ex))
            return msg
        return getattr(configCache, processMethod)()

    @cherrypy.expose
    @cherrypy.tools.secmodv2()
    def showOriginalConfig(self, requestName):
        """
        Makes a link to the original text of the config document.
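        """
        # NOTE: the original body is truncated in this extract.  A hedged
        # sketch of the likely delegation, reusing the getConfig accessor
        # seen in the other examples (an assumption, not the source code):
        return self._getConfigCache(requestName, "getConfig")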
        
Example #45
0
    def testA_basicConfig(self):
        """
        _basicConfig_

        Basic configCache stuff.
        """
        PSetTweak = "Hello, I am a PSetTweak.  It's nice to meet you."

        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.save()

        configCache2 = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())
        configCache2.loadByID(configCache.getCouchID())

        self.assertEqual(configCache2.getPSetTweaks(), PSetTweak)

        configCache2.delete()
        configCache3 = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())

        testFlag = False

        # It should fail to load deleted documents
        try:
            configCache3.loadByID(configCache.getCouchID())
        except ConfigCacheException:
            testFlag = True

        self.assertTrue(testFlag)

        return
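As a side note, the flag-based check above can be written more compactly with unittest's assertRaises; a minimal equivalent sketch:

        # Equivalent to the try/except/testFlag pattern above:
        self.assertRaises(ConfigCacheException,
                          configCache3.loadByID, configCache.getCouchID())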
Example #46
0
    def testB_addingConfigsAndTweaks(self):
        """
        _addingConfigsAndTweaks_

        Test adding config files and tweak files
        """
        PSetTweak = "Hello, I am a PSetTweak.  It's nice to meet you."
        attach = "Hello, I am an attachment"

        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.attachments['attach1'] = attach
        psetPath = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")
        configCache.addConfig(newConfig=psetPath, psetHash=None)

        configCache.setLabel("sample-label")
        configCache.setDescription("describe this config here")
        configCache.save()
        configString1 = configCache.getConfig()

        configCache2 = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())
        configCache2.loadByID(configCache.getCouchID())
        configString2 = configCache2.getConfig()

        self.assertEqual(configString1, configString2)
        self.assertEqual(configCache2.attachments.get('attach1', None), attach)

        configCache.delete()
        return
Example #47
0
def upload_to_couch(cfg_name,
                    section_name,
                    user_name,
                    group_name,
                    test_mode=False,
                    url=None):
    if test_mode:
        return "00000000000000000"

    if not os.path.exists(cfg_name):
        raise RuntimeError("Error: Can't locate config file %s." % cfg_name)

    # create a file with the ID inside to avoid multiple injections
    oldID = cfg_name + '.couchID'

    if os.path.exists(oldID):
        f = open(oldID)
        the_id = f.readline().replace('\n', '')
        f.close()
        print cfg_name, 'already uploaded with ID', the_id, 'from', oldID
        return the_id

    try:
        loadedConfig = __loadConfig(cfg_name)
    except:
        #just try again !!
        time.sleep(2)
        loadedConfig = __loadConfig(cfg_name)

    where = COUCH_DB_ADDRESS
    if url:
        where = url

    configCache = ConfigCache(where, DATABASE_NAME)
    configCache.createUserGroup(group_name, user_name)
    configCache.addConfig(cfg_name)
    configCache.setPSetTweaks(makeTweak(loadedConfig.process).jsondictionary())
    configCache.setLabel(section_name)
    configCache.setDescription(section_name)
    configCache.save()

    print "Added file to the config cache:"
    print "  DocID:    %s" % configCache.document["_id"]
    print "  Revision: %s" % configCache.document["_rev"]

    f = open(oldID, "w")
    f.write(configCache.document["_id"])
    f.close()
    return configCache.document["_id"]
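A hedged usage sketch for the uploader above; the file path, user, group and CouchDB URL are illustrative assumptions:

# Sketch only: every argument value below is hypothetical.
doc_id = upload_to_couch('configs/rereco_cfg.py',
                         section_name='rereco',
                         user_name='someuser',
                         group_name='somegroup',
                         url='http://localhost:5984')
print(doc_id)
# A 'configs/rereco_cfg.py.couchID' marker file is written next to the
# config, so a second call with the same file returns the cached ID
# instead of injecting a new document.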
Example #50
0
    def buildWorkload(self):
        """
        _buildWorkload_

        Build the workload given all of the input parameters.

        Note that there will be LogCollect tasks created for each processing
        task and Cleanup tasks created for each merge task.

        """
        (self.inputPrimaryDataset, self.inputProcessedDataset,
         self.inputDataTier) = self.inputDataset[1:].split("/")

        workload = self.createWorkload()
        workload.setDashboardActivity("tier0")
        self.reportWorkflowToDashboard(workload.getDashboardActivity())
        workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                         self.procJobSplitArgs)

        cmsswStepType = "CMSSW"
        taskType = "Processing"

        recoOutputs = []
        for dataTier in self.writeTiers:
            recoOutputs.append( { 'dataTier' : dataTier,
                                  'eventContent' : dataTier,
                                  'moduleLabel' : "write_%s" % dataTier } )

        recoTask = workload.newTask("Reco")

        scenarioArgs = { 'globalTag' : self.globalTag,
                         'skims' : self.alcaSkims,
                         'dqmSeq' : self.dqmSequences,
                         'outputs' : recoOutputs }
        if self.globalTagConnect:
            scenarioArgs['globalTagConnect'] = self.globalTagConnect

        recoOutMods = self.setupProcessingTask(recoTask, taskType, self.inputDataset,
                                               scenarioName = self.procScenario,
                                               scenarioFunc = "promptReco",
                                               scenarioArgs = scenarioArgs,
                                               splitAlgo = self.procJobSplitAlgo,
                                               splitArgs = self.procJobSplitArgs,
                                               stepType = cmsswStepType,
                                               forceUnmerged = True)
        if self.doLogCollect:
            self.addLogCollectTask(recoTask)

        recoMergeTasks = {}
        for recoOutLabel, recoOutInfo in recoOutMods.items():
            if recoOutInfo['dataTier'] != "ALCARECO":
                mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo, recoOutLabel,
                                              doLogCollect = self.doLogCollect)
                recoMergeTasks[recoOutInfo['dataTier']] = mergeTask

            else:
                alcaTask = recoTask.addTask("AlcaSkim")

                scenarioArgs = { 'globalTag' : self.globalTag,
                                 'skims' : self.alcaSkims,
                                 'primaryDataset' : self.inputPrimaryDataset }
                if self.globalTagConnect:
                    scenarioArgs['globalTagConnect'] = self.globalTagConnect

                alcaOutMods = self.setupProcessingTask(alcaTask, taskType,
                                                       inputStep = recoTask.getStep("cmsRun1"),
                                                       inputModule = recoOutLabel,
                                                       scenarioName = self.procScenario,
                                                       scenarioFunc = "alcaSkim",
                                                       scenarioArgs = scenarioArgs,
                                                       splitAlgo = "WMBSMergeBySize",
                                                       splitArgs = {"max_merge_size": self.maxMergeSize,
                                                                    "min_merge_size": self.minMergeSize,
                                                                    "max_merge_events": self.maxMergeEvents},
                                                       stepType = cmsswStepType,
                                                       useMulticore = False)
                if self.doLogCollect:
                    self.addLogCollectTask(alcaTask, taskName = "AlcaSkimLogCollect")
                self.addCleanupTask(recoTask, recoOutLabel)

                for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                    self.addMergeTask(alcaTask, self.procJobSplitAlgo, alcaOutLabel,
                                      doLogCollect = self.doLogCollect)

        for promptSkim in self.promptSkims:
            if promptSkim.DataTier not in recoMergeTasks:
                error = 'PromptReco output does not have the following output data tier: %s. ' % promptSkim.DataTier
                error += 'Please change the skim input to be one of the following: %s. ' % recoMergeTasks.keys()
                error += 'That should be set in the relevant skimConfig in T0AST.'
                logging.error(error)
                raise Exception(error)

            mergeTask = recoMergeTasks[promptSkim.DataTier]
            skimTask = mergeTask.addTask(promptSkim.SkimName)
            parentCmsswStep = mergeTask.getStep('cmsRun1')

            parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                           'PromptSkim')
            self.processingString = parsedProcVer["ProcString"]
            self.processingVersion = parsedProcVer["ProcVer"]

            if promptSkim.TwoFileRead:
                self.skimJobSplitArgs['include_parents'] = True
            else:
                self.skimJobSplitArgs['include_parents'] = False

            configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
            configCacheUrl = self.configCacheUrl or self.couchURL
            injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                                       self.initCommand, promptSkim.ConfigURL, configLabel,
                                       configCacheUrl, self.couchDBName,
                                       self.envPath, self.binPath)
            try:
                configCache = ConfigCache(configCacheUrl, self.couchDBName)
                configCacheID = configCache.getIDFromLabel(configLabel)
                if not configCacheID:
                    logging.error("The configuration was not uploaded to couch")
                    raise Exception
            except Exception:
                logging.error("There was an exception loading the config out of the")
                logging.error("ConfigCache.  Check the scramOutput.log file in the")
                logging.error("PromptSkimScheduler directory to find out what went")
                logging.error("wrong.")
                raise

            outputMods = self.setupProcessingTask(skimTask, "Skim", inputStep = parentCmsswStep, inputModule = "Merged",
                                                  couchURL = self.couchURL, couchDBName = self.couchDBName,
                                                  configCacheUrl = self.configCacheUrl,
                                                  configDoc = configCacheID, splitAlgo = self.skimJobSplitAlgo,
                                                  splitArgs = self.skimJobSplitArgs, useMulticore = False)
            if self.doLogCollect:
                self.addLogCollectTask(skimTask, taskName = "%sLogCollect" % promptSkim.SkimName)

            for outputModuleName in outputMods.keys():
                self.addMergeTask(skimTask, self.skimJobSplitAlgo, outputModuleName,
                                  doLogCollect = self.doLogCollect)

        workload.setBlockCloseSettings(self.blockCloseDelay,
                                       workload.getBlockCloseMaxFiles(),
                                       workload.getBlockCloseMaxEvents(),
                                       workload.getBlockCloseMaxSize())

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        workload.setTaskPropertiesFromWorkload()

        # set the LFN bases (normally done by request manager)
        # also pass runNumber (workload evaluates it)
        workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase,
                            runNumber = self.runNumber)

        return workload
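The PromptSkim block above injects each skim configuration under a '<workloadName>-<SkimName>' label and then resolves it back to a document ID; a hedged sketch of that lookup step in isolation (the import path, URL, database and names are assumptions):

from WMCore.Cache.WMConfigCache import ConfigCache  # assumed import path

# Sketch only: URL, database name and label components are hypothetical.
configCacheUrl = 'http://localhost:5984'
couchDBName = 'promptskim_config_cache'
configLabel = '%s-%s' % ('PromptReco_Run123456_Cosmics', 'LogError')

configCache = ConfigCache(configCacheUrl, couchDBName)
configCacheID = configCache.getIDFromLabel(configLabel)
if not configCacheID:
    raise RuntimeError("Config %s was never uploaded to couch" % configLabel)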
Example #51
0
    def testC_testViews(self):
        """
        _testViews_

        Prototype test for what should be a lot of other tests.
        """
        PSetTweak = "Hello, I am a PSetTweak.  It's nice to meet you."
        attach = "Hello, I am an attachment"

        configCache = ConfigCache(os.environ["COUCHURL"],
                                  couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.attachments['attach1'] = attach
        configCache.document['md5_hash'] = "somemd5"
        psetPath = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")
        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()

        configCache2 = ConfigCache(os.environ["COUCHURL"],
                                   couchDBName='config_test')
        configCache2.document['md5_hash'] = configCache.document['md5_hash']
        configCache2.load()

        self.assertEqual(configCache2.attachments.get('attach1', None), attach)
        configCache2.delete()
        return