Example #1
def _validateDatatier(datatier, dbsUrl, expiration=3600):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS.
    """
    cacheName = "dataTierList_" + md5(
        encodeUnicodeToBytesConditional(dbsUrl, condition=PY3)).hexdigest()
    if not GenericDataCache.cacheExists(cacheName):
        mc = MemoryCacheStruct(expiration,
                               getDataTiers,
                               kwargs={'dbsUrl': dbsUrl})
        GenericDataCache.registerCache(cacheName, mc)

    cacheData = GenericDataCache.getCacheData(cacheName)
    dbsTiers = cacheData.getData()
    badTiers = list(set(datatier) - set(dbsTiers))
    if badTiers:
        raise InvalidSpecParameterValue(
            "Bad datatier(s): %s not available in DBS." % badTiers)
Example #2
    def handleDQMFileSaver(self):
        """
        _handleDQMFileSaver_

        Harvesting jobs have the dqmFileSaver EDAnalyzer that must
        be tweaked with the dataset name in order to store it
        properly in the DQMGUI; other tweaks can be added as well
        """

        runIsComplete = getattr(self.jobBag, "runIsComplete", False)
        multiRun = getattr(self.jobBag, "multiRun", False)
        runLimits = getattr(self.jobBag, "runLimits", "")
        self.logger.info(
            "DQMFileSaver set to multiRun: %s, runIsComplete: %s, runLimits: %s",
            multiRun, runIsComplete, runLimits)

        procScript = "cmssw_handle_dqm_filesaver.py"

        cmd = "%s --input_pkl %s --output_pkl %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location, self.configPickle))

        datasetName = None
        if hasattr(self.step.data.application.configuration,
                   "pickledarguments"):
            pklArgs = encodeUnicodeToBytesConditional(
                self.step.data.application.configuration.pickledarguments,
                condition=PY3)
            args = pickle.loads(pklArgs)
            datasetName = args.get('datasetName', None)
        if datasetName:
            cmd += " --datasetName %s" % (datasetName)
        if multiRun and runLimits:
            cmd += " --multiRun --runLimits=%s" % (runLimits)
        if runIsComplete:
            cmd += " --runIsComplete"
        self.scramRun(cmd)

        return
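The pickledarguments handling above only works because pickle.loads() accepts bytes, not str, on Python 3, which is what the conditional encoding guarantees. A standalone round-trip sketch with an illustrative dataset name:

import pickle

# illustrative arguments as they might have been pickled into the step configuration
args = {'datasetName': '/MinimumBias/Run2024A-v1/DQMIO'}
pklArgs = pickle.dumps(args, protocol=0).decode('ascii')  # stored upstream as text

# re-encode to bytes before unpickling, mirroring encodeUnicodeToBytesConditional under PY3
datasetName = pickle.loads(pklArgs.encode('ascii')).get('datasetName', None)
print(datasetName)  # /MinimumBias/Run2024A-v1/DQMIO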
Example #3
    def createTopLevelFileset(self, topLevelFilesetName=None):
        """
        _createTopLevelFileset_

        Create the top level fileset for the workflow.  If the name of the top
        level fileset is not given, create one.
        """
        if topLevelFilesetName is None:
            filesetName = ("%s-%s" % (self.wmSpec.name(),
                                      self.wmSpec.getTopLevelTask()[0].name()))
            if self.block:
                filesetName += "-%s" % self.block
            if self.mask:
                from hashlib import md5
                mask_string = ",".join(["%s=%s" % (x, self.mask[x]) for x in sorted(self.mask)])
                mask_string = encodeUnicodeToBytesConditional(mask_string, condition=PY3)
                filesetName += "-%s" % md5(mask_string).hexdigest()
        else:
            filesetName = topLevelFilesetName

        self.topLevelFileset = Fileset(filesetName)
        self.topLevelFileset.create()
        return
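The mask-dependent part of the fileset name is nothing more than an md5 over the sorted key=value pairs. A small sketch of how that suffix comes out, with an illustrative mask and hypothetical workflow/task names:

from hashlib import md5

mask = {'FirstRun': 1, 'LastRun': 1, 'FirstLumi': 10, 'LastLumi': 20}  # illustrative run/lumi mask

mask_string = ",".join("%s=%s" % (key, mask[key]) for key in sorted(mask))
suffix = md5(mask_string.encode("utf-8")).hexdigest()
filesetName = "SomeWorkflow-SomeTask-%s" % suffix  # hypothetical names
print(filesetName)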
Example #4
    def id(self):
        """Generate id for element

        id is deterministic and can be used to identify duplicate elements.
        Calculation only includes fields which affect the workflow and input data.
        Result is an md5 hash of a ';' separated list of:
        workflow name, task name, list of inputs, mask, ACDC info, Dbs instance.

        Parent file info is not accounted for.

        Example:
        >>> WorkQueueElement(RequestName = 'a', TaskName = 'b').id
        '9ef03a6ad8f16d74fb5ba44df92bf1ef'

        Warning: Any change to this function may prevent identical existing and
        new elements from appearing equivalent, in which case expanding work
        subscriptions can lead to duplicated work. Care must be taken
        if any modification is made.
        """
        if self._id:
            return self._id
        # Assume md5 is good enough for now
        myhash = md5()
        spacer = ';'  # character not present in any field
        myhash.update(encodeUnicodeToBytesConditional(self['RequestName'] + spacer, condition=PY3))
        # Task will be None in global inbox
        myhash.update(encodeUnicodeToBytesConditional(repr(self['TaskName']) + spacer, condition=PY3))
        myhash.update(encodeUnicodeToBytesConditional(",".join(sorted(self['Inputs'].keys())) + spacer, condition=PY3))
        # Check repr is reproducible - should be
        if self['Mask']:
            myhash.update(encodeUnicodeToBytesConditional(",".join(["%s=%s" % (x, y) for x, y in viewitems(self['Mask'])]) + spacer, condition=PY3))
        else:
            myhash.update(encodeUnicodeToBytesConditional("None" + spacer, condition=PY3))
        # Check ACDC is deterministic and all params relevant
        myhash.update(encodeUnicodeToBytesConditional(",".join(["%s=%s" % (x, y) for x, y in viewitems(self['ACDC'])]) + spacer, condition=PY3))
        myhash.update(encodeUnicodeToBytesConditional(repr(self['Dbs']) + spacer, condition=PY3))
        self._id = myhash.hexdigest()
        return self._id
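Since the hash only covers the listed fields, two elements built from the same parameters collapse to the same id, which is exactly what duplicate detection relies on. A rough check, assuming the usual WMCore import path and that id is exposed as a property, as the docstring example suggests:

from WMCore.WorkQueue.DataStructs.WorkQueueElement import WorkQueueElement

first = WorkQueueElement(RequestName='a', TaskName='b')
second = WorkQueueElement(RequestName='a', TaskName='b')
assert first.id == second.id  # deterministic: identical inputs give identical ids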
Example #5
    def execute(self, emulator=None):
        """
        _execute_

        """
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        logging.info("Steps.Executors.%s.execute called",
                     self.__class__.__name__)

        stepModule = "WMTaskSpace.%s" % self.stepName

        overrides = {}
        if hasattr(self.step, 'override'):
            overrides = self.step.override.dictionary_()
        self.failedPreviousStep = overrides.get('previousCmsRunFailure', False)

        if self.failedPreviousStep:
            # the previous cmsRun step within this task failed,
            # so don't bother executing anything else
            msg = WM_JOB_ERROR_CODES[99108]
            logging.critical(msg)
            self._setStatus(99108, msg)
            raise WMExecutionFailure(99108, "CmsRunFailure", msg)

        # write the wrapper script to a temporary location
        # I don't pass it directly through os.system because I don't
        # trust that there won't be shell-escape shenanigans with
        # arbitrary input files
        scramSetup = self.step.application.setup.softwareEnvironment
        scramCommand = self.step.application.setup.scramCommand
        scramProject = self.step.application.setup.scramProject
        scramArch = self.step.application.setup.scramArch
        cmsswVersion = self.step.application.setup.cmsswVersion
        jobReportXML = self.step.output.jobReport
        cmsswCommand = self.step.application.command.executable
        cmsswConfig = self.step.application.command.configuration
        cmsswArguments = self.step.application.command.arguments
        userTarball = ','.join(self.step.user.inputSandboxes)
        userFiles = ','.join(self.step.user.userFiles)
        logging.info('User files are %s', userFiles)
        logging.info('User sandboxes are %s', userTarball)

        scramArch = getSingleScramArch(scramArch)

        try:
            multicoreSettings = self.step.application.multicore
            logging.info("CMSSW configured for %s cores and %s event streams",
                         multicoreSettings.numberOfCores,
                         multicoreSettings.eventStreams)
        except AttributeError:
            logging.info(
                "No value set for multicore numberOfCores or eventStreams")

        try:
            gpuSettings = self.step.application.gpu
            logging.info(
                "CMSSW configured for GPU required: %s, with these settings: %s",
                gpuSettings.gpuRequired, gpuSettings.gpuRequirements)
        except AttributeError:
            logging.info(
                "No value set for GPU gpuRequired and/or gpuRequirements")

        logging.info("Executing CMSSW step")

        #
        # set any global environment variables
        #
        try:
            os.environ['FRONTIER_ID'] = 'wmagent_%s' % (
                self.report.data.workload)
        except Exception as ex:
            logging.error('Have critical error in setting FRONTIER_ID: %s',
                          str(ex))
            logging.error(
                'Continuing, as this is not a critical function yet.')

        #
        # scram bootstrap
        #
        scram = Scram(
            command=scramCommand,
            version=cmsswVersion,
            initialise=self.step.application.setup.softwareEnvironment,
            directory=self.step.builder.workingDir,
            architecture=scramArch,
        )

        logging.info("Runing SCRAM")
        try:
            projectOutcome = scram.project()
        except Exception as ex:
            msg = WM_JOB_ERROR_CODES[50513]
            msg += "\nDetails: %s" % str(ex)
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)
        if projectOutcome > 0:
            msg = WM_JOB_ERROR_CODES[50513]
            msg += "\nDetails: %s" % str(scram.diagnostic())
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        runtimeOutcome = scram.runtime()
        if runtimeOutcome > 0:
            msg = WM_JOB_ERROR_CODES[50513]
            msg += "\nDetails: %s" % str(scram.diagnostic())
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        #
        # pre scripts
        #
        logging.info("Running PRE scripts")
        for script in self.step.runtime.preScripts:
            # TODO: Exception handling and error handling & logging
            scriptProcess = subprocess.Popen(
                ["/bin/bash"],
                shell=True,
                cwd=self.step.builder.workingDir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
            )
            # BADPYTHON
            invokeCommand = "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH\n"
            invokeCommand += "{} -m WMCore.WMRuntime.ScriptInvoke {} {} \n".format(
                sys.executable, stepModule, script)
            logging.info("    Invoking command:\n%s", invokeCommand)
            scriptProcess.stdin.write(
                encodeUnicodeToBytesConditional(invokeCommand, condition=PY3))
            stdout, stderr = scriptProcess.communicate()
            retCode = scriptProcess.returncode
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n %s\n %s\n" % (retCode, stdout, stderr)
                logging.critical("Error running command")
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptFailure", msg)

        #
        # pre scripts with scram
        #
        logging.info("RUNNING SCRAM SCRIPTS")
        for script in self.step.runtime.scramPreScripts:
            # invoke scripts with scram()
            runtimeDir = getattr(self.step.runtime, 'scramPreDir', None)
            invokeCommand = self.step.runtime.invokeCommand if hasattr(self.step.runtime, 'invokeCommand') else \
                "%s -m WMCore.WMRuntime.ScriptInvoke %s" % (sys.executable, stepModule)
            invokeCommand += " %s \n" % script
            retCode = scram(invokeCommand, runtimeDir=runtimeDir)
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n " % retCode
                msg += scram.diagnostic()
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptScramFailure", msg)

        configPath = "%s/%s-main.sh" % (self.step.builder.workingDir,
                                        self.stepName)
        with open(configPath, 'w') as handle:
            handle.write(CONFIG_BLOB)

        # spawn this new process
        # the script looks for:
        # <SCRAM_COMMAND> <SCRAM_PROJECT> <CMSSW_VERSION> <JOB_REPORT> <EXECUTABLE> <CONFIG>
        # open the output files
        stdoutHandle = open(self.step.output.stdout, 'w')
        stderrHandle = open(self.step.output.stderr, 'w')
        args = [
            '/bin/bash', configPath, scramSetup, scramArch, scramCommand,
            scramProject, cmsswVersion, jobReportXML, cmsswCommand,
            cmsswConfig, userTarball, userFiles, cmsswArguments
        ]
        logging.info("Executing CMSSW. args: %s", args)

        # possibly needed environment overrides for CMSSW call go here
        envOverride = {}
        # Do not pass WM PYTHONPATH to CMSSW environment
        pythonPath = os.environ.get('PYTHONPATH', '')
        envOverride['PYTHONPATH'] = ""
        # work around problem with GSI authentication plugin and EOS at CERN
        if socket.getfqdn().endswith("cern.ch"):
            envOverride['XRD_LOADBALANCERTTL'] = "86400"
        # some libraries linked with CMSSW need HOME in the environment
        if 'HOME' not in os.environ:
            envOverride['HOME'] = os.environ.get('PWD', "/")

        os.environ.update(envOverride)

        returnCode = subprocess.call(args,
                                     stdout=stdoutHandle,
                                     stderr=stderrHandle)
        returnMessage = None

        # Return PYTHONPATH to its original value, as this
        # is needed for stepChain workflows, so other prescripts
        # are able to find WMCore modules
        envOverride['PYTHONPATH'] = pythonPath
        os.environ.update(envOverride)

        if returnCode != 0:
            argsDump = {'arguments': args}
            msg = "Error running cmsRun\n%s\n" % argsDump
            try:
                self.report.parse(jobReportXML, stepName=self.stepName)
                (returnCode,
                 returnMessage) = self.report.getStepExitCodeAndMessage(
                     stepName=self.stepName)
                msg += "CMSSW Return code: %s\n" % returnCode
            except Exception as ex:
                # If report parsing fails, report linux exit code
                msg += "Linux Return code: %s\n" % returnCode
            finally:
                logging.critical(msg)
                logging.critical("Error message: %s", returnMessage)
                self._setStatus(returnCode, returnMessage)
                raise WMExecutionFailure(returnCode, "CmsRunFailure", msg)
        else:
            self._setStatus(returnCode, returnMessage)

        stdoutHandle.close()
        stderrHandle.close()

        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
        except Exception as ex:
            msg = WM_JOB_ERROR_CODES[50115]
            msg += "\nDetails: %s" % str(ex)
            raise WMExecutionFailure(50115, "BadJobReportXML", msg)

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = typeHelper.getAcqEra() or self.task.getAcquisitionEra()
        processingVer = typeHelper.getProcVer() or self.task.getProcessingVersion()
        processingStr = typeHelper.getProcStr() or self.task.getProcessingString()
        prepID = typeHelper.getPrepId() or self.task.getPrepID()
        globalTag = typeHelper.getGlobalTag()
        validStatus = self.workload.getValidStatus()
        inputPath = self.task.getInputDatasetPath()
        campaign = self.workload.getCampaign()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus=validStatus)
        self.report.setGlobalTag(globalTag=globalTag)
        self.report.setCampaign(campaign)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath=inputPath)
        self.report.setAcquisitionProcessing(acquisitionEra=acquisitionEra,
                                             processingVer=processingVer,
                                             processingStr=processingStr)
        self.report.setConfigURL(configURL="%s;;%s;;%s" %
                                 (cacheUrl, cacheDB, configID))

        # Attach info to files
        self.report.addInfoToOutputFilesForStep(stepName=self.stepName,
                                                step=self.step)

        self.report.checkForOutputFiles(stepName=self.stepName)
        self.report.checkForAdlerChecksum(stepName=self.stepName)
        self.report.checkForRunLumiInformation(stepName=self.stepName)

        if self.step.output.keep != True:
            self.report.killOutput()
        else:
            # Check that we only keep the desired output
            for module in stepHelper.getIgnoredOutputModules():
                self.report.deleteOutputModuleForStep(stepName=self.stepName,
                                                      moduleName=module)

        # Add stageout LFN to existing TFileService files
        reportAnalysisFiles = self.report.getAnalysisFilesFromStep(
            self.stepName)
        for reportAnalysisFile in reportAnalysisFiles:
            newLFN = analysisFileLFN(reportAnalysisFile.fileName,
                                     self.step.user.lfnBase, self.job)
            addAttributesToFile(reportAnalysisFile,
                                pfn=reportAnalysisFile.fileName,
                                lfn=newLFN,
                                validate=False)

        # Add analysis file entries for additional files listed in workflow
        for fileName in stepHelper.listAnalysisFiles():
            analysisFile = stepHelper.getAnalysisFile(fileName)
            if os.path.isfile(analysisFile.fileName):
                newLFN = analysisFileLFN(analysisFile.fileName,
                                         analysisFile.lfnBase, self.job)
                self.report.addAnalysisFile(analysisFile.fileName,
                                            lfn=newLFN,
                                            Source='UserDefined',
                                            pfn=os.path.join(
                                                os.getcwd(),
                                                analysisFile.fileName),
                                            validate=False)

        return
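One detail worth calling out in this executor is the environment dance around the CMSSW subprocess: PYTHONPATH is blanked so the CMSSW process does not pick up the agent's modules, then restored so later steps can import WMCore again. A stripped-down sketch of that pattern, with a placeholder command instead of the real wrapper script:

import os
import subprocess

# save the agent's PYTHONPATH and blank it for the child process
pythonPath = os.environ.get('PYTHONPATH', '')
os.environ['PYTHONPATH'] = ""

returnCode = subprocess.call(["/bin/true"])  # stand-in for the cmsRun wrapper invocation

# restore it so subsequent steps can still find WMCore modules
os.environ['PYTHONPATH'] = pythonPath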
Example #6
def stream_maybe_etag(size_limit, etag, reply):
    """Maybe generate ETag header for the response, and handle If-Match
    and If-None-Match request headers. Consumes up to `size_limit` bytes
    of the reply. If the response fits into that size, adds the
    ETag header and matches it against any If-Match / If-None-Match
    request headers and replies appropriately.

    If the response is fully buffered, and the `reply` generator actually
    results in an error and sets X-Error-HTTP / X-Error-Detail headers,
    converts that error back into a real HTTP error response. Otherwise
    responds with the fully buffered body directly, without generator
    and chunking. In other words, responses smaller than `size_limit`
    are always fully buffered and replied immediately without chunking.
    If the response is not fully buffered, it's guaranteed to be output
    at original chunk boundaries.

    Note that if this function is fed the output from `stream_compress()`
    as it normally would be, the `size_limit` constrains the compressed
    size, and chunk boundaries correspond to compressed chunks."""

    req = cherrypy.request
    res = cherrypy.response
    match = [str(x) for x in (req.headers.elements('If-Match') or [])]
    nomatch = [str(x) for x in (req.headers.elements('If-None-Match') or [])]

    # If ETag is already set, match conditions and output without buffering.
    etagval = res.headers.get('ETag', None)
    if etagval:
        _etag_match(res.status or 200, etagval, match, nomatch)
        res.headers['Trailer'] = 'X-REST-Status'
        return _etag_tail([], reply, None)

    # Buffer up to size_limit bytes internally. This builds up the ETag value
    # inside 'etag'; in case of exceptions the ETag is invalidated.
    # If we exceed the limit, fall back to streaming without checking ETag
    # against If-Match/If-None-Match. We'll still set the ETag in the trailer
    # headers, so clients which understand trailers will get the value; most
    # clients including browsers will ignore them.
    size = 0
    result = []
    for chunk in reply:
        result.append(chunk)
        size += len(chunk)
        if size > size_limit:
            res.headers['Trailer'] = 'X-REST-Status'
            return _etag_tail(result, reply, etag)

    # We've buffered the entire response, but it may be an error reply. The
    # generator code does not know if it's allowed to raise exceptions, so
    # it swallows all errors and converts them into X-* headers. We recover
    # the original HTTP response code and message from X-Error-{HTTP,Detail}
    # headers, if any are present.
    err = res.headers.get('X-Error-HTTP', None)
    if err:
        message = res.headers.get('X-Error-Detail', 'Original error lost')
        raise cherrypy.HTTPError(int(err), message)

    # OK, we buffered the entire reply and it's ok. Check ETag match criteria.
    # The original stream generator must guarantee that if it fails it resets
    # the 'etag' value, even if the error handlers above didn't run.
    etagval = etag.value()
    if etagval:
        res.headers['ETag'] = etagval
        _etag_match(res.status or 200, etagval, match, nomatch)

    # OK, respond with the buffered reply as a plain string.
    res.headers['Content-Length'] = size
    # TODO investigate why `result` is a list of bytes strings in py3
    # The current solution seems to work in both py2 and py3
    resp = b"" if PY3 else ""
    for item in result:
        resp += encodeUnicodeToBytesConditional(item, condition=PY3)
    assert len(resp) == size
    return resp
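The buffering rule itself is simple: accumulate chunks until size_limit is exceeded, and only then fall back to streaming. A toy, framework-free sketch of just that decision (no CherryPy, no ETag bookkeeping):

def buffer_or_stream(reply, size_limit):
    """Sketch: return (True, full_body) when the reply fits within size_limit,
    otherwise (False, chunks_read_so_far) so the caller keeps streaming."""
    size = 0
    buffered = []
    for chunk in reply:
        buffered.append(chunk)
        size += len(chunk)
        if size > size_limit:
            return False, buffered
    return True, b"".join(buffered)

assert buffer_or_stream(iter([b"ab", b"cd"]), size_limit=10) == (True, b"abcd")
assert buffer_or_stream(iter([b"ab", b"cd"]), size_limit=3)[0] is False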
Example #7
def gen_color(val):
    "Generate unique color code for given string value"
    keyhash = hashlib.md5()
    keyhash.update(encodeUnicodeToBytesConditional(val, condition=PY3))
    col = '#%s' % keyhash.hexdigest()[:6]
    return col
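Because the color is just the first six hex digits of an md5 digest, the mapping is stable across calls and processes. A quick usage check, assuming hashlib is imported as in the surrounding module:

assert gen_color("Tier0") == gen_color("Tier0")   # deterministic
assert gen_color("Tier0").startswith("#")
assert len(gen_color("Tier0")) == 7               # '#' plus six hex digits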
Example #8
    def getJobParameters(self, jobList):
        """
        _getJobParameters_

        Return a list of dictionaries with submit parameters per job.
        """

        undefined = 'UNDEFINED'
        jobParameters = []

        for job in jobList:
            ad = {}

            ad['initial_Dir'] = encodeUnicodeToBytesConditional(job['cache_dir'], condition=PY2)
            ad['transfer_input_files'] = "%s,%s/%s,%s" % (job['sandbox'], job['packageDir'],
                                                   'JobPackage.pkl', self.unpacker)
            ad['Arguments'] = "%s %i %s" % (os.path.basename(job['sandbox']), job['id'], job["retry_count"])
            ad['transfer_output_files'] = "Report.%i.pkl,wmagentJob.log" % job["retry_count"]

            # Dictionary keys need to be consistent across all jobs within the same 
            # clusterId when working with queue_with_itemdata()
            # Initialize 'Requirements' to an empty string for all jobs.
            # See issue: https://htcondor-wiki.cs.wisc.edu/index.cgi/tktview?tn=7715 
            ad['Requirements'] = ''
            # Do not define custom Requirements for Volunteer resources
            if self.reqStr is not None:
                ad['Requirements'] = self.reqStr

            ad['My.x509userproxy'] = classad.quote(self.x509userproxy)
            sites = ','.join(sorted(job.get('possibleSites')))
            ad['My.DESIRED_Sites'] = classad.quote(str(sites))
            sites = ','.join(sorted(job.get('potentialSites')))
            ad['My.ExtDESIRED_Sites'] = classad.quote(str(sites))
            ad['My.CMS_JobRetryCount'] = str(job['retry_count'])
            ad['My.WMAgent_RequestName'] = classad.quote(encodeUnicodeToBytesConditional(job['request_name'], condition=PY2))
            match = re.compile("^[a-zA-Z0-9_]+_([a-zA-Z0-9]+)-").match(job['request_name'])
            if match:
                ad['My.CMSGroups'] = classad.quote(match.groups()[0])
            else:
                ad['My.CMSGroups'] = undefined
            ad['My.WMAgent_JobID'] = str(job['jobid'])
            ad['My.WMAgent_SubTaskName'] = classad.quote(encodeUnicodeToBytesConditional(job['task_name'], condition=PY2))
            ad['My.CMS_JobType'] = classad.quote(encodeUnicodeToBytesConditional(job['task_type'], condition=PY2))
            ad['My.CMS_Type'] = classad.quote(activityToType(job['activity']))
            ad['My.CMS_RequestType'] = classad.quote(job['requestType'])

            # Handling for AWS, cloud and opportunistic resources
            ad['My.AllowOpportunistic'] = str(job.get('allowOpportunistic', False))
            if job.get('inputDataset'):
                ad['My.DESIRED_CMSDataset'] = classad.quote(encodeUnicodeToBytesConditional(job['inputDataset'], condition=PY2))
            else:
                ad['My.DESIRED_CMSDataset'] = undefined
            if job.get('inputDatasetLocations'):
                sites = ','.join(sorted(job['inputDatasetLocations']))
                ad['My.DESIRED_CMSDataLocations'] = classad.quote(str(sites))
            else:
                ad['My.DESIRED_CMSDataLocations'] = undefined
            if job.get('inputPileup'):
                cmsPileups = ','.join(sorted(job['inputPileup']))
                ad['My.DESIRED_CMSPileups'] = classad.quote(str(cmsPileups))
            else:
                ad['My.DESIRED_CMSPileups'] = undefined
            # HighIO
            ad['My.Requestioslots'] = str(1 if job['task_type'] in ["Merge", "Cleanup", "LogCollect"] else 0)
            # GPU resource handling
            # while we do not support a third option for RequiresGPU, make a binary decision
            if job['requiresGPU'] == "required":
                ad['My.RequiresGPU'] = "1"
                ad['request_GPUs'] = "1"
            else:
                ad['My.RequiresGPU'] = "0"
                ad['request_GPUs'] = "0"
            if job.get('gpuRequirements', None):
                ad['My.GPUMemoryMB'] = str(job['gpuRequirements']['GPUMemoryMB'])
                cudaCapabilities = ','.join(sorted(job['gpuRequirements']['CUDACapabilities']))
                ad['My.CUDACapability'] = classad.quote(str(cudaCapabilities))
                ad['My.CUDARuntime'] = classad.quote(job['gpuRequirements']['CUDARuntime'])
            else:
                ad['My.GPUMemoryMB'] = undefined
                ad['My.CUDACapability'] = undefined
                ad['My.CUDARuntime'] = undefined
            # Performance and resource estimates (including JDL magic tweaks)
            origCores = job.get('numberOfCores', 1)
            estimatedMins = int(job['estimatedJobTime'] / 60.0) if job.get('estimatedJobTime') else 12 * 60
            estimatedMinsSingleCore = estimatedMins * origCores
            # For now, assume a 15 minute job startup overhead -- condor will round this up further
            ad['My.EstimatedSingleCoreMins'] = str(estimatedMinsSingleCore)
            ad['My.OriginalMaxWallTimeMins'] = str(estimatedMins)
            ad['My.MaxWallTimeMins'] = 'WMCore_ResizeJob ? (EstimatedSingleCoreMins/RequestCpus + 15) : OriginalMaxWallTimeMins'
            requestMemory = int(job['estimatedMemoryUsage']) if job.get('estimatedMemoryUsage', None) else 1000
            ad['My.OriginalMemory'] = str(requestMemory)
            ad['My.ExtraMemory'] = str(self.extraMem)
            ad['request_memory'] = 'OriginalMemory + ExtraMemory * (WMCore_ResizeJob ? (RequestCpus-OriginalCpus) : 0)'
            requestDisk = int(job['estimatedDiskUsage']) if job.get('estimatedDiskUsage', None) else 20 * 1000 * 1000 * origCores
            ad['request_disk'] = str(requestDisk)
            # Set up JDL for multithreaded jobs.
            # By default, RequestCpus will evaluate to whatever CPU request was in the workflow.
            # If the job is labelled as resizable, then the logic is more complex:
            # - If the job is running in a slot with N cores, this should evaluate to N
            # - If the job is being matched against a machine, match all available CPUs, provided
            # they are between min and max CPUs.
            # - Otherwise, just use the original CPU count.
            ad['My.MinCores'] = str(job.get('minCores', max(1, origCores / 2)))
            ad['My.MaxCores'] = str(max(int(job.get('maxCores', origCores)), origCores))
            ad['My.OriginalCpus'] = str(origCores)
            # Prefer slots that are closest to our MaxCores without going over.
            # If the slot size is _greater_ than our MaxCores, we prefer not to
            # use it - we might unnecessarily fragment the slot.
            ad['Rank'] = 'isUndefined(Cpus) ? 0 : ifThenElse(Cpus > MaxCores, -Cpus, Cpus)'
            # Record the number of CPUs utilized at match time.  We'll use this later
            # for monitoring and accounting.  Defaults to 0; once matched, condor will
            # add an attribute to the job ad such as MATCH_EXP_JOB_GLIDEIN_Cpus = 4
            ad['My.JOB_GLIDEIN_Cpus'] = classad.quote("$$(Cpus:0)")
            # Make sure the resize request stays within MinCores and MaxCores.
            ad['My.RequestResizedCpus'] = '(Cpus>MaxCores) ? MaxCores : ((Cpus < MinCores) ? MinCores : Cpus)'
            # If the job is running, then we should report the matched CPUs in RequestCpus - but only if there are sane
            # values.  Otherwise, we just report the original CPU request
            ad['My.JobCpus'] = ('((JobStatus =!= 1) && (JobStatus =!= 5) && !isUndefined(MATCH_EXP_JOB_GLIDEIN_Cpus) '
                              '&& (int(MATCH_EXP_JOB_GLIDEIN_Cpus) isnt error)) ? int(MATCH_EXP_JOB_GLIDEIN_Cpus) : OriginalCpus')
            # Cpus is taken from the machine ad - hence it is only defined when we are doing negotiation.
            # Otherwise, we use either the cores in the running job (if available) or the original cores.
            ad['request_cpus'] = 'WMCore_ResizeJob ? (!isUndefined(Cpus) ? RequestResizedCpus : JobCpus) : OriginalCpus'
            ad['My.WMCore_ResizeJob'] = str(job.get('resizeJob', False))
            taskPriority = int(job.get('taskPriority', 1))
            priority = int(job.get('wf_priority', 0))
            ad['My.JobPrio'] = str(int(priority + taskPriority * self.maxTaskPriority))
            ad['My.PostJobPrio1'] = str(int(-1 * len(job.get('potentialSites', []))))
            ad['My.PostJobPrio2'] = str(int(-1 * job['task_id']))
            # Add OS requirements for jobs
            requiredOSes = self.scramArchtoRequiredOS(job.get('scramArch'))
            ad['My.REQUIRED_OS'] = classad.quote(encodeUnicodeToBytesConditional(requiredOSes, condition=PY2))
            cmsswVersions = ','.join(job.get('swVersion'))
            ad['My.CMSSW_Versions'] = classad.quote(encodeUnicodeToBytesConditional(cmsswVersions, condition=PY2))
            requiredArch = self.scramArchtoRequiredArch(job.get('scramArch'))
            if not requiredArch:  # only Cleanup jobs should not have ScramArch defined
                ad['Requirements'] = '(TARGET.Arch =!= Undefined)'
            else:
                ad['Requirements'] = '(TARGET.Arch =?= "{}")'.format(requiredArch)

            jobParameters.append(ad)

        return jobParameters
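The JobPrio arithmetic is where workflow and task priorities meet: the task priority is scaled by maxTaskPriority so it always dominates ordering within a workflow. A tiny sketch with made-up numbers:

maxTaskPriority = 1000    # illustrative submitter configuration value
wf_priority = 85000       # request priority of the workflow
taskPriority = 3          # priority of the task within the workflow

jobPrio = int(wf_priority + taskPriority * maxTaskPriority)
assert jobPrio == 88000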
Example #9
    def newWriter(*args):
        newWriter.__name__ = func.__name__
        newWriter.__doc__ = func.__doc__
        newWriter.__dict__.update(func.__dict__)
        subproc = args[0]
        line = args[1]
        escapedLine = "echo \"%s\"\n" % line
        func(subproc, escapedLine)

    return newWriter


#  //
# // Interceptable function to push commands to the subshell, used to
# //  enable test mode.
procWriter = lambda s, l: s.stdin.write(
    encodeUnicodeToBytesConditional(l, condition=PY3))


class Scram(object):
    """
    _Scram_

    Object to encapsulate a scram "session" that can be used to create
    a project area, bootstrap the environment and then use that to
    execute commands

    Simple enumeration of scram errors is performed, to allow to be mapped to standard
    exception/error conditions

    """
    def __init__(self, **options):
Example #10
    def newWriter(*args):
        newWriter.__name__ = func.__name__
        newWriter.__doc__ = func.__doc__
        newWriter.__dict__.update(func.__dict__)
        subproc = args[0]
        line = args[1]
        escapedLine = "echo \"%s\"\n" % line
        func(subproc, escapedLine)

    return newWriter


#  //
# // Interceptable function to push commands to the subshell, used to
# //  enable test mode.
procWriter = lambda s, l: s.stdin.write(encodeUnicodeToBytesConditional(l, condition=PY3))


class Scram(object):
    """
    _Scram_

    Object to encapsulate a scram "session" that can be used to create
    a project area, bootstrap the environment and then use that to
    execute commands

    Simple enumeration of scram errors is performed, to allow to be mapped to standard
    exception/error conditions

    """
Example #11
    def __call__(self):
        """
        _call_

        Examine the step configuration and construct a PSet from that.

        """
        self.logger.info("Executing SetupCMSSWPSet...")
        self.jobBag = self.job.getBaggage()
        self.configPickle = getattr(self.step.data.application.command,
                                    "configurationPickle", "PSet.pkl")
        self.psetFile = getattr(self.step.data.application.command,
                                "configuration", "PSet.py")
        self.scram = self.createScramEnv()

        scenario = getattr(self.step.data.application.configuration,
                           "scenario", None)
        funcName = getattr(self.step.data.application.configuration,
                           "function", None)
        if scenario is not None and scenario != "":
            self.logger.info("Setting up job scenario/process")
            if getattr(self.step.data.application.configuration,
                       "pickledarguments", None) is not None:
                pklArgs = encodeUnicodeToBytesConditional(
                    self.step.data.application.configuration.pickledarguments,
                    condition=PY3)
                funcArgs = pickle.loads(pklArgs)
            else:
                funcArgs = {}

            # Create process
            try:
                self.createProcess(scenario, funcName, funcArgs)
            except Exception as ex:
                self.logger.exception(
                    "Error creating process for Config/DataProcessing:")
                raise ex
            # Now, load the newly pickled process
            try:
                with open(
                        os.path.join(self.stepSpace.location,
                                     self.configPickle), 'rb') as f:
                    self.process = Unpickler(f).load()
            except ImportError as ex:
                msg = "Unable to import pset from %s:\n" % self.psetFile
                msg += str(ex)
                self.logger.error(msg)
                raise ex

            if funcName == "repack":
                self.handleRepackSettings()

            if funcName in ["merge", "alcaHarvesting"]:
                self.handleSingleCoreOverride()

            if socket.getfqdn().endswith("cern.ch"):
                self.handleSpecialCERNMergeSettings(funcName)
        else:
            self.logger.info("DEBUG: Now in the none scenario to load PSET")
            try:
                self.loadPSet()
            except Exception as ex:
                self.logger.exception("Error loading PSet:")
                raise ex

        # Check process.source exists
        self.logger.info("Debug: Self.process")
        self.logger.info(dir(self.process))
        if getattr(self.process, "source", None) is None and getattr(
                self.process, "_Process__source", None) is None:
            msg = "Error in CMSSW PSet: process is missing attribute 'source'"
            msg += " or process.source is defined with None value."
            self.logger.error(msg)
            raise RuntimeError(msg)

        self.handleCondorStatusService()
        self.fixupProcess()

        # In case of CRAB3, the number of threads in the PSet should not be overridden
        if not self.crabPSet:
            try:
                self.makeThreadsStreamsTweak()
            except AttributeError as ex:
                self.logger.error("Failed to override numberOfThreads: %s",
                                  str(ex))

        # Apply task level tweaks
        makeTaskTweak(self.step.data, self.tweak)
        self.applyPsetTweak(self.tweak, cleanupTweak=True)

        # Check if chained processing is enabled
        # If not - apply the per job tweaks
        # If so - create an override TFC (like done in PA) and then modify the PSet accordingly
        if getattr(self.step.data.input, "chainedProcessing", False):
            self.logger.info("Handling Chain processing tweaks")
            self.handleChainedProcessingTweak()
        else:
            self.logger.info("Creating job level tweaks")
            makeJobTweak(self.job, self.tweak)
        self.applyPsetTweak(self.tweak, cleanupTweak=True)

        # check for pileup settings presence, pileup support implementation
        # and if enabled, process pileup configuration / settings
        if hasattr(self.step.data, "pileup"):
            self.handlePileup()

        # Apply per output module PSet Tweaks
        self.logger.info("Output module section")
        cmsswStep = self.step.getTypeHelper()
        for om in cmsswStep.listOutputModules():
            mod = cmsswStep.getOutputModule(om)
            modName = mod.getInternalName()

            if funcName == 'merge':
                # Use only the MergedError output module when useErrorDataset is True,
                # and only the Merged output module otherwise
                useErrorDataset = getattr(self.jobBag, "useErrorDataset",
                                          False)

                if useErrorDataset and modName != 'MergedError':
                    continue
                if not useErrorDataset and modName == 'MergedError':
                    continue

            makeOutputTweak(mod, self.job, self.tweak)
        # allow failed tweaks here to replicate the previous implementation, which would
        # ignore and continue when it found an output module that does not exist in the PSet (e.g. process.Sqlite)
        self.applyPsetTweak(self.tweak,
                            allowFailedTweaks=True,
                            cleanupTweak=True)

        # revlimiter for testing
        if getattr(self.step.data.application.command, "oneEventMode", False):
            self.tweak.addParameter('process.maxEvents.input',
                                    "customTypeCms.untracked.int32(1)")

        # check for random seeds and the method of seeding which is in the job baggage
        self.handleSeeding()

        # make sure default parametersets for perf reports are installed
        self.handlePerformanceSettings()

        # fixup the dqmFileSaver
        self.handleDQMFileSaver()

        # tweak for jobs reading LHE articles from CERN
        self.handleLHEInput()

        # tweak jobs for enforceGUIDInFileName
        self.handleEnforceGUIDInFileName()

        # Check if we accept skipping bad files
        if hasattr(self.step.data.application.configuration, "skipBadFiles"):
            self.tweak.addParameter(
                "process.source.skipBadFiles",
                "customTypeCms.untracked.bool(%s)" %
                self.step.data.application.configuration.skipBadFiles)

        # Apply events per lumi section if available
        if hasattr(self.step.data.application.configuration, "eventsPerLumi"):
            self.tweak.addParameter(
                "process.source.numberEventsInLuminosityBlock",
                "customTypeCms.untracked.uint32(%s)" %
                self.step.data.application.configuration.eventsPerLumi)

        # limit run time if desired
        if hasattr(self.step.data.application.configuration,
                   "maxSecondsUntilRampdown"):
            self.tweak.addParameter(
                "process.maxSecondsUntilRampdown.input",
                "customTypeCms.untracked.PSet(input=cms.untracked.int32(%s))" %
                self.step.data.application.configuration.
                maxSecondsUntilRampdown)

        # accept an overridden TFC from the step
        if hasattr(self.step.data.application, 'overrideCatalog'):
            self.logger.info("Found a TFC override: %s",
                             self.step.data.application.overrideCatalog)
            self.tweak.addParameter(
                "process.source.overrideCatalog",
                "customTypeCms.untracked.string('%s')" %
                self.step.data.application.overrideCatalog)

        configFile = self.step.data.application.command.configuration
        workingDir = self.stepSpace.location
        try:
            self.applyPsetTweak(self.tweak)

            with open("%s/%s" % (workingDir, configFile), 'w') as handle:
                handle.write("import FWCore.ParameterSet.Config as cms\n")
                handle.write("import pickle\n")
                handle.write("with open('%s', 'rb') as handle:\n" %
                             self.configPickle)
                handle.write("    process = pickle.load(handle)\n")
        except Exception as ex:
            self.logger.exception("Error writing out PSet:")
            raise ex

        # check for event numbers in the producers
        self.handleProducersNumberOfEvents()

        self.logger.info("CMSSW PSet setup completed!")

        return 0
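The with open(...) block near the end writes a small wrapper so that cmsRun can load the tweaked, pickled process. For the default pickle name PSet.pkl, the generated configuration file would look roughly like this (reconstructed from the handle.write calls above):

import FWCore.ParameterSet.Config as cms
import pickle
with open('PSet.pkl', 'rb') as handle:
    process = pickle.load(handle)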