コード例 #1
0
ファイル: PerformanceMonitor.py プロジェクト: cronot99/WMCore
    def stepStart(self, step):
        """
        _stepStart_

        Point the monitor at the step that is about to run.

        Wraps the step in a WMStepHelper, records the step name, clears the
        current step space and sets ``disableStep`` depending on whether the
        step type is listed in ``watchStepTypes``.
        """
        self.stepHelper = WMStepHelper(step)
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None

        # Watched step types get monitored; everything else is ignored
        if self.stepHelper.stepType() in self.watchStepTypes:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False
            return

        self.disableStep = True
        logging.debug("PerformanceMonitor ignoring step of type %s",
                      self.stepHelper.stepType())
コード例 #2
0
ファイル: WMRuntimeMonitor.py プロジェクト: vytjan/WMCore
#!/usr/bin/env python
"""
_WMRuntimeMonitor_

This is the base class for monitors
"""

import threading
import os

from WMCore.WMException import WMException
from WMCore.WMSpec.Steps.Executor import getStepSpace
from WMCore.WMSpec.WMStep import WMStepHelper

def getStepName(step):
    """Return the name of *step* as reported by WMStepHelper."""
    return WMStepHelper(step).name()


class WMRuntimeMonitorException(WMException):
    """
    _WMRuntimeMonitorException_

    Exception type for the runtime monitors. It adds nothing on top of
    WMException; it only provides a distinct class to raise and catch.

    """


class WMRuntimeMonitor:
    """
    _WMRuntimeMonitor_

    Base class for monitors.
    """

    def __init__(self):
        # No step is being tracked when the monitor is created
        self.currentStep = None
        self.currentStepName = None
コード例 #3
0
ファイル: ExecuteMaster.py プロジェクト: ticoann/WMCore
class ExecuteMaster:
    """
    _ExecuteMaster_

    Traverse the given task and invoke the execute framework
    If an emulator is provided, then invoke the appropriate emulator
    instead of the executor

    """
    def __init__(self):
        pass

    def __call__(self, task, wmbsJob):
        """
        _operator(task)_

        Load and run executors for all steps in Task, if an emulator is
        available for that step, use it instead.

        :param task: task whose ``steps().nodeIterator()`` yields the steps
            to execute.
        :param wmbsJob: job object handed to the monitors and to
            ``doExecution``.
        :raises WMException: re-raised unchanged when starting the monitors
            fails with one.
        :raises WMExecutionFailure: when starting the monitors fails with any
            other exception.
        """

        # NOTE(review): currentThread is referenced without being called, so
        # watchdogMonitor is looked up as an attribute of the function object
        # itself - presumably whoever installs the monitor stores it the same
        # way; confirm before "fixing" this into a call.
        myThread = threading.currentThread

        try:
            myThread.watchdogMonitor.setupMonitors(task, wmbsJob)
            myThread.watchdogMonitor.notifyJobStart(task)
        except WMException:
            self.toTaskDirectory()
            raise
        except Exception as ex:
            msg = "Encountered unhandled exception while starting monitors:\n"
            msg += str(ex) + '\n'
            msg += str(traceback.format_exc()) + '\n'
            logging.error(msg)
            self.toTaskDirectory()
            # NOTE(review): other call sites pass (code, name, message) to
            # WMExecutionFailure; confirm a single argument is accepted.
            raise WMExecutionFailure(msg)

        # When doExecution returns a step name, every following step is
        # skipped until the step with that name is reached.
        skipToStep = None
        for step in task.steps().nodeIterator():
            try:
                helper = WMStepHelper(step)
                stepType = helper.stepType()
                stepName = helper.name()
                if skipToStep and skipToStep != stepName:
                    # Then we continue until we get to the required step
                    continue
                skipToStep = None  # Reset this when we get to the right step
                executor = StepFactory.getStepExecutor(stepType)
                result = self.doExecution(executor, step, wmbsJob)
                if result is not None:
                    skipToStep = result
            except WMException as ex:
                # Log the failure instead of dropping it silently
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                logging.error(msg)
                self.toTaskDirectory()
                break
            except Exception as ex:
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                msg += str(traceback.format_exc()) + "\n"
                self.toTaskDirectory()
                logging.error(msg)
                break
コード例 #4
0
"""
from __future__ import absolute_import

import json
import logging
import os
import subprocess
import sys

from Utils.FileTools import getFullPath
from Utils.Utilities import zipEncodeStr
from WMCore.FwkJobReport.Report import Report
from WMCore.WMSpec.Steps.StepFactory import getStepEmulator
from WMCore.WMSpec.WMStep import WMStepHelper

def getStepName(step):
    """Return the name of *step* as reported by WMStepHelper."""
    return WMStepHelper(step).name()


def getStepErrorDestination(step):
    """Return the error destination step of *step* as reported by WMStepHelper."""
    return WMStepHelper(step).getErrorDestinationStep()


def getStepSpace(stepName):
    """
    _getStepSpace_

    Util to get the runtime step space.
    This imports dynamic runtime libraries so be careful how
    you use it

    """
    modName = "WMTaskSpace"
    # Reuse the WMTaskSpace module when it has already been imported
    if modName in sys.modules.keys():
        taskspace = sys.modules[modName]
コード例 #5
0
    def execute(self, emulator=None):
        """
        _execute_

        Run the CMSSW step: bootstrap SCRAM, run the pre scripts, launch the
        CMSSW executable through a generated wrapper script and fill in the
        job report from the resulting XML.

        :param emulator: optional emulator; when given, nothing is executed
            and the result of ``emulator.emulate(self.step, self.job)`` is
            returned instead.
        :return: the emulator result when an emulator is supplied, otherwise
            None.
        :raises WMExecutionFailure: with code 50513 when SCRAM setup or a pre
            script fails, with the wrapper's exit code when that is non-zero,
            and with code 50115 when the job report cannot be parsed.
        """
        stepModule = "WMTaskSpace.%s" % self.stepName
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        # write the wrapper script to a temporary location
        # I don't pass it directly through os.system because I don't
        # trust that there won't be shell-escape shenanigans with
        # arbitrary input files
        scramSetup = self.step.application.setup.softwareEnvironment
        scramCommand = self.step.application.setup.scramCommand
        scramProject = self.step.application.setup.scramProject
        scramArch = self.step.application.setup.scramArch
        cmsswVersion = self.step.application.setup.cmsswVersion
        jobReportXML = self.step.output.jobReport
        cmsswCommand = self.step.application.command.executable
        cmsswConfig = self.step.application.command.configuration
        cmsswArguments = self.step.application.command.arguments
        userTarball = ','.join(self.step.user.inputSandboxes)
        userFiles = ','.join(self.step.user.userFiles)
        logging.info('User files are %s', userFiles)
        logging.info('User sandboxes are %s', userTarball)

        scramArch = getSingleScramArch(scramArch)

        multicoreSettings = self.step.application.multicore
        try:
            logging.info("CMSSW configured for %s cores and %s event streams",
                         multicoreSettings.numberOfCores,
                         multicoreSettings.eventStreams)
        except AttributeError:
            logging.info(
                "No value set for multicore numberOfCores or eventStreams")

        logging.info("Executing CMSSW step")

        #
        # set any global environment variables
        #
        try:
            os.environ['FRONTIER_ID'] = 'wmagent_%s' % (
                self.report.data.workload)
        except Exception as ex:
            # Deliberately best-effort: a failure here is logged, not raised
            logging.error('Have critical error in setting FRONTIER_ID: %s',
                          str(ex))
            logging.error(
                'Continuing, as this is not a critical function yet.')

        #
        # scram bootstrap
        #
        scram = Scram(
            command=scramCommand,
            version=cmsswVersion,
            initialise=self.step.application.setup.softwareEnvironment,
            directory=self.step.builder.workingDir,
            architecture=scramArch,
        )

        logging.info("Runing SCRAM")
        try:
            projectOutcome = scram.project()
        except Exception as ex:
            msg = "Exception raised while running scram.\n"
            msg += str(ex)
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        if projectOutcome > 0:
            msg = scram.diagnostic()
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)
        runtimeOutcome = scram.runtime()
        if runtimeOutcome > 0:
            msg = scram.diagnostic()
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        #
        # pre scripts
        #
        logging.info("Running PRE scripts")
        for script in self.step.runtime.preScripts:
            # TODO: Exception handling and error handling & logging
            scriptProcess = subprocess.Popen(
                ["/bin/bash"],
                shell=True,
                cwd=self.step.builder.workingDir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
            )
            # BADPYTHON
            # NOTE(review): the pipe is opened in binary mode while text
            # strings are written to it - fine on Python 2, but Python 3
            # would need bytes or text mode; confirm the target interpreter.
            scriptProcess.stdin.write(
                "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH\n")
            invokeCommand = "%s -m WMCore.WMRuntime.ScriptInvoke %s %s \n" % (
                sys.executable, stepModule, script)
            logging.info("    Invoking command: %s", invokeCommand)
            scriptProcess.stdin.write(invokeCommand)
            stdout, stderr = scriptProcess.communicate()
            retCode = scriptProcess.returncode
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n %s\n %s\n" % (retCode, stdout, stderr)
                logging.critical("Error running command")
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptFailure", msg)

        #
        # pre scripts with scram
        #
        logging.info("RUNNING SCRAM SCRIPTS")
        for script in self.step.runtime.scramPreScripts:
            # invoke scripts with scram()
            runtimeDir = getattr(self.step.runtime, 'scramPreDir', None)
            invokeCommand = self.step.runtime.invokeCommand if hasattr(self.step.runtime, 'invokeCommand') else \
                "%s -m WMCore.WMRuntime.ScriptInvoke %s" % (sys.executable, stepModule)
            invokeCommand += " %s \n" % script
            retCode = scram(invokeCommand, runtimeDir=runtimeDir)
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n " % retCode
                msg += scram.diagnostic()
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptScramFailure", msg)

        configPath = "%s/%s-main.sh" % (self.step.builder.workingDir,
                                        self.stepName)
        # The context manager closes the script even if the write fails
        with open(configPath, 'w') as handle:
            handle.write(CONFIG_BLOB)

        # spawn this new process
        # the script looks for:
        # <SCRAM_COMMAND> <SCRAM_PROJECT> <CMSSW_VERSION> <JOB_REPORT> <EXECUTABLE> <CONFIG>
        args = [
            '/bin/bash', configPath, scramSetup, scramArch, scramCommand,
            scramProject, cmsswVersion, jobReportXML, cmsswCommand,
            cmsswConfig, userTarball, userFiles, cmsswArguments
        ]
        logging.info("Executing CMSSW. args: %s", args)

        # possibly needed environment overrides for CMSSW call go here
        envOverride = {}
        # work around problem with GSI authentication plugin and EOS at CERN
        if socket.getfqdn().endswith("cern.ch"):
            envOverride['XRD_LOADBALANCERTTL'] = "86400"
        # some libraries linked with CMSSW need HOME in the environment
        if 'HOME' not in os.environ:
            envOverride['HOME'] = os.environ.get('PWD', "/")

        os.environ.update(envOverride)

        # open the output files; both are closed on exit from the block,
        # including when launching the process raises
        with open(self.step.output.stdout, 'w') as stdoutHandle, \
                open(self.step.output.stderr, 'w') as stderrHandle:
            returncode = subprocess.call(args,
                                         stdout=stdoutHandle,
                                         stderr=stderrHandle)

            self.setCondorChirpAttrDelayed('Chirp_WMCore_cmsRun_ExitCode',
                                           returncode)
            self.setCondorChirpAttrDelayed(
                'Chirp_WMCore_%s_ExitCode' % self.stepName, returncode)

        self.step.execution.exitStatus = returncode
        argsDump = {'arguments': args}

        if returncode != 0:
            msg = "Error running cmsRun\n%s\n" % argsDump
            msg += "Return code: %s\n" % returncode
            logging.critical(msg)
            raise WMExecutionFailure(returncode, "CmsRunFailure", msg)

        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
        except Exception as ex:
            # Catch it if something goes wrong
            raise WMExecutionFailure(50115, "BadJobReportXML", str(ex))

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = self.task.getAcquisitionEra()
        processingVer = self.task.getProcessingVersion()
        processingStr = self.task.getProcessingString()
        validStatus = self.workload.getValidStatus()
        inputPath = self.task.getInputDatasetPath()
        globalTag = typeHelper.getGlobalTag()
        prepID = self.task.getPrepID()
        campaign = self.workload.getCampaign()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus=validStatus)
        self.report.setGlobalTag(globalTag=globalTag)
        self.report.setCampaign(campaign)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath=inputPath)
        self.report.setAcquisitionProcessing(acquisitionEra=acquisitionEra,
                                             processingVer=processingVer,
                                             processingStr=processingStr)
        self.report.setConfigURL(configURL="%s;;%s;;%s" %
                                 (cacheUrl, cacheDB, configID))

        # Attach info to files
        self.report.addInfoToOutputFilesForStep(stepName=self.stepName,
                                                step=self.step)

        self.report.checkForOutputFiles(stepName=self.stepName)
        self.report.checkForAdlerChecksum(stepName=self.stepName)
        self.report.checkForRunLumiInformation(stepName=self.stepName)

        # The comparison against True is kept as-is: any other value of
        # ``keep`` (including truthy non-True values) discards the output
        if self.step.output.keep != True:
            self.report.killOutput()
        else:
            # Check that we only keep the desired output
            for module in stepHelper.getIgnoredOutputModules():
                self.report.deleteOutputModuleForStep(stepName=self.stepName,
                                                      moduleName=module)

        # Add stageout LFN to existing TFileService files
        reportAnalysisFiles = self.report.getAnalysisFilesFromStep(
            self.stepName)
        for reportAnalysisFile in reportAnalysisFiles:
            newLFN = analysisFileLFN(reportAnalysisFile.fileName,
                                     self.step.user.lfnBase, self.job)
            addAttributesToFile(reportAnalysisFile,
                                pfn=reportAnalysisFile.fileName,
                                lfn=newLFN,
                                validate=False)

        # Add analysis file entries for additional files listed in workflow
        for fileName in stepHelper.listAnalysisFiles():
            analysisFile = stepHelper.getAnalysisFile(fileName)
            # Only files that actually exist on disk are added to the report
            if os.path.isfile(analysisFile.fileName):
                newLFN = analysisFileLFN(analysisFile.fileName,
                                         analysisFile.lfnBase, self.job)
                self.report.addAnalysisFile(analysisFile.fileName,
                                            lfn=newLFN,
                                            Source='UserDefined',
                                            pfn=os.path.join(
                                                os.getcwd(),
                                                analysisFile.fileName),
                                            validate=False)

        return
コード例 #6
0
ファイル: WMTask.py プロジェクト: samircury/WMCore
 def steps(self):
     """
     Get the WMStep structure.

     Returns None when no top step name is set. Otherwise returns a
     WMStepHelper wrapping the attribute of ``self.data.steps`` named by
     ``topStepName``; if no such attribute exists the helper wraps None.
     """
     topStepName = self.data.steps.topStepName
     if topStepName is None:
         return None
     step = getattr(self.data.steps, topStepName, None)
     return WMStepHelper(step)
コード例 #7
0
            raise WMExecutionFailure(spawnedChild.returncode, "CmsRunFailure",
                                     msg)

        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
        except Exception, ex:
            # Catch it if something goes wrong
            raise WMExecutionFailure(50115, "BadJobReportXML", str(ex))

        #
        # If multicore is enabled, merged the output files and reports
        #
        if multicoreEnabled:
            self.multicoreMerge(scram, applicationStart)

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = self.task.getAcquisitionEra()
        processingVer = self.task.getProcessingVersion()
        processingStr = self.task.getProcessingString()
        validStatus = self.workload.getValidStatus()
        inputPath = self.task.getInputDatasetPath()
        globalTag = typeHelper.getGlobalTag()
        prepID = self.workload.getPrepID()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus=validStatus)
        self.report.setGlobalTag(globalTag=globalTag)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath=inputPath)
コード例 #8
0
    def __call__(self, task, wmbsJob):
        """
        _operator(task)_

        Start the watchdog monitors, run the executor of every step in the
        task, then notify the monitors that the job has ended.

        A WMException raised while starting the monitors is re-raised; any
        other exception at that stage is logged and converted into a
        WMExecutionFailure. Exceptions raised while running a step are
        logged and stop the step loop. Exceptions raised while ending the
        job are not propagated.
        """

        # currentThread is referenced, not called: watchdogMonitor is read
        # as an attribute of the function object itself.
        threadRef = threading.currentThread

        try:
            threadRef.watchdogMonitor.setupMonitors(task, wmbsJob)
            threadRef.watchdogMonitor.notifyJobStart(task)
        except WMException:
            self.toTaskDirectory()
            raise
        except Exception as ex:
            details = "Encountered unhandled exception while starting monitors:\n%s\n%s\n" % (
                str(ex), str(traceback.format_exc()))
            logging.error(details)
            self.toTaskDirectory()
            # NOTE(review): confirm WMExecutionFailure accepts a single
            # message argument.
            raise WMExecutionFailure(details)

        upstreamFailed = False
        for node in task.steps().nodeIterator():
            try:
                stepHelper = WMStepHelper(node)
                stepType = stepHelper.stepType()
                stepName = stepHelper.name()
                if upstreamFailed:
                    # An earlier step reported a failure: flag this step via
                    # an override; it is still handed to doExecution below.
                    stepHelper.addOverride("previousCmsRunFailure", True)

                result = self.doExecution(
                    StepFactory.getStepExecutor(stepType), node, wmbsJob)
                logging.info("StepName: %s, StepType: %s, with result: %r",
                             stepName, stepType, result)
                # result is either None or the step exit code
                if result:
                    upstreamFailed = True
            except WMException as ex:
                details = "Encountered error while running ExecuteMaster:\n%s\n" % str(ex)
                logging.error(details)
                self.toTaskDirectory()
                break
            except Exception as ex:
                details = "Encountered error while running ExecuteMaster:\n%s\n%s\n" % (
                    str(ex), str(traceback.format_exc()))
                self.toTaskDirectory()
                logging.error(details)
                break

        try:
            threadRef.watchdogMonitor.notifyJobEnd(task)
        except WMException:
            self.toTaskDirectory()
        except Exception as ex:
            details = "Encountered unhandled exception while ending the job:\n%s\n%s\n" % (
                str(ex), str(traceback.format_exc()))
            logging.error(details)
            self.toTaskDirectory()