コード例 #1
0
    def pre(self, emulator=None):
        """
        _pre_

        Pre execution checks

        """
        if (emulator != None):
            return emulator.emulatePre(self.step)
        logging.info("Pre-executing CMSSW step")
        if hasattr(self.step.application.configuration, 'configCacheUrl'):
            # means we have a configuration & tweak in the sandbox
            psetFile = self.step.application.command.configuration
            psetTweak = self.step.application.command.psetTweak
            self.stepSpace.getFromSandbox(psetFile)

            if psetTweak:
                self.stepSpace.getFromSandbox(psetTweak)

        if hasattr(self.step, "pileup"):
            self.stepSpace.getFromSandbox("pileupconf.json")

        # if step is multicore Enabled, add the multicore setup
        # that generates the input file manifest and per process splitting
        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()
        if typeHelper.multicoreEnabled():
            self.step.runtime.scramPreScripts.append("SetupCMSSWMulticore")

        # add in ths scram env PSet manip script whatever happens
        self.step.runtime.scramPreScripts.append("SetupCMSSWPset")
        return None
コード例 #2
0
ファイル: CMSSW.py プロジェクト: franzoni/WMCore
    def pre(self, emulator = None):
        """
        _pre_

        Pre execution checks

        """
        if (emulator != None):
            return emulator.emulatePre( self.step )
        logging.info("Pre-executing CMSSW step")
        if hasattr(self.step.application.configuration, 'configCacheUrl'):
            # means we have a configuration & tweak in the sandbox
            psetFile = self.step.application.command.configuration
            psetTweak = self.step.application.command.psetTweak
            self.stepSpace.getFromSandbox(psetFile)

            if psetTweak:
                self.stepSpace.getFromSandbox(psetTweak)

        if hasattr(self.step, "pileup"):
            self.stepSpace.getFromSandbox("pileupconf.json")

        # if step is multicore Enabled, add the multicore setup
        # that generates the input file manifest and per process splitting
        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()
        if typeHelper.multicoreEnabled():
            self.step.runtime.scramPreScripts.append("SetupCMSSWMulticore")

        # add in ths scram env PSet manip script whatever happens
        self.step.runtime.scramPreScripts.append("SetupCMSSWPset")
        return None
コード例 #3
0
    def emulateAll(self):
        """
        _emulateAll_

        Traverse all Steps and load up the default Emulator based on
        type.

        """
        for step in self.task.steps().nodeIterator():
            helper = WMStepHelper(step)
            stepType = helper.stepType()
            stepName = helper.name()
            self.addEmulator(stepName, stepType)
コード例 #4
0
    def emulateAll(self):
        """
        _emulateAll_

        Traverse all Steps and load up the default Emulator based on
        type.

        """
        for step in self.task.steps().nodeIterator():
            helper = WMStepHelper(step)
            stepType = helper.stepType()
            stepName = helper.name()
            self.addEmulator(stepName, stepType)
コード例 #5
0
ファイル: WMTask.py プロジェクト: samircury/WMCore
    def setStep(self, wmStep):
        """set topStep to be the step instance provided"""
        stepData = wmStep
        if isinstance(wmStep, WMStepHelper):
            stepData = wmStep.data
            stepHelper = wmStep
        else:
            stepHelper = WMStepHelper(wmStep)

        stepName = stepHelper.name()
        stepHelper.setTopOfTree()
        setattr(self.data.steps, stepName, stepData)
        setattr(self.data.steps, "topStepName", stepName)
        return
コード例 #6
0
ファイル: CMSSW.py プロジェクト: stuartw/WMCore
    def execute(self):
        """
        _emulate_


        """
        self.step.section_("execution")
        self.step.execution.exitStatus = 0
        self.step.emulator.emulatedBy = str(self.__class__.__name__)
        
        helper = WMStepHelper(self.step)
        cmsswStep = helper.getTypeHelper()
        reportMaker = ReportEmu(Job = self.job, WMStep = cmsswStep)
        self.executor.report = reportMaker()
コード例 #7
0
 def setStep(self, wmStep):
     """set topStep to be the step instance provided"""
     stepData = wmStep
     if isinstance(wmStep, WMStepHelper):
         stepData = wmStep.data
         stepHelper = wmStep
     else:
         stepHelper = WMStepHelper(wmStep)
     
     stepName = stepHelper.name()
     stepHelper.setTopOfTree()
     setattr(self.data.steps, stepName, stepData)
     setattr(self.data.steps, "topStepName", stepName)
     return
コード例 #8
0
    def execute(self):
        """
        _emulate_


        """
        self.step.section_("execution")
        self.step.execution.exitStatus = 0
        self.step.emulator.emulatedBy = str(self.__class__.__name__)

        helper = WMStepHelper(self.step)
        cmsswStep = helper.getTypeHelper()
        reportMaker = ReportEmu(Job=self.job, WMStep=cmsswStep)
        self.executor.report = reportMaker()
コード例 #9
0
    def pre(self, emulator = None):
        """
        _pre_

        Pre execution checks

        """
        if (emulator != None):
            return emulator.emulatePre( self.step )
        logging.info("Pre-executing CMSSW step")
        if hasattr(self.step.application.configuration, 'configCacheUrl'):
            # means we have a configuration & tweak in the sandbox
            psetFile = self.step.application.command.configuration
            psetTweak = self.step.application.command.psetTweak
            self.stepSpace.getFromSandbox(psetFile)

            if psetTweak:
                self.stepSpace.getFromSandbox(psetTweak)

        if hasattr(self.step, "pileup"):
            self.stepSpace.getFromSandbox("pileupconf.json")

        stepHelper = WMStepHelper(self.step)

        # add in ths scram env PSet manip script whatever happens
        self.step.runtime.scramPreScripts.append("SetupCMSSWPset")
        return None
コード例 #10
0
    def stepStart(self, step):
        """
        _stepStart_

        Fill with the step-based information. If it is the first step, report
        that the job started its execution.
        """

        helper = WMStepHelper(step)
        self.stepCount += 1

        data = None
        if not self.jobStarted:
            #It's the first step so let's send the exe that started and where
            #That's what they request
            data = {}
            data['MessageType']       = 'jobRuntime-jobStart'
            data['MessageTS']         = time.strftime(self.tsFormat,
                                                      time.gmtime())
            data['taskId']            = self.taskName
            data['jobId']             = self.jobName
            data['GridName']          = getUserProxyDN()
            data['ExeStart']          = helper.name()
            data['SyncCE']            = getSyncCE()
            data['WNHostName']        = socket.gethostname()

            self.publish(data = data)
            self.jobStarted = True

        #Now let's send the step information
        tmp = {'jobStart': data}

        data = {}
        data['MessageType'] = 'jobRuntime-stepStart'
        data['MessageTS']   = time.strftime(self.tsFormat, time.gmtime())
        data['taskId']      = self.taskName
        data['jobId']       = self.jobName
        data['%d_stepName' % self.stepCount]    = helper.name()
        data['%d_ExeStart' % self.stepCount]    = helper.name()

        self.publish(data = data)

        data.update(tmp)

        return data
コード例 #11
0
    def stepStart(self, step):
        """
        _stepStart_

        Fill with the step-based information. If it is the first step, report
        that the job started its execution.
        """

        helper = WMStepHelper(step)
        self.stepCount += 1

        data = None
        if not self.jobStarted:
            #It's the first step so let's send the exe that started and where
            #That's what they request
            data = {}
            data['MessageType'] = 'jobRuntime-jobStart'
            data['MessageTS'] = time.strftime(self.tsFormat, time.gmtime())
            data['taskId'] = self.taskName
            data['jobId'] = self.jobName
            data['GridName'] = getUserProxyDN()
            data['ExeStart'] = helper.name()
            data['SyncCE'] = getSyncCE()
            data['WNHostName'] = socket.gethostname()

            self.publish(data=data)
            self.jobStarted = True

        #Now let's send the step information
        tmp = {'jobStart': data}

        data = {}
        data['MessageType'] = 'jobRuntime-stepStart'
        data['MessageTS'] = time.strftime(self.tsFormat, time.gmtime())
        data['taskId'] = self.taskName
        data['jobId'] = self.jobName
        data['%d_stepName' % self.stepCount] = helper.name()
        data['%d_ExeStart' % self.stepCount] = helper.name()

        self.publish(data=data)

        data.update(tmp)

        return data
コード例 #12
0
    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Fill with step-ending information
        """
        helper = WMStepHelper(step)

        stepSuccess = stepReport.getStepExitCode(stepName = helper.name())
        if self.jobSuccess == 0:
            self.jobSuccess = int(stepSuccess)
        if int(stepSuccess) != 0:
            self.failedStep = helper.name()

        data = {}
        data['MessageType']              = 'jobRuntime-stepEnd'
        data['MessageTS']                = time.strftime(self.tsFormat,
                                                         time.gmtime())
        data['taskId']                   = self.taskName
        data['jobId']                    = self.jobName
        data['ExeEnd']                   = helper.name()
        data['ExeExitCode']              = stepReport.getStepExitCode(
                                                       stepName = helper.name())
        if helper.name() == 'StageOut':
            data['StageOutExitStatus']   = int(stepReport.stepSuccessful(
                                                      stepName = helper.name()))

        times = stepReport.getTimes(stepName = helper.name())
        data['ExeWCTime'] = times['stopTime'] - times['startTime']

        step = stepReport.retrieveStep(step = helper.name())

        if hasattr(step, 'performance'):
            if hasattr(step.performance, 'cpu'):
                data['ExeCPUTime'] = getattr(step.performance.cpu,
                                             'TotalJobCPU', 0)
                self.WrapperCPUTime += float(data['ExeCPUTime'])

        self.WrapperWCTime += data['ExeWCTime']
        self.lastStep = helper.name()

        self.publish(data = data)

        return data
コード例 #13
0
ファイル: WMTask.py プロジェクト: samircury/WMCore
    def makeStep(self, stepName):
        """
        _makeStep_

        create a new WMStep instance, install it as the top step and
        return the reference to the new step wrapped in a StepHelper

        """
        newStep = WMStep(stepName)
        self.setStep(newStep)
        return WMStepHelper(newStep)
コード例 #14
0
ファイル: PerformanceMonitor.py プロジェクト: vytjan/WMCore
    def stepStart(self, step):
        """
        _stepStart_

        Assure that the monitor is pointing at the right step
        """

        self.stepHelper = WMStepHelper(step)
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None

        if not self.stepHelper.stepType() in self.watchStepTypes:
            self.disableStep = True
            logging.debug("PerformanceMonitor ignoring step of type %s", self.stepHelper.stepType())
            return
        else:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False

        return
コード例 #15
0
    def stepStart(self, step):
        """
        _stepStart_

        Fill with the step-based information
        """

        helper = WMStepHelper(step)

        data = {}
        data['MessageType']   = 'jobRuntime'
        data['MessageTS']     = time.time()
        data['taskId']        = self.taskName
        data['jobId']         = self.jobName
        data['retryCount']    = self.job['retry_count']
        data['ExeStart']      = helper.name()
        
        self.publish(data = data)

        return data
コード例 #16
0
    def stepStart(self, step):
        """
        _stepStart_

        Assure that the monitor is pointing at the right step
        """

        stepHelper = WMStepHelper(step)
        self.currentStepName  = getStepName(step)
        self.currentStepSpace = getStepSpace(stepHelper.name())

        if not stepHelper.stepType() in self.watchStepTypes:
            self.disableStep = True
            logging.debug("PerformanceMonitor ignoring step of type %s" % stepHelper.stepType())
            return
        else:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False


        return
コード例 #17
0
    def stepKilled(self, step):
        """
        _stepKilled_

        Fill with step-ending information assuming utter failure
        """

        helper = WMStepHelper(step)

        data = {}
        data['MessageType']   = 'jobRuntime-stepKilled'
        data['MessageTS']     = time.strftime(self.tsFormat, time.gmtime())
        data['taskId']        = self.taskName
        data['jobId']         = self.jobName
        data['ExeEnd']        = helper.name()

        self.lastStep = helper.name()

        self.publish(data = data)

        return data
コード例 #18
0
    def stepKilled(self, step):
        """
        _stepKilled_

        Fill with step-ending information assuming utter failure
        """

        helper = WMStepHelper(step)

        data = {}
        data['MessageType'] = 'jobRuntime-stepKilled'
        data['MessageTS'] = time.strftime(self.tsFormat, time.gmtime())
        data['taskId'] = self.taskName
        data['jobId'] = self.jobName
        data['%d_ExeEnd' % self.stepCount] = helper.name()

        self.lastStep = helper.name()

        self.publish(data=data)

        return data
コード例 #19
0
    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Fill with step-ending information
        """
        helper = WMStepHelper(step)

        stepSuccess = stepReport.getStepExitCode(stepName = helper.name())
        if self.jobSuccess == 0:
            self.jobSuccess = int(stepSuccess)


        data = {}
        data['MessageType']              = 'jobRuntime'
        data['MessageTS']                = time.time()
        data['taskId']                   = self.taskName
        data['jobId']                    = self.jobName
        data['retryCount']               = self.job['retry_count']
        data['ExeEnd']                   = helper.name()
        data['ExeExitCode']              = stepReport.getStepExitCode(stepName = helper.name())
        if helper.name() == 'StageOut':
            data['StageOutExitStatus']       = stepReport.stepSuccessful(stepName = helper.name())

        
        self.publish(data = data)


        return
コード例 #20
0
ファイル: ExecuteMaster.py プロジェクト: ticoann/WMCore
    def toStepDirectory(self, step):
        """
        _toStepDirectory_

        Switch current working directory to the step location
        within WMTaskSpace

        """
        stepName = WMStepHelper(step).name()
        from WMTaskSpace import taskSpace
        stepSpace = taskSpace.stepSpace(stepName)

        os.chdir(stepSpace.location)
コード例 #21
0
    def stepKilled(self, step):
        """
        _stepKilled_

        Fill with step-ending information assuming utter failure
        """

        helper = WMStepHelper(step)

        data = {}
        data['MessageType']   = 'jobRuntime'
        data['MessageTS']     = time.time()
        data['taskId']        = self.taskName
        data['jobId']         = self.jobName
        data['retryCount']    = self.job['retry_count']
        data['ExeEnd']        = helper.name()
        
        
        self.publish(data = data)


        return
コード例 #22
0
ファイル: WMRuntimeMonitor.py プロジェクト: vytjan/WMCore
#!/usr/bin/env python
"""
_WMRuntimeMonitor_

This is the base class for monitors
"""

import threading
import os

from WMCore.WMException import WMException
from WMCore.WMSpec.Steps.Executor import getStepSpace
from WMCore.WMSpec.WMStep import WMStepHelper

getStepName = lambda step: WMStepHelper(step).name()


class WMRuntimeMonitorException(WMException):
    """
    _StepFactortyException_

    It's like an exception class that does nothing

    """
    pass


class WMRuntimeMonitor:
    def __init__(self):
        self.currentStep = None
        self.currentStepName = None
コード例 #23
0
ファイル: ExecuteMaster.py プロジェクト: ticoann/WMCore
class ExecuteMaster:
    """
    _ExecuteMaster_

    Traverse the given task and invoke the execute framework
    If an emulator is provided, then invoke the appropriate emulator
    instead of the executor

    """
    def __init__(self):
        pass

    def __call__(self, task, wmbsJob):
        """
        _operator(task)_

        Load and run executors for all steps in Task, if an emulator is
        available for that step, use it instead.

        """

        myThread = threading.currentThread

        try:
            myThread.watchdogMonitor.setupMonitors(task, wmbsJob)
            myThread.watchdogMonitor.notifyJobStart(task)
        except WMException:
            self.toTaskDirectory()
            raise
        except Exception, ex:
            msg = "Encountered unhandled exception while starting monitors:\n"
            msg += str(ex) + '\n'
            msg += str(traceback.format_exc()) + '\n'
            logging.error(msg)
            self.toTaskDirectory()
            raise WMExecutionFailure(msg)

        skipToStep = None
        for step in task.steps().nodeIterator():
            try:
                helper = WMStepHelper(step)
                stepType = helper.stepType()
                stepName = helper.name()
                if skipToStep and skipToStep != stepName:
                    # Then we continue until we get to the required step
                    continue
                skipToStep = None  # Reset this when we get to the right step
                executor = StepFactory.getStepExecutor(stepType)
                result = self.doExecution(executor, step, wmbsJob)
                if not result == None:
                    skipToStep = result
            except WMException, ex:
                self.toTaskDirectory()
                break
            except Exception, ex:
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                msg += str(traceback.format_exc()) + "\n"
                self.toTaskDirectory()
                logging.error(msg)
                break
コード例 #24
0
"""
from __future__ import absolute_import

import json
import logging
import os
import subprocess
import sys

from Utils.FileTools import getFullPath
from Utils.Utilities import zipEncodeStr
from WMCore.FwkJobReport.Report import Report
from WMCore.WMSpec.Steps.StepFactory import getStepEmulator
from WMCore.WMSpec.WMStep import WMStepHelper

getStepName = lambda step: WMStepHelper(step).name()
getStepErrorDestination = lambda step: WMStepHelper(step).getErrorDestinationStep()


def getStepSpace(stepName):
    """
    _getStepSpace_

    Util to get the runtime step space.
    This imports dynamic runtime libraries so be careful how
    you use it

    """
    modName = "WMTaskSpace"
    if modName in sys.modules.keys():
        taskspace = sys.modules[modName]
コード例 #25
0
    def execute(self, emulator=None):
        """
        _execute_


        """
        stepModule = "WMTaskSpace.%s" % self.stepName
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        # write the wrapper script to a temporary location
        # I don't pass it directly through os.system because I don't
        # trust that there won't be shell-escape shenanigans with
        # arbitrary input files
        scramSetup = self.step.application.setup.softwareEnvironment
        scramCommand = self.step.application.setup.scramCommand
        scramProject = self.step.application.setup.scramProject
        scramArch = self.step.application.setup.scramArch
        cmsswVersion = self.step.application.setup.cmsswVersion
        jobReportXML = self.step.output.jobReport
        cmsswCommand = self.step.application.command.executable
        cmsswConfig = self.step.application.command.configuration
        cmsswArguments = self.step.application.command.arguments
        userTarball = ','.join(self.step.user.inputSandboxes)
        userFiles = ','.join(self.step.user.userFiles)
        logging.info('User files are %s', userFiles)
        logging.info('User sandboxes are %s', userTarball)

        scramArch = getSingleScramArch(scramArch)

        multicoreSettings = self.step.application.multicore
        try:
            logging.info("CMSSW configured for %s cores and %s event streams",
                         multicoreSettings.numberOfCores,
                         multicoreSettings.eventStreams)
        except AttributeError:
            logging.info(
                "No value set for multicore numberOfCores or eventStreams")

        logging.info("Executing CMSSW step")

        #
        # set any global environment variables
        #
        try:
            os.environ['FRONTIER_ID'] = 'wmagent_%s' % (
                self.report.data.workload)
        except Exception as ex:
            logging.error('Have critical error in setting FRONTIER_ID: %s',
                          str(ex))
            logging.error(
                'Continuing, as this is not a critical function yet.')

        #
        # scram bootstrap
        #
        scram = Scram(
            command=scramCommand,
            version=cmsswVersion,
            initialise=self.step.application.setup.softwareEnvironment,
            directory=self.step.builder.workingDir,
            architecture=scramArch,
        )

        logging.info("Runing SCRAM")
        try:
            projectOutcome = scram.project()
        except Exception as ex:
            msg = "Exception raised while running scram.\n"
            msg += str(ex)
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        if projectOutcome > 0:
            msg = scram.diagnostic()
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)
        runtimeOutcome = scram.runtime()
        if runtimeOutcome > 0:
            msg = scram.diagnostic()
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        #
        # pre scripts
        #
        logging.info("Running PRE scripts")
        for script in self.step.runtime.preScripts:
            # TODO: Exception handling and error handling & logging
            scriptProcess = subprocess.Popen(
                ["/bin/bash"],
                shell=True,
                cwd=self.step.builder.workingDir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
            )
            # BADPYTHON
            scriptProcess.stdin.write(
                "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH\n")
            invokeCommand = "%s -m WMCore.WMRuntime.ScriptInvoke %s %s \n" % (
                sys.executable, stepModule, script)
            logging.info("    Invoking command: %s", invokeCommand)
            scriptProcess.stdin.write(invokeCommand)
            stdout, stderr = scriptProcess.communicate()
            retCode = scriptProcess.returncode
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n %s\n %s\n" % (retCode, stdout, stderr)
                logging.critical("Error running command")
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptFailure", msg)

        #
        # pre scripts with scram
        #
        logging.info("RUNNING SCRAM SCRIPTS")
        for script in self.step.runtime.scramPreScripts:
            # invoke scripts with scram()
            runtimeDir = getattr(self.step.runtime, 'scramPreDir', None)
            invokeCommand = self.step.runtime.invokeCommand if hasattr(self.step.runtime, 'invokeCommand') else \
                "%s -m WMCore.WMRuntime.ScriptInvoke %s" % (sys.executable, stepModule)
            invokeCommand += " %s \n" % script
            retCode = scram(invokeCommand, runtimeDir=runtimeDir)
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n " % retCode
                msg += scram.diagnostic()
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptScramFailure", msg)

        configPath = "%s/%s-main.sh" % (self.step.builder.workingDir,
                                        self.stepName)
        handle = open(configPath, 'w')
        handle.write(CONFIG_BLOB)
        handle.close()

        # spawn this new process
        # the script looks for:
        # <SCRAM_COMMAND> <SCRAM_PROJECT> <CMSSW_VERSION> <JOB_REPORT> <EXECUTABLE> <CONFIG>
        # open the output files
        stdoutHandle = open(self.step.output.stdout, 'w')
        stderrHandle = open(self.step.output.stderr, 'w')
        args = [
            '/bin/bash', configPath, scramSetup, scramArch, scramCommand,
            scramProject, cmsswVersion, jobReportXML, cmsswCommand,
            cmsswConfig, userTarball, userFiles, cmsswArguments
        ]
        logging.info("Executing CMSSW. args: %s", args)

        # possibly needed environment overrides for CMSSW call go here
        envOverride = {}
        # work around problem with GSI authentication plugin and EOS at CERN
        if socket.getfqdn().endswith("cern.ch"):
            envOverride['XRD_LOADBALANCERTTL'] = "86400"
        # some libraries linked with CMSSW need HOME in the environment
        if 'HOME' not in os.environ:
            envOverride['HOME'] = os.environ.get('PWD', "/")

        os.environ.update(envOverride)

        returncode = subprocess.call(args,
                                     stdout=stdoutHandle,
                                     stderr=stderrHandle)

        self.setCondorChirpAttrDelayed('Chirp_WMCore_cmsRun_ExitCode',
                                       returncode)
        self.setCondorChirpAttrDelayed(
            'Chirp_WMCore_%s_ExitCode' % self.stepName, returncode)

        stdoutHandle.close()
        stderrHandle.close()

        self.step.execution.exitStatus = returncode
        argsDump = {'arguments': args}

        if returncode != 0:
            msg = "Error running cmsRun\n%s\n" % argsDump
            msg += "Return code: %s\n" % returncode
            logging.critical(msg)
            raise WMExecutionFailure(returncode, "CmsRunFailure", msg)

        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
        except Exception as ex:
            # Catch it if something goes wrong
            raise WMExecutionFailure(50115, "BadJobReportXML", str(ex))

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = self.task.getAcquisitionEra()
        processingVer = self.task.getProcessingVersion()
        processingStr = self.task.getProcessingString()
        validStatus = self.workload.getValidStatus()
        inputPath = self.task.getInputDatasetPath()
        globalTag = typeHelper.getGlobalTag()
        prepID = self.task.getPrepID()
        campaign = self.workload.getCampaign()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus=validStatus)
        self.report.setGlobalTag(globalTag=globalTag)
        self.report.setCampaign(campaign)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath=inputPath)
        self.report.setAcquisitionProcessing(acquisitionEra=acquisitionEra,
                                             processingVer=processingVer,
                                             processingStr=processingStr)
        self.report.setConfigURL(configURL="%s;;%s;;%s" %
                                 (cacheUrl, cacheDB, configID))

        # Attach info to files
        self.report.addInfoToOutputFilesForStep(stepName=self.stepName,
                                                step=self.step)

        self.report.checkForOutputFiles(stepName=self.stepName)
        self.report.checkForAdlerChecksum(stepName=self.stepName)
        self.report.checkForRunLumiInformation(stepName=self.stepName)

        if self.step.output.keep != True:
            self.report.killOutput()
        else:
            # Check that we only keep the desired output
            for module in stepHelper.getIgnoredOutputModules():
                self.report.deleteOutputModuleForStep(stepName=self.stepName,
                                                      moduleName=module)

        # Add stageout LFN to existing TFileService files
        reportAnalysisFiles = self.report.getAnalysisFilesFromStep(
            self.stepName)
        for reportAnalysisFile in reportAnalysisFiles:
            newLFN = analysisFileLFN(reportAnalysisFile.fileName,
                                     self.step.user.lfnBase, self.job)
            addAttributesToFile(reportAnalysisFile,
                                pfn=reportAnalysisFile.fileName,
                                lfn=newLFN,
                                validate=False)

        # Add analysis file entries for additional files listed in workflow
        for fileName in stepHelper.listAnalysisFiles():
            analysisFile = stepHelper.getAnalysisFile(fileName)
            if os.path.isfile(analysisFile.fileName):
                newLFN = analysisFileLFN(analysisFile.fileName,
                                         analysisFile.lfnBase, self.job)
                self.report.addAnalysisFile(analysisFile.fileName,
                                            lfn=newLFN,
                                            Source='UserDefined',
                                            pfn=os.path.join(
                                                os.getcwd(),
                                                analysisFile.fileName),
                                            validate=False)

        return
コード例 #26
0
ファイル: PerformanceMonitor.py プロジェクト: amaltaro/WMCore
class PerformanceMonitor(WMRuntimeMonitor):
    """
    _PerformanceMonitor_

    Monitors the performance by pinging ps and
    recording data regarding the current step
    """

    def __init__(self):
        """
        Actual variable initialization
        in initMonitor
        """

        self.pid = None
        self.uid = os.getuid()
        self.monitorBase = "ps -p %i -o pid,ppid,rss,vsize,pcpu,pmem,cmd -ww | grep %i"
        self.monitorCommand = None
        self.currentStepSpace = None
        self.currentStepName = None

        self.rss = []
        self.vsize = []
        self.pcpu = []
        self.pmem = []

        self.maxRSS = None
        self.maxVSize = None
        self.softTimeout = None
        self.hardTimeout = None
        self.logPath = None
        self.startTime = None

        self.watchStepTypes = []

        self.disableStep = False

        WMRuntimeMonitor.__init__(self)

        return

    def initMonitor(self, task, job, logPath, args=None):
        """
        _initMonitor_

        Puts together the information needed for the monitoring
        to actually find everything.
        """
        args = args or {}

        # Set the steps we want to watch
        self.watchStepTypes = args.get('WatchStepTypes', ['CMSSW', 'PerfTest'])

        self.maxRSS = args.get('maxRSS', None)
        self.maxVSize = args.get('maxVSize', None)
        self.softTimeout = args.get('softTimeout', None)
        self.hardTimeout = args.get('hardTimeout', None)
        self.numOfCores = args.get('cores', None)

        self.logPath = os.path.join(logPath)

        return

    def jobStart(self, task):
        """
        _jobStart_

        Acknowledge that the job has started and initialize the time
        """
        self.startTime = time.time()

        return

    def stepStart(self, step):
        """
        _stepStart_

        Assure that the monitor is pointing at the right step
        """

        self.stepHelper = WMStepHelper(step)
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None

        if not self.stepHelper.stepType() in self.watchStepTypes:
            self.disableStep = True
            logging.debug("PerformanceMonitor ignoring step of type %s", self.stepHelper.stepType())
            return
        else:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False

        return

    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Package the information and send it off
        """

        if not self.disableStep:
            # No information to correlate
            return

        self.currentStepName = None
        self.currentStepSpace = None

        return

    def periodicUpdate(self):
        """
        Run on the defined intervals.

        """
        killProc = False
        killHard = False
        reason = ''
        errorCodeLookup = {'RSS': 50660,
                           'VSZ': 50661,
                           'Wallclock time': 50664,
                           '': 99999}

        if self.disableStep:
            # Then we aren't doing CPU monitoring
            # on this step
            return

        if self.currentStepName is None:
            # We're between steps
            return

        if self.currentStepSpace is None:
            # Then build the step space
            self.currentStepSpace = getStepSpace(self.stepHelper.name())

        stepPID = getStepPID(self.currentStepSpace, self.currentStepName)

        if stepPID is None:
            # Then we have no step PID, we can do nothing
            return

        # Now we run the monitor command and collate the data
        cmd = self.monitorBase % (stepPID, stepPID)
        stdout, stderr, retcode = subprocessAlgos.runCommand(cmd)

        output = stdout.split()
        if not len(output) > 7:
            # Then something went wrong in getting the ps data
            msg = "Error when grabbing output from process ps\n"
            msg += "output = %s\n" % output
            msg += "command = %s\n" % self.monitorCommand
            logging.error(msg)
            return
        # FIXME: making it backwards compatible. Keep only the "else" block in HG1801
        if self.maxRSS is not None and self.maxRSS >= (1024 * 1024):
            # then workload value is still in KiB (old way)
            rss = int(output[2])
            vsize = int(output[3])
        else:
            rss = int(output[2]) // 1024  # convert it to MiB
            vsize = int(output[3]) // 1024  # convert it to MiB
        logging.info("Retrieved following performance figures:")
        logging.info("RSS: %s;  VSize: %s; PCPU: %s; PMEM: %s", output[2], output[3],
                     output[4], output[5])

        msg = 'Error in CMSSW step %s\n' % self.currentStepName
        msg += 'Number of Cores: %s\n' % self.numOfCores

        if self.maxRSS is not None and rss >= self.maxRSS:
            msg += "Job has exceeded maxRSS: %s\n" % self.maxRSS
            msg += "Job has RSS: %s\n" % rss
            killProc = True
            reason = 'RSS'
        elif self.maxVSize is not None and vsize >= self.maxVSize:
            msg += "Job has exceeded maxVSize: %s\n" % self.maxVSize
            msg += "Job has VSize: %s\n" % vsize
            killProc = True
            reason = 'VSZ'
        elif self.hardTimeout is not None and self.softTimeout is not None:
            currentTime = time.time()
            if (currentTime - self.startTime) > self.softTimeout:
                killProc = True
                reason = 'Wallclock time'
                msg += "Job has been running for more than: %s\n" % str(self.softTimeout)
                msg += "Job has been running for: %s\n" % str(currentTime - self.startTime)
            if (currentTime - self.startTime) > self.hardTimeout:
                killHard = True
                msg += "Job exceeded soft timeout"

        if killProc:
            logging.error(msg)
            report = Report.Report()
            # Find the global report
            logPath = os.path.join(self.currentStepSpace.location,
                                   '../../../',
                                   os.path.basename(self.logPath))
            try:
                if os.path.isfile(logPath):
                    # We should be able to find existant job report.
                    # If not, we're in trouble
                    logging.debug("Found pre-existant error report in PerformanceMonitor termination.")
                    report.load(logPath)
                # Create a new step that won't be overridden by an exiting CMSSW
                if not report.retrieveStep(step="PerformanceError"):
                    report.addStep(reportname="PerformanceError")
                report.addError(stepName="PerformanceError", exitCode=errorCodeLookup[reason],
                                errorType="PerformanceKill", errorDetails=msg)
                report.save(logPath)
            except Exception as ex:
                # Basically, we can't write a log report and we're hosed
                # Kill anyway, and hope the logging file gets written out
                msg2 = "Exception while writing out jobReport.\n"
                msg2 += "Aborting job anyway: unlikely you'll get any error report.\n"
                msg2 += str(ex)
                msg2 += str(traceback.format_exc()) + '\n'
                logging.error(msg2)

            try:
                if not killHard:
                    logging.error("Attempting to kill step using SIGUSR2")
                    os.kill(stepPID, signal.SIGUSR2)
                else:
                    logging.error("Attempting to kill step using SIGTERM")
                    os.kill(stepPID, signal.SIGTERM)
            except Exception:
                logging.error("Attempting to kill step using SIGTERM")
                os.kill(stepPID, signal.SIGTERM)

        return
コード例 #27
0
    def __call__(self, task, wmbsJob):
        """
        _operator(task)_

        Load and run executors for all steps in Task, if an emulator is
        available for that step, use it instead.

        """

        myThread = threading.currentThread

        try:
            myThread.watchdogMonitor.setupMonitors(task, wmbsJob)
            myThread.watchdogMonitor.notifyJobStart(task)
        except WMException:
            self.toTaskDirectory()
            raise
        except Exception as ex:
            msg = "Encountered unhandled exception while starting monitors:\n"
            msg += str(ex) + '\n'
            msg += str(traceback.format_exc()) + '\n'
            logging.error(msg)
            self.toTaskDirectory()
            raise WMExecutionFailure(msg)

        failureUpstream = False
        for step in task.steps().nodeIterator():
            try:
                helper = WMStepHelper(step)
                stepType = helper.stepType()
                stepName = helper.name()
                if failureUpstream:
                    # for chained steps, don't execute further steps if a
                    # failure has already happened
                    helper.addOverride("previousCmsRunFailure", True)

                executor = StepFactory.getStepExecutor(stepType)
                result = self.doExecution(executor, step, wmbsJob)
                logging.info("StepName: %s, StepType: %s, with result: %r",
                             stepName, stepType, result)
                if result:  # can be either None, or the step exit code
                    failureUpstream = True
            except WMException as ex:
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                logging.error(msg)
                self.toTaskDirectory()
                break
            except Exception as ex:
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                msg += str(traceback.format_exc()) + "\n"
                self.toTaskDirectory()
                logging.error(msg)
                break

        try:
            myThread.watchdogMonitor.notifyJobEnd(task)
        except WMException:
            self.toTaskDirectory()
        except Exception as ex:
            msg = "Encountered unhandled exception while ending the job:\n"
            msg += str(ex) + '\n'
            msg += str(traceback.format_exc()) + '\n'
            logging.error(msg)
            self.toTaskDirectory()

        return
コード例 #28
0
class PerformanceMonitor(WMRuntimeMonitor):
    """
    _PerformanceMonitor_

    Monitors the performance by pinging ps and
    recording data regarding the current step
    """
    def __init__(self):
        """
        Actual variable initialization
        in initMonitor
        """

        self.pid = None
        self.uid = os.getuid()
        self.monitorBase = "ps -p %i -o pid,ppid,rss,vsize,pcpu,pmem,cmd -ww | grep %i"
        self.monitorCommand = None
        self.currentStepSpace = None
        self.currentStepName = None

        self.rss = []
        self.vsize = []
        self.pcpu = []
        self.pmem = []

        self.maxRSS = None
        self.maxVSize = None
        self.softTimeout = None
        self.hardTimeout = None
        self.logPath = None
        self.startTime = None

        self.watchStepTypes = []

        self.disableStep = False

        WMRuntimeMonitor.__init__(self)

        return

    def initMonitor(self, task, job, logPath, args={}):
        """
        _initMonitor_

        Puts together the information needed for the monitoring
        to actually find everything.
        """

        # Set the steps we want to watch
        self.watchStepTypes = args.get('WatchStepTypes', ['CMSSW', 'PerfTest'])

        self.maxRSS = args.get('maxRSS', None)
        self.maxVSize = args.get('maxVSize', None)
        self.softTimeout = args.get('softTimeout', None)
        self.hardTimeout = args.get('hardTimeout', None)

        self.logPath = os.path.join(logPath)

        return

    def jobStart(self, task):
        """
        _jobStart_

        Acknowledge that the job has started and initialize the time
        """
        self.startTime = time.time()

        return

    def stepStart(self, step):
        """
        _stepStart_

        Assure that the monitor is pointing at the right step
        """

        self.stepHelper = WMStepHelper(step)
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None

        if not self.stepHelper.stepType() in self.watchStepTypes:
            self.disableStep = True
            logging.debug("PerformanceMonitor ignoring step of type %s" %
                          self.stepHelper.stepType())
            return
        else:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False

        return

    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Package the information and send it off
        """

        if not self.disableStep:
            # No information to correlate
            return

        self.currentStepName = None
        self.currentStepSpace = None

        return

    def periodicUpdate(self):
        """
        Run on the defined intervals.

        """
        killProc = False
        killHard = False

        if self.disableStep:
            # Then we aren't doing CPU monitoring
            # on this step
            return

        if self.currentStepName == None:
            # We're between steps
            return

        if self.currentStepSpace == None:
            # Then build the step space
            self.currentStepSpace = getStepSpace(self.stepHelper.name())

        stepPID = getStepPID(self.currentStepSpace, self.currentStepName)

        if stepPID == None:
            # Then we have no step PID, we can do nothing
            return

        # Now we run the monitor command and collate the data
        cmd = self.monitorBase % (stepPID, stepPID)
        stdout, stderr, retcode = subprocessAlgos.runCommand(cmd)

        output = stdout.split()
        if not len(output) > 7:
            # Then something went wrong in getting the ps data
            msg = "Error when grabbing output from process ps\n"
            msg += "output = %s\n" % output
            msg += "command = %s\n" % self.monitorCommand
            logging.error(msg)
            return
        rss = float(output[2])
        vsize = float(output[3])
        logging.info("Retrieved following performance figures:")
        logging.info("RSS: %s;  VSize: %s; PCPU: %s; PMEM: %s" %
                     (output[2], output[3], output[4], output[5]))

        msg = 'Error in CMSSW step %s\n' % self.currentStepName
        if self.maxRSS != None and rss >= self.maxRSS:
            msg += "Job has exceeded maxRSS: %s\n" % self.maxRSS
            msg += "Job has RSS: %s\n" % rss
            killProc = True
        if self.maxVSize != None and vsize >= self.maxVSize:
            msg += "Job has exceeded maxVSize: %s\n" % self.maxVSize
            msg += "Job has VSize: %s\n" % vsize
            killProc = True

        #Let's check the running time
        currentTime = time.time()

        if self.hardTimeout != None and self.softTimeout != None:
            if (currentTime - self.startTime) > self.softTimeout:
                killProc = True
                msg += "Job has been running for more than: %s\n" % str(
                    self.softTimeout)
                msg += "Job has been running for: %s\n" % str(currentTime -
                                                              self.startTime)
            if (currentTime - self.startTime) > self.hardTimeout:
                killHard = True
                msg += "Job exceeded soft timeout"

        if killProc:
            logging.error(msg)
            report = Report.Report()
            # Find the global report
            logPath = os.path.join(self.currentStepSpace.location, '../../../',
                                   os.path.basename(self.logPath))
            try:
                if os.path.isfile(logPath):
                    # We should be able to find existant job report.
                    # If not, we're in trouble
                    logging.debug(
                        "Found pre-existant error report in PerformanceMonitor termination."
                    )
                    report.load(logPath)
                # Create a new step that won't be overridden by an exiting CMSSW
                if not report.retrieveStep(step="PerformanceError"):
                    report.addStep(reportname="PerformanceError")
                report.addError(stepName="PerformanceError",
                                exitCode=99900,
                                errorType="PerformanceKill",
                                errorDetails=msg)
                report.save(logPath)
            except Exception, ex:
                # Basically, we can't write a log report and we're hosed
                # Kill anyway, and hope the logging file gets written out
                msg2 = "Exception while writing out jobReport.\n"
                msg2 += "Aborting job anyway: unlikely you'll get any error report.\n"
                msg2 += str(ex)
                msg2 += str(traceback.format_exc()) + '\n'
                logging.error(msg2)

            try:
                if not killHard:
                    logging.error("Attempting to kill step using SIGUSR2")
                    os.kill(stepPID, signal.SIGUSR2)
                else:
                    logging.error("Attempting to kill step using SIGTERM")
                    os.kill(stepPID, signal.SIGTERM)
            except Exception:
                logging.error("Attempting to kill step using SIGTERM")
                os.kill(stepPID, signal.SIGTERM)

        return
コード例 #29
0
ファイル: ExecuteMaster.py プロジェクト: BrunoCoimbra/WMCore
    def __call__(self, task, wmbsJob):
        """
        _operator(task)_

        Load and run executors for all steps in Task, if an emulator is
        available for that step, use it instead.

        """

        myThread = threading.currentThread

        try:
            myThread.watchdogMonitor.setupMonitors(task, wmbsJob)
            myThread.watchdogMonitor.notifyJobStart(task)
        except WMException:
            self.toTaskDirectory()
            raise
        except Exception as ex:
            msg = "Encountered unhandled exception while starting monitors:\n"
            msg += str(ex) + '\n'
            msg += str(traceback.format_exc()) + '\n'
            logging.error(msg)
            self.toTaskDirectory()
            raise WMExecutionFailure(msg)

        skipToStep = None
        for step in task.steps().nodeIterator():
            try:
                helper = WMStepHelper(step)
                stepType = helper.stepType()
                stepName = helper.name()
                if skipToStep and skipToStep != stepName:
                    # Then we continue until we get to the required step
                    continue
                skipToStep = None  # Reset this when we get to the right step
                executor = StepFactory.getStepExecutor(stepType)
                result = self.doExecution(executor, step, wmbsJob)
                if result is not None:
                    skipToStep = result
            except WMException as ex:
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                logging.error(msg)
                self.toTaskDirectory()
                break
            except Exception as ex:
                msg = "Encountered error while running ExecuteMaster:\n"
                msg += str(ex) + "\n"
                msg += str(traceback.format_exc()) + "\n"
                self.toTaskDirectory()
                logging.error(msg)
                break

        try:
            myThread.watchdogMonitor.notifyJobEnd(task)
        except WMException:
            self.toTaskDirectory()
        except Exception as ex:
            msg = "Encountered unhandled exception while ending the job:\n"
            msg += str(ex) + '\n'
            msg += str(traceback.format_exc()) + '\n'
            logging.error(msg)
            self.toTaskDirectory()

        return
コード例 #30
0
    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Fill with step-ending information
        """
        helper = WMStepHelper(step)

        stepSuccess = stepReport.getStepExitCode(stepName=helper.name())
        stepReport.setStepCounter(stepName=helper.name(),
                                  counter=self.stepCount)
        if self.jobSuccess == 0:
            self.jobSuccess = int(stepSuccess)
        if int(stepSuccess) != 0:
            self.failedStep = helper.name()

        data = {}
        data['MessageType'] = 'jobRuntime-stepEnd'
        data['MessageTS'] = time.strftime(self.tsFormat, time.gmtime())
        data['taskId'] = self.taskName
        data['jobId'] = self.jobName
        data['%d_ExeEnd' % self.stepCount] = helper.name()
        data['%d_ExeExitCode' % self.stepCount] = stepReport.getStepExitCode(
            stepName=helper.name())
        data['%d_NCores' % self.stepCount] = helper.getNumberOfCores()

        if helper.name() == 'StageOut':
            data['%d_StageOutExitStatus' % self.stepCount] = int(
                stepReport.stepSuccessful(stepName=helper.name()))

        times = stepReport.getTimes(stepName=helper.name())
        if times['stopTime'] is not None and times['startTime'] is not None:
            data['%d_ExeWCTime' %
                 self.stepCount] = times['stopTime'] - times['startTime']
        else:
            logging.error('Failed to retrieve start/end step time: %s', times)
            data['%d_ExeWCTime' % self.stepCount] = 0
        self.WrapperWCTime += data['%d_ExeWCTime' % self.stepCount]

        step = stepReport.retrieveStep(step=helper.name())
        try:
            data['%d_ExeCPUTime' % self.stepCount] = getattr(
                step.performance.cpu, 'TotalJobCPU', 0)
        except AttributeError:
            msg = "Failed to retrieve cpu performance for step %s. Defaulting to 0" % helper.name(
            )
            logging.warn(msg)
            data['%d_ExeCPUTime' % self.stepCount] = 0
        self.WrapperCPUTime += float(data['%d_ExeCPUTime' % self.stepCount])

        self.lastStep = helper.name()

        self.publish(data=data)

        return data
コード例 #31
0
    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Fill with step-ending information
        """
        helper = WMStepHelper(step)

        stepSuccess = stepReport.getStepExitCode(stepName=helper.name())
        stepReport.setStepCounter(stepName=helper.name(), counter=self.stepCount)
        if self.jobSuccess == 0:
            self.jobSuccess = int(stepSuccess)
        if int(stepSuccess) != 0:
            self.failedStep = helper.name()

        data = {}
        data['MessageType'] = 'jobRuntime-stepEnd'
        data['MessageTS'] = time.strftime(self.tsFormat,
                                          time.gmtime())
        data['taskId'] = self.taskName
        data['jobId'] = self.jobName
        data['%d_ExeEnd' % self.stepCount] = helper.name()
        data['%d_ExeExitCode' % self.stepCount] = stepReport.getStepExitCode(
            stepName=helper.name())
        data['%d_NCores' % self.stepCount] = helper.getNumberOfCores()

        if helper.name() == 'StageOut':
            data['%d_StageOutExitStatus' % self.stepCount] = int(
                stepReport.stepSuccessful(stepName=helper.name()))

        times = stepReport.getTimes(stepName=helper.name())
        if times['stopTime'] is not None and times['startTime'] is not None:
            data['%d_ExeWCTime' % self.stepCount] = times['stopTime'] - times['startTime']
        else:
            logging.error('Failed to retrieve start/end step time: %s', times)
            data['%d_ExeWCTime' % self.stepCount] = 0
        self.WrapperWCTime += data['%d_ExeWCTime' % self.stepCount]

        step = stepReport.retrieveStep(step=helper.name())
        try:
            data['%d_ExeCPUTime' % self.stepCount] = getattr(step.performance.cpu,
                                                             'TotalJobCPU', 0)
        except AttributeError:
            msg = "Failed to retrieve cpu performance for step %s. Defaulting to 0" % helper.name()
            logging.warn(msg)
            data['%d_ExeCPUTime' % self.stepCount] = 0
        self.WrapperCPUTime += float(data['%d_ExeCPUTime' % self.stepCount])

        self.lastStep = helper.name()
        self.maxCores = max(self.maxCores, helper.getNumberOfCores())

        self.publish(data=data)

        return data
コード例 #32
0
ファイル: WMTask.py プロジェクト: samircury/WMCore
 def steps(self):
     """get WMStep structure"""
     if self.data.steps.topStepName == None:
         return None
     step = getattr(self.data.steps, self.data.steps.topStepName, None)
     return WMStepHelper(step)
コード例 #33
0
            raise WMExecutionFailure(spawnedChild.returncode, "CmsRunFailure",
                                     msg)

        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
        except Exception, ex:
            # Catch it if something goes wrong
            raise WMExecutionFailure(50115, "BadJobReportXML", str(ex))

        #
        # If multicore is enabled, merged the output files and reports
        #
        if multicoreEnabled:
            self.multicoreMerge(scram, applicationStart)

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = self.task.getAcquisitionEra()
        processingVer = self.task.getProcessingVersion()
        processingStr = self.task.getProcessingString()
        validStatus = self.workload.getValidStatus()
        inputPath = self.task.getInputDatasetPath()
        globalTag = typeHelper.getGlobalTag()
        prepID = self.workload.getPrepID()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus=validStatus)
        self.report.setGlobalTag(globalTag=globalTag)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath=inputPath)
コード例 #34
0
class PerformanceMonitor(WMRuntimeMonitor):
    """
    _PerformanceMonitor_

    Monitors the performance by pinging ps and
    recording data regarding the current step
    """



    def __init__(self):
        """
        Actual variable initialization
        in initMonitor
        """

        self.pid              = None
        self.uid              = os.getuid()
        #self.grabCommand      = "ps -u %i -o pid,ppid,rss,vsize,pcpu,pmem,cmd |grep %s |grep -v grep"
        self.monitorBase      = "ps -p %i -o pid,ppid,rss,vsize,pcpu,pmem,cmd -ww |grep %i"
        self.monitorCommand   = None
        self.currentStepSpace = None
        self.currentStepName  = None

        self.rss   = []
        self.vsize = []
        self.pcpu  = []
        self.pmem  = []

        self.maxRSS   = None
        self.maxVSize = None
        self.logPath  = None

        self.watchStepTypes = []

        self.disableStep = False

        WMRuntimeMonitor.__init__(self)


        return


    def initMonitor(self, task, job, logPath, args = {}):
        """
        _initMonitor_

        Puts together the information needed for the monitoring
        to actually find everything.
        """

        # Set the steps we want to watch
        self.watchStepTypes = args.get('WatchStepTypes', ['CMSSW', 'PerfTest'])

        self.maxRSS   = args.get('maxRSS', None)
        self.maxVSize = args.get('maxVSize', None)

        self.logPath = os.path.join(logPath)

        return

    def stepStart(self, step):
        """
        _stepStart_

        Assure that the monitor is pointing at the right step
        """

        self.stepHelper = WMStepHelper(step)
        self.currentStepName  = getStepName(step)
        self.currentStepSpace = None

        if not self.stepHelper.stepType() in self.watchStepTypes:
            self.disableStep = True
            logging.debug("PerformanceMonitor ignoring step of type %s" % self.stepHelper.stepType())
            return
        else:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False


        return


    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Package the information and send it off
        """

        if not self.disableStep:
            # No information to correlate
            return

        self.currentStepName  = None
        self.currentStepSpace = None

        return


    def periodicUpdate(self):
        """
        Run on the defined intervals.

        """
        killProc = False

        if self.disableStep:
            # Then we aren't doing CPU monitoring
            # on this step
            return

        if self.currentStepName == None:
            # We're between steps
            return

        if self.currentStepSpace == None:
            # Then build the step space
            self.currentStepSpace = getStepSpace(self.stepHelper.name())

        stepPID = getStepPID(self.currentStepSpace, self.currentStepName)

        if stepPID == None:
            # Then we have no step PID, we can do nothing
            return

        # Now we run the monitor command and collate the data
        cmd = self.monitorBase % (stepPID, stepPID)
        stdout, stderr, retcode = subprocessAlgos.runCommand(cmd)

        output = stdout.split()
        if not len(output) > 7:
            # Then something went wrong in getting the ps data
            msg =  "Error when grabbing output from process ps\n"
            msg += "output = %s\n" % output
            msg += "command = %s\n" % self.monitorCommand
            logging.error(msg)
            return
        rss   = float(output[2])
        vsize = float(output[3])
        logging.info("Retrieved following performance figures:")
        logging.info("RSS: %s;  VSize: %s; PCPU: %s; PMEM: %s" % (output[2], output[3],
                                                                  output[4], output[5]))

        msg = 'Error in CMSSW step %s\n' % self.currentStepName
        if self.maxRSS != None and rss >= self.maxRSS:
            msg += "Job has exceeded maxRSS: %s\n" % self.maxRSS
            msg += "Job has RSS: %s\n" % rss
            killProc = True
        if self.maxVSize != None and vsize >= self.maxVSize:
            msg += "Job has exceeded maxVSize: %s\n" % self.maxVSize
            msg += "Job has VSize: %s\n" % vsize
            killProc = True


        if killProc:
            logging.error(msg)
            report  = Report.Report()
            # Find the global report
            logPath = os.path.join(self.currentStepSpace.location,
                                   '../../../',
                                   os.path.basename(self.logPath))
            try:
                if os.path.isfile(logPath):
                    # We should be able to find existant job report.
                    # If not, we're in trouble
                    logging.debug("Found pre-existant error report in DashboardMonitor termination.")
                    report.load(logPath)
                # Create a new step that won't be overridden by an exiting CMSSW
                if not report.retrieveStep(step = "PerformanceError"):
                    report.addStep(reportname = "PerformanceError")
                report.addError(stepName = "PerformanceError", exitCode = 99900,
                                errorType = "PerformanceKill", errorDetails = msg)
                report.save(logPath)
            except Exception, ex:
                # Basically, we can't write a log report and we're hosed
                # Kill anyway, and hope the logging file gets written out
                msg2 =  "Exception while writing out jobReport.\n"
                msg2 += "Aborting job anyway: unlikely you'll get any error report.\n"
                msg2 += str(ex)
                msg2 += str(traceback.format_exc()) + '\n'
                logging.error(msg2)

            try:
                logging.error("Attempting to kill job using SIGUSR2")
                os.kill(stepPID, signal.SIGUSR2)
            except Exception:
                os.kill(stepPID, signal.SIGTERM)

        return
コード例 #35
0
ファイル: CMSSW.py プロジェクト: franzoni/WMCore
                                     "CmsRunFailure", msg)

        try:
            self.report.parse(jobReportXML, stepName = self.stepName)
        except Exception, ex:
            # Catch it if something goes wrong
            raise WMExecutionFailure(50115, "BadJobReportXML", str(ex))


        #
        # If multicore is enabled, merged the output files and reports
        #
        if multicoreEnabled:
            self.multicoreMerge(scram,applicationStart)

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = self.task.getAcquisitionEra()
        processingVer  = self.task.getProcessingVersion()
        processingStr  = self.task.getProcessingString()
        validStatus    = self.workload.getValidStatus()
        inputPath      = self.task.getInputDatasetPath()
        globalTag      = typeHelper.getGlobalTag()
        prepID        = self.workload.getPrepID()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus = validStatus)
        self.report.setGlobalTag(globalTag = globalTag)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath = inputPath)
コード例 #36
0
ファイル: PerformanceMonitor.py プロジェクト: vytjan/WMCore
class PerformanceMonitor(WMRuntimeMonitor):
    """
    _PerformanceMonitor_

    Monitors the performance by pinging ps and
    recording data regarding the current step
    """

    def __init__(self):
        """
        Actual variable initialization
        in initMonitor
        """

        self.pid = None
        self.uid = os.getuid()
        self.monitorBase = "ps -p %i -o pid,ppid,rss,pcpu,pmem,cmd -ww | grep %i"
        self.pssMemoryCommand = "awk '/^Pss/ {pss += $2} END {print pss}' /proc/%i/smaps"
        self.monitorCommand = None
        self.currentStepSpace = None
        self.currentStepName = None

        self.rss = []
        self.pcpu = []
        self.pmem = []

        self.maxPSS = None
        self.softTimeout = None
        self.hardTimeout = None
        self.logPath = None
        self.startTime = None
        self.killRetry = False  # will trigger a hard (SIGTERM) instead of soft kill

        self.watchStepTypes = []

        self.disableStep = False

        WMRuntimeMonitor.__init__(self)

        return

    def initMonitor(self, task, job, logPath, args=None):
        """
        _initMonitor_

        Puts together the information needed for the monitoring
        to actually find everything.
        """
        args = args or {}

        # Set the steps we want to watch
        self.watchStepTypes = args.get('WatchStepTypes', ['CMSSW', 'PerfTest'])

        self.maxPSS = args.get('maxPSS', args.get('maxRSS'))
        self.softTimeout = args.get('softTimeout', None)
        self.hardTimeout = args.get('hardTimeout', None)
        self.numOfCores = args.get('cores', None)

        self.logPath = os.path.join(logPath)

        return

    def jobStart(self, task):
        """
        _jobStart_

        Acknowledge that the job has started and initialize the time
        """
        self.startTime = time.time()

        return

    def stepStart(self, step):
        """
        _stepStart_

        Assure that the monitor is pointing at the right step
        """

        self.stepHelper = WMStepHelper(step)
        self.currentStepName = getStepName(step)
        self.currentStepSpace = None

        if not self.stepHelper.stepType() in self.watchStepTypes:
            self.disableStep = True
            logging.debug("PerformanceMonitor ignoring step of type %s", self.stepHelper.stepType())
            return
        else:
            logging.debug("Beginning PeformanceMonitor step Initialization")
            self.disableStep = False

        return

    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Package the information and send it off
        """

        if not self.disableStep:
            # No information to correlate
            return

        self.currentStepName = None
        self.currentStepSpace = None

        return

    def periodicUpdate(self):
        """
        Run on the defined intervals.

        """
        killProc = False
        killHard = False
        reason = ''
        errorCodeLookup = {'PSS': 50660,
                           'Wallclock time': 50664,
                           '': 99999}

        if self.disableStep:
            # Then we aren't doing CPU monitoring
            # on this step
            return

        if self.currentStepName is None:
            # We're between steps
            return

        if self.currentStepSpace is None:
            # Then build the step space
            self.currentStepSpace = getStepSpace(self.stepHelper.name())

        stepPID = getStepPID(self.currentStepSpace, self.currentStepName)

        if stepPID is None:
            # Then we have no step PID, we can do nothing
            return

        # Now we run the ps monitor command and collate the data
        # Gathers RSS, %CPU and %MEM statistics from ps
        ps_cmd = self.monitorBase % (stepPID, stepPID)
        stdout, _stderr, _retcode = subprocessAlgos.runCommand(ps_cmd)

        ps_output = stdout.split()
        if not len(ps_output) > 6:
            # Then something went wrong in getting the ps data
            msg = "Error when grabbing output from process ps\n"
            msg += "output = %s\n" % ps_output
            msg += "command = %s\n" % ps_cmd
            logging.error(msg)
            return

        # run the command to gather PSS memory statistics from /proc/<pid>/smaps
        smaps_cmd = self.pssMemoryCommand % (stepPID)
        stdout, _stderr, _retcode = subprocessAlgos.runCommand(smaps_cmd)

        smaps_output = stdout.split()
        if not len(smaps_output) == 1:
            # Then something went wrong in getting the smaps data
            msg = "Error when grabbing output from smaps\n"
            msg += "output = %s\n" % smaps_output
            msg += "command = %s\n" % smaps_cmd
            logging.error(msg)
            return

        # smaps also returns data in kiloBytes, let's make it megaBytes
        # I'm also confused with these megabytes and mebibytes...
        pss = int(smaps_output[0]) // 1000

        logging.info("PSS: %s; RSS: %s; PCPU: %s; PMEM: %s", smaps_output[0], ps_output[2], ps_output[3], ps_output[4])

        msg = 'Error in CMSSW step %s\n' % self.currentStepName
        msg += 'Number of Cores: %s\n' % self.numOfCores

        if self.maxPSS is not None and pss >= self.maxPSS:
            msg += "Job has exceeded maxPSS: %s MB\n" % self.maxPSS
            msg += "Job has PSS: %s MB\n" % pss
            killProc = True
            reason = 'PSS'
        elif self.hardTimeout is not None and self.softTimeout is not None:
            currentTime = time.time()
            if (currentTime - self.startTime) > self.softTimeout:
                killProc = True
                reason = 'Wallclock time'
                msg += "Job has been running for more than: %s\n" % str(self.softTimeout)
                msg += "Job has been running for: %s\n" % str(currentTime - self.startTime)
            if (currentTime - self.startTime) > self.hardTimeout:
                killHard = True
                msg += "Job exceeded soft timeout"

        if not killProc:
            # then job is behaving well, there is nothing to do
            return

        # make sure we persist the performance error only once
        if not self.killRetry:
            logging.error(msg)
            report = Report.Report()
            # Find the global report
            logPath = os.path.join(self.currentStepSpace.location,
                                   '../../../',
                                   os.path.basename(self.logPath))
            try:
                if os.path.isfile(logPath):
                    # We should be able to find existant job report.
                    # If not, we're in trouble
                    logging.debug("Found pre-existant error report in PerformanceMonitor termination.")
                    report.load(logPath)
                # Create a new step that won't be overridden by an exiting CMSSW
                if not report.retrieveStep(step="PerformanceError"):
                    report.addStep(reportname="PerformanceError")
                report.addError(stepName="PerformanceError", exitCode=errorCodeLookup[reason],
                                errorType="PerformanceKill", errorDetails=msg)
                report.save(logPath)
            except Exception as ex:
                # Basically, we can't write a log report and we're hosed
                # Kill anyway, and hope the logging file gets written out
                msg2 = "Exception while writing out jobReport.\n"
                msg2 += "Aborting job anyway: unlikely you'll get any error report.\n"
                msg2 += "Error: %s" % str(ex)
                logging.exception(msg2)

        try:
            if not killHard and not self.killRetry:
                logging.error("Attempting to kill step using SIGUSR2")
                os.kill(stepPID, signal.SIGUSR2)
            else:
                logging.error("Attempting to kill step using SIGTERM")
                os.kill(stepPID, signal.SIGTERM)
        except Exception:
            logging.error("Attempting to kill step using SIGTERM")
            os.kill(stepPID, signal.SIGTERM)
        finally:
            self.killRetry = True

        return
コード例 #37
0
ファイル: CMSSW.py プロジェクト: mmascher/WMCore
    def execute(self, emulator=None):
        """
        _execute_


        """
        stepModule = "WMTaskSpace.%s" % self.stepName
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        # write the wrapper script to a temporary location
        # I don't pass it directly through os.system because I don't
        # trust that there won't be shell-escape shenanigans with
        # arbitrary input files
        scramSetup = self.step.application.setup.softwareEnvironment
        scramCommand = self.step.application.setup.scramCommand
        scramProject = self.step.application.setup.scramProject
        scramArch = self.step.application.setup.scramArch
        cmsswVersion = self.step.application.setup.cmsswVersion
        jobReportXML = self.step.output.jobReport
        cmsswCommand = self.step.application.command.executable
        cmsswConfig = self.step.application.command.configuration
        cmsswArguments = self.step.application.command.arguments
        userTarball = ",".join(self.step.user.inputSandboxes)
        userFiles = ",".join(self.step.user.userFiles)
        logging.info("User files are %s", userFiles)
        logging.info("User sandboxes are %s", userTarball)

        multicoreSettings = self.step.application.multicore
        try:
            logging.info("CMSSW configured for %s cores", multicoreSettings.numberOfCores)
        except AttributeError:
            logging.info("No value set for multicore.numberOfCores")

        logging.info("Executing CMSSW step")

        #
        # set any global environment variables
        #
        try:
            os.environ["FRONTIER_ID"] = "wmagent_%s" % (self.report.data.workload)
        except Exception as ex:
            logging.error("Have critical error in setting FRONTIER_ID: %s", str(ex))
            logging.error("Continuing, as this is not a critical function yet.")

        #
        # scram bootstrap
        #
        scram = Scram(
            command=scramCommand,
            version=cmsswVersion,
            initialise=self.step.application.setup.softwareEnvironment,
            directory=self.step.builder.workingDir,
            architecture=scramArch,
        )

        logging.info("Runing SCRAM")
        try:
            projectOutcome = scram.project()
        except Exception as ex:
            msg = "Exception raised while running scram.\n"
            msg += str(ex)
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        if projectOutcome > 0:
            msg = scram.diagnostic()
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)
        runtimeOutcome = scram.runtime()
        if runtimeOutcome > 0:
            msg = scram.diagnostic()
            logging.critical("Error running SCRAM")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

        #
        # pre scripts
        #
        logging.info("Running PRE scripts")
        for script in self.step.runtime.preScripts:
            # TODO: Exception handling and error handling & logging
            scriptProcess = subprocess.Popen(
                ["/bin/bash"],
                shell=True,
                cwd=self.step.builder.workingDir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
            )
            # BADPYTHON
            scriptProcess.stdin.write("export LD_LIBRARY_PATH=$LD_LIBRARY_PATH\n")
            invokeCommand = "%s -m WMCore.WMRuntime.ScriptInvoke %s %s \n" % (sys.executable, stepModule, script)
            logging.info("    Invoking command: %s", invokeCommand)
            scriptProcess.stdin.write(invokeCommand)
            stdout, stderr = scriptProcess.communicate()
            retCode = scriptProcess.returncode
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n %s\n %s\n" % (retCode, stdout, stderr)
                logging.critical("Error running command")
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptFailure", msg)

        #
        # pre scripts with scram
        #
        logging.info("RUNNING SCRAM SCRIPTS")
        for script in self.step.runtime.scramPreScripts:
            # invoke scripts with scram()
            runtimeDir = getattr(self.step.runtime, "scramPreDir", None)
            invokeCommand = (
                self.step.runtime.invokeCommand
                if hasattr(self.step.runtime, "invokeCommand")
                else "%s -m WMCore.WMRuntime.ScriptInvoke %s" % (sys.executable, stepModule)
            )
            invokeCommand += " %s \n" % script
            retCode = scram(invokeCommand, runtimeDir=runtimeDir)
            if retCode > 0:
                msg = "Error running command\n%s\n" % invokeCommand
                msg += "%s\n " % retCode
                msg += scram.diagnostic()
                logging.critical(msg)
                raise WMExecutionFailure(50513, "PreScriptScramFailure", msg)

        configPath = "%s/%s-main.sh" % (self.step.builder.workingDir, self.stepName)
        handle = open(configPath, "w")
        handle.write(CONFIG_BLOB)
        handle.close()

        # spawn this new process
        # the script looks for:
        # <SCRAM_COMMAND> <SCRAM_PROJECT> <CMSSW_VERSION> <JOB_REPORT> <EXECUTABLE> <CONFIG>
        # open the output files
        stdoutHandle = open(self.step.output.stdout, "w")
        stderrHandle = open(self.step.output.stderr, "w")
        args = [
            "/bin/bash",
            configPath,
            scramSetup,
            scramArch,
            scramCommand,
            scramProject,
            cmsswVersion,
            jobReportXML,
            cmsswCommand,
            cmsswConfig,
            userTarball,
            userFiles,
            cmsswArguments,
        ]
        logging.info("Executing CMSSW. args: %s", args)

        # at CERN override the environment to work around problem with GSI authentication plugin and EOS
        if socket.getfqdn().endswith("cern.ch"):
            returncode = subprocess.call(
                args, stdout=stdoutHandle, stderr=stderrHandle, env=dict(os.environ, XRD_LOADBALANCERTTL="86400")
            )
        else:
            returncode = subprocess.call(args, stdout=stdoutHandle, stderr=stderrHandle)

        self.setCondorChirpAttrDelayed("Chirp_WMCore_cmsRun_ExitCode", returncode)
        self.setCondorChirpAttrDelayed("Chirp_WMCore_%s_ExitCode" % self.stepName, returncode)

        stdoutHandle.close()
        stderrHandle.close()

        self.step.execution.exitStatus = returncode
        argsDump = {"arguments": args}

        if returncode != 0:
            msg = "Error running cmsRun\n%s\n" % argsDump
            msg += "Return code: %s\n" % returncode
            logging.critical(msg)
            raise WMExecutionFailure(returncode, "CmsRunFailure", msg)

        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
        except Exception as ex:
            # Catch it if something goes wrong
            raise WMExecutionFailure(50115, "BadJobReportXML", str(ex))

        stepHelper = WMStepHelper(self.step)
        typeHelper = stepHelper.getTypeHelper()

        acquisitionEra = self.task.getAcquisitionEra()
        processingVer = self.task.getProcessingVersion()
        processingStr = self.task.getProcessingString()
        validStatus = self.workload.getValidStatus()
        inputPath = self.task.getInputDatasetPath()
        globalTag = typeHelper.getGlobalTag()
        prepID = self.task.getPrepID()
        cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

        self.report.setValidStatus(validStatus=validStatus)
        self.report.setGlobalTag(globalTag=globalTag)
        self.report.setPrepID(prepID)
        self.report.setInputDataset(inputPath=inputPath)
        self.report.setAcquisitionProcessing(
            acquisitionEra=acquisitionEra, processingVer=processingVer, processingStr=processingStr
        )
        self.report.setConfigURL(configURL="%s;;%s;;%s" % (cacheUrl, cacheDB, configID))

        # Attach info to files
        self.report.addInfoToOutputFilesForStep(stepName=self.stepName, step=self.step)

        self.report.checkForOutputFiles(stepName=self.stepName)
        self.report.checkForAdlerChecksum(stepName=self.stepName)
        self.report.checkForRunLumiInformation(stepName=self.stepName)

        if self.step.output.keep != True:
            self.report.killOutput()
        else:
            # Check that we only keep the desired output
            for module in stepHelper.getIgnoredOutputModules():
                self.report.deleteOutputModuleForStep(stepName=self.stepName, moduleName=module)

        # Add stageout LFN to existing TFileService files
        reportAnalysisFiles = self.report.getAnalysisFilesFromStep(self.stepName)
        for reportAnalysisFile in reportAnalysisFiles:
            newLFN = analysisFileLFN(reportAnalysisFile.fileName, self.step.user.lfnBase, self.job)
            addAttributesToFile(reportAnalysisFile, pfn=reportAnalysisFile.fileName, lfn=newLFN, validate=False)

        # Add analysis file entries for additional files listed in workflow
        for fileName in stepHelper.listAnalysisFiles():
            analysisFile = stepHelper.getAnalysisFile(fileName)
            if os.path.isfile(analysisFile.fileName):
                newLFN = analysisFileLFN(analysisFile.fileName, analysisFile.lfnBase, self.job)
                self.report.addAnalysisFile(
                    analysisFile.fileName,
                    lfn=newLFN,
                    Source="UserDefined",
                    pfn=os.path.join(os.getcwd(), analysisFile.fileName),
                    validate=False,
                )

        return
コード例 #38
0
    def stepEnd(self, step, stepReport):
        """
        _stepEnd_

        Fill with step-ending information
        """
        helper = WMStepHelper(step)

        stepSuccess = stepReport.getStepExitCode(stepName=helper.name())
        stepReport.setStepCounter(stepName=helper.name(),
                                  counter=self.stepCount)
        if self.jobSuccess == 0:
            self.jobSuccess = int(stepSuccess)
        if int(stepSuccess) != 0:
            self.failedStep = helper.name()

        data = {}
        data['MessageType'] = 'jobRuntime-stepEnd'
        data['MessageTS'] = time.strftime(self.tsFormat, time.gmtime())
        data['taskId'] = self.taskName
        data['jobId'] = self.jobName
        data['%d_ExeEnd' % self.stepCount] = helper.name()
        data['%d_ExeExitCode' % self.stepCount] = stepReport.getStepExitCode(
            stepName=helper.name())

        if helper.name() == 'StageOut':
            data['%d_StageOutExitStatus' % self.stepCount] = int(
                stepReport.stepSuccessful(stepName=helper.name()))

        times = stepReport.getTimes(stepName=helper.name())
        if times['stopTime'] != None and times['startTime'] != None:
            data['%d_ExeWCTime' % self.stepCount] = \
                                       times['stopTime'] - times['startTime']
            data['%d_NCores' % self.stepCount] = helper.getNumberOfCores()
            self.maxCores = max(self.maxCores, helper.getNumberOfCores())

        step = stepReport.retrieveStep(step=helper.name())

        if hasattr(step, 'performance'):
            if hasattr(step.performance, 'cpu'):
                data['%d_ExeCPUTime' % self.stepCount] = \
                                                getattr(step.performance.cpu,
                                                        'TotalJobCPU', 0)
                self.WrapperCPUTime += float(data['%d_ExeCPUTime' %
                                                  self.stepCount])

        self.WrapperWCTime += data['%d_ExeWCTime' % self.stepCount]
        self.lastStep = helper.name()

        self.publish(data=data)

        return data