# Example n. 1 (score: 0)
    def python(self, fast=False, fileReport=defaultFileReport):
        """Build the main transform job report as a python dictionary.

        @param fast: If True, file reports are generated without expensive
          metadata operations (passed through to trfFileReport.python()).
        @param fileReport: Mapping from file io category ('input', 'output',
          'temporary') to the detail level requested for that category; a
          false value suppresses the category from the report entirely.
        @return: Dictionary with transform identity, exit status, per-file
          reports, per-executor reports and resource consumption figures.
        """
        myDict = {
            'name': self._trf.name,
            'reportVersion': self._reportVersion,
            'cmdLine': ' '.join(shQuoteStrings(sys.argv)),
            'exitAcronym': trfExit.codeToName(self._trf.exitCode),
            'exitCode': self._trf.exitCode,
            'created': isodate(),
            'resource': {
                'executor': {},
                'transform': {}
            },
            'files': {}
        }
        # Overlong exit messages are truncated to _maxMsgLen; the cut tail is
        # kept in 'exitMsgExtra' so no information is lost
        if len(self._trf.exitMsg) > self._maxMsgLen:
            myDict['exitMsg'] = self._trf.exitMsg[:self._maxMsgLen -
                                                  len(self._truncationMsg
                                                      )] + self._truncationMsg
            myDict['exitMsgExtra'] = self._trf.exitMsg[self._maxMsgLen -
                                                       len(self._truncationMsg
                                                           ):]
        else:
            myDict['exitMsg'] = self._trf.exitMsg
            myDict['exitMsgExtra'] = ""

        # Create a list per requested file category
        for fileType in ('input', 'output', 'temporary'):
            if fileReport[fileType]:
                myDict['files'][fileType] = []
        # Should have a dataDictionary, unless something went wrong very early...
        for dataType, dataArg in iteritems(self._trf._dataDictionary):
            if dataArg.auxiliaryFile:  # Always skip auxiliary files from the report
                continue
            if fileReport[dataArg.io]:
                entry = {"type": dataType}
                entry.update(
                    trfFileReport(dataArg).python(fast=fast,
                                                  type=fileReport[dataArg.io]))
                # Suppress RAW if all subfiles had nentries == 0
                if 'subFiles' in entry and len(
                        entry['subFiles']) == 0 and isinstance(
                            dataArg, trfArgClasses.argBSFile):
                    msg.info(
                        'No subFiles for entry {0}, suppressing from report.'.
                        format(entry['argName']))
                else:
                    myDict['files'][dataArg.io].append(entry)

        # We report on all executors, in execution order
        myDict['executor'] = []
        if hasattr(self._trf, '_executorPath'):
            for executionStep in self._trf._executorPath:
                exe = self._trf._executorDictionary[executionStep['name']]
                myDict['executor'].append(
                    trfExecutorReport(exe).python(fast=fast))
                # Executor resources are gathered here to unify where this information is held
                # and allow T0/PanDA to just store this JSON fragment on its own
                myDict['resource']['executor'][exe.name] = exeResourceReport(
                    exe, self)
                for mergeStep in exe.myMerger:
                    myDict['resource']['executor'][
                        mergeStep.name] = exeResourceReport(mergeStep, self)
            if self._dbDataTotal > 0 or self._dbTimeTotal > 0:
                myDict['resource']['dbDataTotal'] = self._dbDataTotal
                myDict['resource']['dbTimeTotal'] = self.roundoff(
                    self._dbTimeTotal)
        # Resource consumption snapshot: os.times() gives (user, system,
        # children user, children system, elapsed real time)
        reportTime = os.times()

        # Calculate total cpu time we used -
        myCpuTime = reportTime[0] + reportTime[1]
        childCpuTime = reportTime[2] + reportTime[3]
        wallTime = reportTime[4] - self._trf.transformStart[4]
        cpuTime = myCpuTime
        cpuTimeTotal = 0
        cpuTimePerWorker = myCpuTime
        maxWorkers = 1
        msg.debug(
            'Raw cpu resource consumption: transform {0}, children {1}'.format(
                myCpuTime, childCpuTime))
        # Reduce childCpuTime by times reported in the executors (broken for MP...?)
        for exeName, exeReport in iteritems(myDict['resource']['executor']):
            if 'mpworkers' in exeReport:
                if exeReport['mpworkers'] > maxWorkers:
                    maxWorkers = exeReport['mpworkers']
            try:
                msg.debug('Subtracting {0}s time for executor {1}'.format(
                    exeReport['cpuTime'], exeName))
                childCpuTime -= exeReport['cpuTime']
            except TypeError:
                # cpuTime may be None (e.g. executor never ran) - skip quietly
                pass
            try:
                cpuTime += exeReport['cpuTime']
                cpuTimeTotal += exeReport['total']['cpuTime']
                if 'cpuTimePerWorker' in exeReport:
                    msg.debug('Adding {0}s to cpuTimePerWorker'.format(
                        exeReport['cpuTimePerWorker']))
                    cpuTimePerWorker += exeReport['cpuTimePerWorker']
                else:
                    msg.debug(
                        'Adding nonMP cpuTime {0}s to cpuTimePerWorker'.format(
                            exeReport['cpuTime']))
                    cpuTimePerWorker += exeReport['cpuTime']
            except TypeError:
                # Missing/None timing values - leave accumulators unchanged
                pass

        # Fixed: this debug line previously printed cpuTime while labelling
        # the value cpuTimeTotal
        msg.debug(
            'maxWorkers: {0}, cpuTimeTotal: {1}, cpuTimePerWorker: {2}'.format(
                maxWorkers, cpuTimeTotal, cpuTimePerWorker))
        reportGenerationCpuTime = reportGenerationWallTime = None
        if self._trf.outFileValidationStop and reportTime:
            reportGenerationCpuTime = calcCpuTime(
                self._trf.outFileValidationStop, reportTime)
            reportGenerationWallTime = calcWallTime(
                self._trf.outFileValidationStop, reportTime)

        myDict['resource']['transform'] = {
            'cpuTime': self.roundoff(myCpuTime),
            'cpuTimeTotal': self.roundoff(cpuTimeTotal),
            'externalCpuTime': self.roundoff(childCpuTime),
            'wallTime': self.roundoff(wallTime),
            'transformSetup': {
                'cpuTime': self.roundoff(self._trf.transformSetupCpuTime),
                'wallTime': self.roundoff(self._trf.transformSetupWallTime)
            },
            'inFileValidation': {
                'cpuTime': self.roundoff(self._trf.inFileValidationCpuTime),
                'wallTime': self.roundoff(self._trf.inFileValidationWallTime)
            },
            'outFileValidation': {
                'cpuTime': self.roundoff(self._trf.outFileValidationCpuTime),
                'wallTime': self.roundoff(self._trf.outFileValidationWallTime)
            },
            'reportGeneration': {
                'cpuTime': self.roundoff(reportGenerationCpuTime),
                'wallTime': self.roundoff(reportGenerationWallTime)
            },
        }
        if self._trf.processedEvents:
            myDict['resource']['transform'][
                'processedEvents'] = self._trf.processedEvents
        myDict['resource']['transform']['trfPredata'] = self._trf.trfPredata
        # check for division by zero for fast jobs, unit tests
        if wallTime > 0:
            myDict['resource']['transform']['cpuEfficiency'] = round(
                cpuTime / maxWorkers / wallTime, 4)
            myDict['resource']['transform']['cpuPWEfficiency'] = round(
                cpuTimePerWorker / wallTime, 4)
        myDict['resource']['machine'] = machineReport().python(fast=fast)

        return myDict
# Example n. 2 (score: 0)
    def classicPython(self, fast=False):
        """Return the report in the classic Tier 0 'prodsys' dictionary format.

        @param fast: Passed through to trfFileReport.classicPython() (to skip
          expensive metadata operations - TODO confirm against that method).
        @return: {'prodsys': trfDict}, where trfDict mimics the legacy
          gpickle report layout expected by Tier 0.
        """
        # Things we can get directly from the transform
        trfDict = {
            'jobInputs': [],  # Always empty?
            'jobOutputs': [],  # Filled in below...
            'more': {
                'Machine': 'unknown'
            },
            'trfAcronym': trfExit.codeToName(self._trf.exitCode),
            'trfCode': self._trf.exitCode,
            'trfExitCode': self._trf.exitCode,
        }

        # The last executed step supplies the 'athena' return code and acronym
        if self._trf.lastExecuted is not None:
            trfDict.update({
                'athAcronym': self._trf.lastExecuted.errMsg,
                'athCode': self._trf.lastExecuted.rc
            })

        # Emulate the NEEDCHECK behaviour
        # NOTE(review): elsewhere in this file _executorPath holds plain dicts
        # (accessed as executionStep['name']), so hasattr(executor, '_logScan')
        # would never be true here - verify whether this loop should first look
        # the executor up in _executorDictionary.
        if hasattr(self._trf, '_executorPath'):
            for executor in self._trf._executorPath:
                if hasattr(executor, '_logScan') and self._trf.exitCode == 0:
                    if executor._logScan._levelCounter[
                            'FATAL'] > 0 or executor._logScan._levelCounter[
                                'CRITICAL'] > 0:
                        # This should not happen!
                        msg.warning(
                            'Found FATAL/CRITICAL errors and exit code 0 - reseting to TRF_LOGFILE_FAIL'
                        )
                        self._trf.exitCode = trfExit.nameToCode(
                            'TRF_LOGFILE_FAIL')
                        trfDict['trfAcronym'] = 'TRF_LOGFILE_FAIL'
                    elif executor._logScan._levelCounter['ERROR'] > 0:
                        msg.warning(
                            'Found errors in logfile scan - changing exit acronymn to NEEDCHECK.'
                        )
                        trfDict['trfAcronym'] = 'NEEDCHECK'

        # Now add files
        fileArgs = self._trf.getFiles(io='output')
        for fileArg in fileArgs:
            # N.B. In the original Tier 0 gpickles there was executor
            # information added for each file (such as autoConfiguration, preExec).
            # However, Luc tells me it is ignored, so let's not bother.
            trfDict['jobOutputs'].extend(
                trfFileReport(fileArg).classicPython(fast=fast))
            # AMITag and friends is added per-file, but it's known only to the transform, so set it here:
            # NOTE(review): the [-1] below assumes classicPython() returned at
            # least one entry, and decorates only the last one - confirm that
            # is the intended behaviour for multi-entry file reports.
            for argdictKey in (
                    'AMITag',
                    'autoConfiguration',
            ):
                if argdictKey in self._trf.argdict:
                    trfDict['jobOutputs'][-1]['more']['metadata'][
                        argdictKey] = self._trf.argdict[argdictKey].value
            # Mangle substep arguments back to the old format: 'all' keeps the
            # plain key, any other substep gets a '<key>_<substep>' key
            for substepKey in ('preExec', 'postExec', 'preInclude',
                               'postInclude'):
                if substepKey in self._trf.argdict:
                    for substep, values in iteritems(
                            self._trf.argdict[substepKey].value):
                        if substep == 'all':
                            trfDict['jobOutputs'][-1]['more']['metadata'][
                                substepKey] = values
                        else:
                            trfDict['jobOutputs'][-1]['more']['metadata'][
                                substepKey + '_' + substep] = values

        # Now retrieve the input event count; if input files disagree, warn
        # and report the highest count found
        nentries = 'UNKNOWN'
        for fileArg in self._trf.getFiles(io='input'):
            thisArgNentries = fileArg.nentries
            if isinstance(thisArgNentries, int):
                if nentries == 'UNKNOWN':
                    nentries = thisArgNentries
                elif thisArgNentries != nentries:
                    msg.warning(
                        'Found a file with different event count than others: {0} != {1} for {2}'
                        .format(thisArgNentries, nentries, fileArg))
                    # Take highest number?
                    if thisArgNentries > nentries:
                        nentries = thisArgNentries
        trfDict['nevents'] = nentries

        # Tier 0 expects the report to be in a top level dictionary under the prodsys key
        return {'prodsys': trfDict}
# Example n. 3 (score: 0)
    def execute(self):
        msg.debug('Entering transform execution phase')

        try:
            # Intercept a few special options here
            if 'dumpargs' in self._argdict:
                self.parser.dumpArgs()
                sys.exit(0)

            # Graph stuff!
            msg.info('Resolving execution graph')
            self._setupGraph()

            if 'showSteps' in self._argdict:
                for exe in self._executors:
                    print "Executor Step: {0} (alias {1})".format(
                        exe.name, exe.substep)
                    if msg.level <= logging.DEBUG:
                        print " {0} -> {1}".format(exe.inData, exe.outData)
                sys.exit(0)

            if 'showGraph' in self._argdict:
                print self._executorGraph
                sys.exit(0)

            # Graph stuff!
            msg.info('Starting to trace execution path')
            self._tracePath()
            msg.info('Execution path found with {0} step(s): {1}'.format(
                len(self._executorPath),
                ' '.join([exe['name'] for exe in self._executorPath])))

            if 'showPath' in self._argdict:
                msg.debug('Execution path list is: {0}'.format(
                    self._executorPath))
                # Now print it nice
                print 'Executor path is:'
                for node in self._executorPath:
                    print '  {0}: {1} -> {2}'.format(node['name'],
                                                     list(node['input']),
                                                     list(node['output']))
                sys.exit(0)

            msg.debug('Execution path is {0}'.format(self._executorPath))

            # Prepare files for execution (separate method?)
            for dataType in [
                    data for data in self._executorGraph.data
                    if 'NULL' not in data
            ]:
                if dataType in self._dataDictionary:
                    msg.debug(
                        'Data type {0} maps to existing argument {1}'.format(
                            dataType, self._dataDictionary[dataType]))
                else:
                    fileName = 'tmp.' + dataType
                    # How to pick the correct argFile class?
                    for (prefix, suffix) in (('tmp', ''), ('output', 'File'),
                                             ('input', 'File')):
                        stdArgName = prefix + dataType + suffix
                        if stdArgName in self.parser._argClass:
                            msg.debug(
                                'Matched data type {0} to argument {1}'.format(
                                    dataType, stdArgName))
                            self._dataDictionary[
                                dataType] = self.parser._argClass[stdArgName](
                                    fileName)
                            self._dataDictionary[dataType].io = 'temporary'
                            break
                    if dataType not in self._dataDictionary:
                        if 'HIST' in fileName:
                            self._dataDictionary[dataType] = argHISTFile(
                                fileName,
                                io='temporary',
                                type=dataType.lower())

                        else:
                            self._dataDictionary[dataType] = argFile(
                                fileName,
                                io='temporary',
                                type=dataType.lower())
                            msg.debug(
                                'Did not find any argument matching data type {0} - setting to plain argFile: {1}'
                                .format(dataType,
                                        self._dataDictionary[dataType]))
                    self._dataDictionary[dataType].name = fileName

            # Now we can set the final executor configuration properly, with the final dataDictionary
            for executor in self._executors:
                executor.conf.setFromTransform(self)

            self.validateInFiles()

            for executionStep in self._executorPath:
                msg.debug('Now preparing to execute {0}'.format(executionStep))
                executor = self._executorDictionary[executionStep['name']]
                executor.preExecute(input=executionStep['input'],
                                    output=executionStep['output'])
                try:
                    executor.execute()
                    executor.postExecute()
                finally:
                    executor.validate()

            self._processedEvents = self.getProcessedEvents()
            self.validateOutFiles()

            msg.debug('Transform executor succeeded')
            self._exitCode = 0
            self._exitMsg = trfExit.codeToName(self._exitCode)

        except trfExceptions.TransformNeedCheckException as e:
            msg.warning(
                'Transform executor signaled NEEDCHECK condition: {0}'.format(
                    e.errMsg))
            self._exitCode = e.errCode
            self._exitMsg = e.errMsg
            self.generateReport(fast=False)

        except trfExceptions.TransformException as e:
            msg.critical('Transform executor raised %s: %s' %
                         (e.__class__.__name__, e.errMsg))
            self._exitCode = e.errCode
            self._exitMsg = e.errMsg
            # Try and write a job report...
            self.generateReport(fast=True)

        finally:
            # Clean up any orphaned processes and exit here if things went bad
            infanticide(message=True)
            if self._exitCode:
                msg.warning(
                    'Transform now exiting early with exit code {0} ({1})'.
                    format(e.errCode, e.errMsg))
                sys.exit(self._exitCode)