Example #1
    def appendToExecutorSet(self, executors):
        # Normalise to something iterable
        if isinstance(executors, transformExecutor):
            executors = [
                executors,
            ]
        elif not isinstance(executors, (list, tuple, set)):
            raise trfExceptions.TransformInternalException(
                trfExit.nameToCode('TRF_INTERNAL'),
                'Transform was initialised with an executor which was not a simple executor or an executor set'
            )

        # TRY TO DEPRECATE SETTING trf IN THE EXECUTOR - USE CONF!
        # Executor book keeping: set parent link back to me for all executors
        # Also setup a dictionary, indexed by executor name and check that name is unique
        ## Setting conf here not working - too early to get the dataDictionary
        for executor in executors:
            executor.trf = self
            if executor.name in self._executorDictionary:
                raise trfExceptions.TransformInternalException(
                    trfExit.nameToCode('TRF_INTERNAL'),
                    'Transform has been initialised with two executors with the same name ({0})'
                    ' - executor names must be unique'.format(executor.name))
            self._executors.add(executor)
            self._executorDictionary[executor.name] = executor
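
For reference, the same normalise-and-register pattern can be sketched standalone; the Executor class and register function below are illustrative stand-ins, not part of the transforms API:

# Minimal standalone sketch of the normalise-and-register pattern above.
# Executor and register() are illustrative stand-ins, not transforms classes.
class Executor:
    def __init__(self, name):
        self.name = name

def register(executors, registry):
    if isinstance(executors, Executor):
        executors = [executors]          # normalise a single executor to a list
    elif not isinstance(executors, (list, tuple, set)):
        raise TypeError('Expected an Executor or a collection of Executors')
    for executor in executors:
        if executor.name in registry:    # executor names must be unique
            raise ValueError('Duplicate executor name: {0}'.format(executor.name))
        registry[executor.name] = executor
    return registry

registry = {}
register(Executor('generate'), registry)
register([Executor('simulate'), Executor('digitise')], registry)
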
Example #2
def athenaMPoutputsLinkAndUpdate(newFullFilenames, fileArg):
    # Any files we link are numbered from 1, because the filename given
    # to athena always has a _000 suffix, so the mother process' file
    # can be used without linking
    fileIndex = 1
    linkedNameList = []
    newFilenameValue = []
    for fname in newFullFilenames:
        if path.dirname(fname) == "":
            linkedNameList.append(None)
            newFilenameValue.append(fname)
        else:
            linkName = "{0}{1:03d}".format(path.basename(fname).rstrip('0'), fileIndex)
            linkedNameList.append(linkName)
            newFilenameValue.append(linkName)
            fileIndex += 1
            
    for linkname, fname in zip(linkedNameList, newFullFilenames):
        if linkname:
            if len(newFullFilenames) == 1:
                try:
                    os.rename(fname, fileArg.originalName)
                    newFilenameValue[0] = fileArg.originalName
                except OSError as e:
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Failed to move {0} to {1}: {2}".format(fname, fileArg.originalName, e))
            else:
                try:
                    if path.lexists(linkname):
                        os.unlink(linkname)
                    os.symlink(fname, linkname)
                except OSError as e:
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Failed to link {0} to {1}: {2}".format(fname, linkname, e))
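
The unlink-then-symlink step above is a general recipe for refreshing a link that may already exist. A minimal standalone sketch (POSIX only; the filenames are illustrative):

import os
import tempfile

# Standalone sketch of the unlink-then-symlink pattern above, run in a
# throwaway directory; nothing here touches the transforms framework.
workdir = tempfile.mkdtemp()
target = os.path.join(workdir, 'myOutput._001')   # stand-in for a worker output file
open(target, 'w').close()
link = os.path.join(workdir, 'myOutput.001')
if os.path.lexists(link):    # lexists also catches dangling symlinks
    os.unlink(link)
os.symlink(target, link)
print(os.readlink(link))     # -> .../myOutput._001
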
Example #3
    def doToposort(self):
        # We will manipulate the graph, so deepcopy it
        graphCopy = copy.deepcopy(self._nodeDict)
        # Find all valid start nodes in this graph - ones with no data dependencies themselves
        startNodeNames = []
        for nodeName, node in iteritems(graphCopy):
            if len(node.connections['in']) == 0:
                startNodeNames.append(nodeName)

        if len(startNodeNames) == 0:
            raise trfExceptions.TransformGraphException(
                trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'There are no starting nodes in this graph - non-DAG graphs are not supported'
            )

        msg.debug('Found this list of start nodes for toposort: {0}'.format(
            startNodeNames))

        # The startNodeNames holds the list of nodes with their dependencies now satisfied (no input edges anymore)
        while len(startNodeNames) > 0:
            # Take the next startNodeName and zap it from the graph
            theNodeName = startNodeNames.pop()
            theNode = graphCopy[theNodeName]
            self._toposort.append(theNodeName)
            del graphCopy[theNodeName]

            # Now delete the edges this node was a source for
            msg.debug(
                'Considering connections from node {0}'.format(theNodeName))
            for connectedNodeName in theNode.connections['out']:
                graphCopy[connectedNodeName].delConnection(toExe=theNodeName,
                                                           direction='in')
                # Look for nodes which now have their dependencies satisfied
                if len(graphCopy[connectedNodeName].connections['in']) == 0:
                    startNodeNames.append(connectedNodeName)

        # If there are nodes left then the graph has cycles, which means it's not a DAG
        if len(graphCopy) > 0:
            raise trfExceptions.TransformGraphException(
                trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'Graph topological sort had no more start nodes, but nodes were left {0} - non-DAG graphs are not supported'
                .format(list(graphCopy)))

        msg.debug('Topologically sorted node order: {0}'.format(
            self._toposort))

        # Now toposort the input data for nodes
        self._toposortData = []
        for nodeName in self._toposort:
            # First add input data, then output data
            for dataType in self._nodeDict[nodeName].inputDataTypes:
                if dataType not in self._toposortData:
                    self._toposortData.append(dataType)
            for dataType in self._nodeDict[nodeName].outputDataTypes:
                if dataType not in self._toposortData:
                    self._toposortData.append(dataType)

        msg.debug('Topologically sorted data order: {0}'.format(
            self._toposortData))
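
The loop above is Kahn's algorithm: repeatedly pop a node with no remaining input edges, emit it, then delete its outgoing edges. A minimal sketch of the same algorithm on a plain {node: successors} dict, with no transforms graph classes:

# Kahn's algorithm on a plain {node: set(successors)} dict - a stripped-down
# version of the toposort above.
def toposort(successors):
    indegree = {n: 0 for n in successors}
    for outs in successors.values():
        for m in outs:
            indegree[m] += 1
    ready = [n for n, d in indegree.items() if d == 0]   # no input edges
    order = []
    while ready:
        n = ready.pop()
        order.append(n)
        for m in successors[n]:
            indegree[m] -= 1
            if indegree[m] == 0:    # dependencies now satisfied
                ready.append(m)
    if len(order) != len(successors):
        raise ValueError('Graph has a cycle - not a DAG')
    return order

print(toposort({'evgen': {'simu'}, 'simu': {'digi'}, 'digi': set()}))
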
Example #4
    def preExecute(self, input = set(), output = set()):
        msg.info('Preparing for BSJobSplitterExecutor execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))

        #See if we need to unpack a TAR file
        if 'hitarFile' in self.conf.argdict:
            print ("Untarring inputHITARFile", self.conf.argdict['hitarFile'].value)
            try:
                f=tarfile.open(name=self.conf.argdict['hitarFile'].value[0])
                f.list()
                f.extractall()
                f.close()
            except Exception as e:
                raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting HI input files for transform: {0}'.format(e))

        # There are two ways to configure this transform:
        # - Give an inputZeroBiasBSFile argument directly
        # - Give an inputBSCONFIGFile and jobNumber argument
        # Check now that we have a configuration that works

        if 'inputZeroBiasBSFile' in self.conf.argdict and 'inputBSCONFIGFile' in self.conf.argdict:
            #raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Both inputZeroBiasBSFile and inputBSCONFIGFile have been specified - please use only one.')
            del self.conf.argdict['inputZeroBiasBSFile']
            print ("WARNING - removed the inputZeroBiasBSFile argument, because inputZeroBiasBSFile and inputBSCONFIGFile were already specified")

        if 'inputBSCONFIGFile' in self.conf.argdict:
            if 'jobNumber' not in self.conf.argdict:
                raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputBSCONFIGFile is specified, but no jobNumber was given.')
            # Job number has to wrap around from 500, dropping back to 1
            wrappedJobNumber = (self.conf.argdict['jobNumber'].value-1)%500 + 1

            self._inputFilelist = 'filelist_{0}.txt'.format(wrappedJobNumber)
            self._lbnList = 'lbn_anal_map_{0}.txt'.format(wrappedJobNumber)

            try:
                print (self.conf.argdict['inputBSCONFIGFile'].value)
                f=tarfile.open(name=self.conf.argdict['inputBSCONFIGFile'].value[0])
                f.extract('filelist_{0}.txt'.format(wrappedJobNumber))
                f.extract('lbn_anal_map_{0}.txt'.format(wrappedJobNumber))
                f.close()
                bsInputs = open(self._inputFilelist).readline().rstrip().split(',')
                self.conf.addToArgdict('inputZeroBiasBSFile', trfArgClasses.argBSFile(bsInputs, io='input', type='BS', subtype='BS_ZeroBias'))
                self.conf.addToDataDictionary('ZeroBiasBS', self.conf.argdict['inputZeroBiasBSFile'])
                input.add('ZeroBiasBS')
                msg.info('Validating resolved input bytestream files')
                trfValidation.performStandardFileValidation({'ZeroBiasBS': self.conf.argdict['inputZeroBiasBSFile']}, io='input')
            except Exception as e:
                raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting input files for transform: {0}'.format(e))

            # Now setup correct input arguments
            self.conf.argdict['InputLbnMapFile'] = trfArgClasses.argString(self._lbnList)
            self.conf.argdict['InputFileMapFile'] = trfArgClasses.argString(self._inputFilelist)

        else:
            #if 'lumiBlockMapFile' not in self.conf.argdict:
            #    raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputZeroBiasBSFile is specified, but no lumiBlockMapFile was given.')
            #self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']
            if 'lumiBlockMapFile' in self.conf.argdict:
                self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']
        super(BSJobSplitterExecutor, self).preExecute(input=input, output=output)
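
The filelist/lbn-map handling above follows a standard tarfile recipe: open the archive, extract the named members, then read them back. A sketch against an in-memory tar so it runs anywhere (member names are illustrative):

import io
import tarfile

# Sketch of the extract-one-member pattern above, built against an in-memory
# tar; the member name and contents are made up for illustration.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode='w') as tf:
    data = b'file1.data,file2.data\n'
    info = tarfile.TarInfo('filelist_1.txt')
    info.size = len(data)
    tf.addfile(info, io.BytesIO(data))
buf.seek(0)
with tarfile.open(fileobj=buf, mode='r') as tf:
    tf.extract('filelist_1.txt')          # extracts into the current directory
print(open('filelist_1.txt').readline().rstrip().split(','))
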
Example #5
    def exception_wrapper(*args, **kwargs):
        # Setup imports which the wrapper needs
        import signal
        import traceback

        import PyJobTransforms.trfExceptions as trfExceptions

        try:
            return func(*args, **kwargs)

        except KeyboardInterrupt:
            msg.critical(
                'Caught a keyboard interrupt - exiting at your request.')
            trfUtils.infanticide(message=True)
            sys.exit(128 + signal.SIGINT)

        # This subclass is treated as a 'normal' exit condition
        # but it should never happen in production as it's a transform definition error
        except trfExceptions.TransformSetupException as e:
            msg.critical('Transform setup failed: {0}'.format(e.errMsg))
            msg.critical('To help you debug here is the stack trace:')
            msg.critical(traceback.format_exc(None))
            msg.critical('(Early exit - no job report is produced)')
            trfUtils.infanticide(message=True)
            sys.exit(e.errCode)

        except trfExceptions.TransformException as e:
            msg.critical(
                'Got a transform exception in the outer exception handler: {0!s}'
                .format(e))
            msg.critical('Stack trace is...')
            msg.critical(traceback.format_exc(None))
            msg.critical(
                'Job reports are likely to be missing or incomplete - sorry')
            msg.critical('Please report this as a transforms bug!')
            trfUtils.infanticide(message=True)
            sys.exit(trfExit.nameToCode('TRF_UNEXPECTED_TRF_EXCEPTION'))

        except Exception as e:
            msg.critical(
                'Got a general exception in the outer exception handler: {0!s}'
                .format(e))
            msg.critical('Stack trace is...')
            msg.critical(traceback.format_exc(None))
            msg.critical(
                'Job reports are likely to be missing or incomplete - sorry')
            msg.critical('Please report this as a transforms bug!')
            trfUtils.infanticide(message=True)
            sys.exit(trfExit.nameToCode('TRF_UNEXPECTED_OTHER_EXCEPTION'))
Example #6
def detectAthenaMPProcs(argdict = {}):
    athenaMPProcs = 0
    
    # Try to detect whether AthenaMP has been enabled
    try:
        if 'ATHENA_PROC_NUMBER' in os.environ:
            athenaMPProcs = int(os.environ['ATHENA_PROC_NUMBER'])
            if athenaMPProcs < 0:
                raise ValueError("ATHENA_PROC_NUMBER value was less than zero")
            msg.info('AthenaMP detected from ATHENA_PROC_NUMBER with {0} workers'.format(athenaMPProcs))
        elif 'athenaopts' in argdict:
            for substep in argdict['athenaopts'].value:
                procArg = [opt.replace("--nprocs=", "") for opt in argdict['athenaopts'].value[substep] if '--nprocs' in opt]
                if len(procArg) == 0:
                    athenaMPProcs = 0
                elif len(procArg) == 1:
                    athenaMPProcs = int(procArg[0])
                    if athenaMPProcs < 0:
                        raise ValueError("--nprocs was set to a value less than zero")
                else:
                    raise ValueError("--nprocs was set more than once in 'athenaopts'")
                msg.info('AthenaMP detected from "nprocs" setting with {0} workers for substep {1}'.format(athenaMPProcs,substep))
    except ValueError as errMsg:
        myError = 'Problem discovering AthenaMP setup: {0}'.format(errMsg)
        raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), myError)
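
The ATHENA_PROC_NUMBER branch above is plain environment-variable parsing with validation; a standalone sketch (detect_procs_from_env is an illustrative name, not a transforms function):

import os

# Standalone sketch of the ATHENA_PROC_NUMBER branch above; the function
# name is illustrative, not part of the transforms API.
def detect_procs_from_env(env=os.environ):
    procs = int(env.get('ATHENA_PROC_NUMBER', 0))
    if procs < 0:
        raise ValueError('ATHENA_PROC_NUMBER value was less than zero')
    return procs

print(detect_procs_from_env({'ATHENA_PROC_NUMBER': '8'}))   # -> 8
print(detect_procs_from_env({}))                            # -> 0 (AthenaMP off)
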
Example #7
    def postExecute(self):

        msg.info("Check for trig_cost.root file")
        # costmon generates the file trig_cost.root; to be saved on panda
        # it needs to be renamed via the outputNTUP_TRIGCOSTFile argument
        expectedFileName = 'trig_cost.root'
        # First check the argument is in the dict
        if 'outputNTUP_TRIGCOSTFile' in self.conf.argdict:
            #check file is created
            if (os.path.isfile(expectedFileName)):
                msg.info(
                    'Renaming %s to %s' %
                    (expectedFileName,
                     self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0]))
                try:
                    os.rename(
                        expectedFileName,
                        self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0])
                except OSError as e:
                    raise trfExceptions.TransformExecutionException(
                        trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
                        'Exception raised when renaming {0} to {1}: {2}'.
                        format(
                            expectedFileName, self.conf.
                            argdict['outputNTUP_TRIGCOSTFile'].value[0], e))
            else:
                msg.error(
                    'NTUP_TRIGCOST argument defined %s but %s not created' %
                    (self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0],
                     expectedFileName))
Example #8
def checkFileList(filelist):
    """Converts list of files of type ds#filename into a list of filenames, meanwhile
    setting ds value. If check is true it also checks the existence of the files."""
    # First check if type is list
    
    if not isinstance(filelist,list):
        filelist=[filelist]

    for i,ifile in enumerate(filelist):
        # extract ds,runnumber and svcclass
        filename=getDsFileName(ifile)
        # pass file check if file is on castor
        if filename.find('/castor',0,8) != -1:
            pass
        elif not fileutil.exists(filename):
            found = fileutil.exists_suffix_number(filename + '.')
            if not found:
                errMsg = filename + ' not found'
                raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_INPUT_FILE_VALIDATION_FAIL'), errMsg)
            if found != filename:
                filename = found
        # correct filename in list
        filelist[i]=filename
    return filelist
Example #9
    def classicSinglePython(self, filename, fast=False):
        if filename not in self._fileArg.value:
            raise trfExceptions.TransformReportException(
                trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                'Unknown file ({0}) in the file report for {1}'.format(
                    filename, self._fileArg))
        # Direct population of some keys
        fileDict = {
            'lfn': filename,
            'dataset': self._fileArg.dataset,
        }
        # Fill in the mapped 'primary' keys
        for myKey, classicKey in iteritems(self._internalToGpickleMap):
            fileDict[classicKey] = self._fileArg.getSingleMetadata(
                fname=filename, metadataKey=myKey, populate=not fast)
            if classicKey == 'checkSum' and fileDict[classicKey] == 'UNDEFINED':
                # Old style is that we give back None when we don't know
                fileDict[classicKey] = None
            elif fileDict[classicKey] == 'UNDEFINED':
                # Suppress things we don't generally expect to know
                del fileDict[classicKey]
        # Base 'more' stuff which is known by the argFile itself
        fileDict['more'] = {'metadata': {'fileType': self._fileArg.type}}
        for myKey, classicKey in iteritems(self._internalToGpickleMoreMap):
            value = self._fileArg.getSingleMetadata(fname=filename,
                                                    metadataKey=myKey,
                                                    populate=not fast)
            if value != 'UNDEFINED':
                fileDict['more']['metadata'][classicKey] = value

        return fileDict
Example #10
 def preExecute(self, input = set(), output = set()):
     # First we need to strip the filter file down to events that are present 
     # in the RAW file we are going to skim. This is because the HI workflow
     # will provide millions of events in their filter file, more than acmd.py
     # can cope with.
     listEvtCommand = ['AtlListBSEvents.exe', '-l']
     listEvtCommand.extend(self.conf.argdict['inputBSFile'].value)
     # For best lookup speed, we store the runnumber/eventnumber in a dictionary (set would also
     # be fast)
     rawEventList = {} 
     try:
         for line in subprocess.check_output(listEvtCommand, universal_newlines=True).split("\n"):
             if line.startswith("Index="):
                 try:
                     splitStrings = line.split(" ")
                     runprefix, runstr = splitStrings[1].split("=")
                     evtprefix, evtstr = splitStrings[2].split("=")
                     # Check sanity
                     if runprefix != "Run" or evtprefix != "Event":
                         msg.warning("Failed to understand this line from AtlListBSEvents: {0}".format(line))
                     else:
                         runnumber = int(runstr)
                         evtnumber = int(evtstr)
                         # We build up a string key as "RUN-EVENT", so that we can take advantage of
                         # the fast hash search against a dictionary 
                         rawEventList[runstr + "-" + evtstr] = True
                         msg.debug("Identified run {0}, event {1} in input RAW files".format(runstr, evtstr))
                 except ValueError as e:
                     msg.warning("Failed to understand this line from AtlListBSEvents: {0}".format(line))
     except subprocess.CalledProcessError as e:
         errMsg = "Call to AtlListBSEvents.exe failed: {0}".format(e)
         msg.error(errMsg)
         raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_EXEC_SETUP_FAIL"), errMsg)
Example #11
    def findExecutionPath(self):        
        # Switch off all nodes, except if we have a single node which is not data driven...
        self._execution = {}
        for nodeName, node in iteritems(self._nodeDict):
            if len(self._nodeDict) == 1 and node.inputDataTypes == set() and node.outputDataTypes == set():
                self._execution[nodeName] = {'enabled' : True, 'input' : set(), 'output' : set()}
            else:
                self._execution[nodeName] = {'enabled' : False, 'input' : set(), 'output' : set()}

        dataToProduce = copy.deepcopy(self._outputData)
        dataAvailable = copy.deepcopy(self._inputData)
                
        # Consider the next data type in topo order
        while len(dataToProduce) > 0:
            nextDataType = None
            for dataType in self._toposortData:
                if dataType in dataToProduce:
                    nextDataType = dataType
                    dataToProduce.remove(nextDataType)
                    dataAvailable.update([nextDataType])
                    break

            if not nextDataType:
                msg.error('Still have to produce data type(s) {0}, but did not find anything in the toposorted data list ({1}).' 
                          ' Transform parameters/graph are broken so aborting.'.format(dataToProduce, self._toposortData))
                raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                            'Data type graph error')

            msg.debug('Next data type to try is {0}'.format(nextDataType))
            bestPath = self._bestPath(nextDataType, dataAvailable)
            
            msg.debug('Found best path for {0}: {1}'.format(nextDataType, bestPath))

            ## @note Use @c modPath to construct an array which we iterate over in pairs of (currentNode, nextNode)
            modPath = bestPath.path + [None]
            for (nodeName, nextNodeName) in [ (n, modPath[modPath.index(n)+1]) for n in bestPath.path ]:
                self._execution[nodeName]['enabled'] = True
                # Add the necessary data types to the output of the first node and the input of the next
                if nodeName in bestPath.newData:
                    self._execution[nodeName]['output'].update(bestPath.newData[nodeName])
                    for newData in bestPath.newData[nodeName]:
                        if newData not in dataAvailable:
                            dataToProduce.update([newData])
                if nextNodeName:
                    self._execution[nextNodeName]['input'].update(bestPath.newData[nodeName])
                    if nextNodeName in bestPath.extraData:
                        self._execution[nextNodeName]['input'].update(bestPath.extraData[nextNodeName])
                # Add any extra data we need (from multi-exit nodes) to the data to produce list
                for extraNodeData in itervalues(bestPath.extraData):
                    for extra in extraNodeData:
                        if extra not in dataAvailable:
                            dataToProduce.update([extra])
                            
        # Now remove the fake data objects from activated nodes
        for node, props in iteritems(self._execution):
            msg.debug('Removing fake data from node {0}'.format(node))
            props['input'] -= set(['inNULL', 'outNULL'])
            props['output'] -= set(['inNULL', 'outNULL'])

        msg.debug('Execution dictionary: {0}'.format(self._execution))
Example #12
    def classicSingleEltree(self, filename, fast=False):
        if filename not in self._fileArg.value:
            raise trfExceptions.TransformReportException(
                trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                'Unknown file ({0}) in the file report for {1}'.format(
                    filename, self._fileArg))
        tree = ElementTree.Element('File',
                                   ID=str(
                                       self._fileArg.getSingleMetadata(
                                           fname=filename,
                                           metadataKey='file_guid',
                                           populate=not fast)))
        logical = ElementTree.SubElement(tree, 'logical')
        lfn = ElementTree.SubElement(logical, 'lfn', name=filename)
        for myKey, classicKey in iteritems(self._internalToClassicMap):
            # beam_type is tricky - we return only the first list value,
            # (but remember, protect against funny stuff!)
            if myKey == 'beam_type':
                beamType = self._fileArg.getSingleMetadata(fname=filename,
                                                           metadataKey=myKey,
                                                           populate=not fast)
                if isinstance(beamType, list):
                if len(beamType) == 0:
                        ElementTree.SubElement(tree,
                                               'metadata',
                                               att_name=classicKey,
                                               att_value='')
                    else:
                        ElementTree.SubElement(tree,
                                               'metadata',
                                               att_name=classicKey,
                                               att_value=str(beamType[0]))
                else:
                    # This is really not normal, but best we can do is str conversion
                    ElementTree.SubElement(tree,
                                           'metadata',
                                           att_name=classicKey,
                                           att_value=str(beamType))
            else:
                ElementTree.SubElement(tree,
                                       'metadata',
                                       att_name=classicKey,
                                       att_value=str(
                                           self._fileArg.getSingleMetadata(
                                               fname=filename,
                                               metadataKey=myKey,
                                               populate=not fast)))
        # Now add the metadata which is stored at the whole argument level
        ElementTree.SubElement(tree,
                               'metadata',
                               att_name='fileType',
                               att_value=str(self._fileArg.type))
        if self._fileArg.dataset is not None:
            ElementTree.SubElement(tree,
                                   'metadata',
                                   att_name='dataset',
                                   att_value=self._fileArg.dataset)

        return tree
Example #13
 def exitCode(self):
     if self._exitCode is None:
         msg.warning(
             'Transform exit code getter: _exitCode is unset, returning "TRF_UNKNOWN"'
         )
         return trfExit.nameToCode('TRF_UNKNOWN')
     else:
         return self._exitCode
Example #14
 def _doSteering(self, steeringDict=None):
     if not steeringDict:
         steeringDict = self._argdict['steering'].value
     for substep, steeringValues in iteritems(steeringDict):
         foundSubstep = False
         for executor in self._executors:
             if executor.name == substep or executor.substep == substep:
                 foundSubstep = True
                 msg.debug('Updating {0} with {1}'.format(
                     executor.name, steeringValues))
                 # Steering consists of tuples with (in/out, +/-, datatype)
                 for steeringValue in steeringValues:
                     if steeringValue[0] == 'in':
                         startSet = executor.inData
                     else:
                         startSet = executor.outData
                     origLen = len(startSet)
                     msg.debug('Data values to be modified are: {0}'.format(
                         startSet))
                     if steeringValue[1] == '+':
                         startSet.add(steeringValue[2])
                         if len(startSet) != origLen + 1:
                             raise trfExceptions.TransformSetupException(
                                 trfExit.nameToCode(
                                     'TRF_GRAPH_STEERING_ERROR'),
                                 'Attempting to add data type {0} to {1} {2} fails (original set of data: {3}). Was this datatype already there?'
                                 .format(steeringValue[2], executor.name,
                                         steeringValue[1], startSet))
                     else:
                         startSet.discard(steeringValue[2])
                         if len(startSet) != origLen - 1:
                             raise trfExceptions.TransformSetupException(
                                 trfExit.nameToCode(
                                     'TRF_GRAPH_STEERING_ERROR'),
                                 'Attempting to remove data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype even present?'
                                 .format(steeringValue[2], executor.name,
                                         steeringValue[1], startSet))
                 msg.debug('Updated data values to: {0}'.format(startSet))
         if not foundSubstep:
             raise trfExceptions.TransformSetupException(
                 trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
                 'This transform has no executor/substep {0}'.format(
                     substep))
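
Each steering value is a tuple of (in/out, +/-, datatype) applied to an executor's input or output data set. A standalone sketch of that application logic using plain sets instead of executors:

# Standalone sketch of applying (in/out, +/-, datatype) steering tuples to a
# pair of plain sets; the transforms executor machinery is not used here.
def apply_steering(inData, outData, steeringValues):
    for direction, op, dataType in steeringValues:
        target = inData if direction == 'in' else outData
        if op == '+':
            if dataType in target:
                raise ValueError('{0} already present'.format(dataType))
            target.add(dataType)
        else:
            if dataType not in target:
                raise ValueError('{0} not present'.format(dataType))
            target.discard(dataType)

inData, outData = {'RAW'}, {'ESD'}
apply_steering(inData, outData, [('in', '-', 'RAW'), ('out', '+', 'AOD')])
print(inData, outData)   # -> set() {'ESD', 'AOD'}
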
Example #15
    def __init__(self, executorSet, inputData=set([]), outputData=set([])):

        # Set basic node list
        self._nodeDict = {}

        msg.info('Transform graph input data: {0}; output data {1}'.format(
            inputData, outputData))

        if len(executorSet) == 1:
            # Single executor - in this case inData/outData is not mandatory, so we set them to the
            # input/output data of the transform
            executor = list(executorSet)[0]
            if len(executor._inData) == 0 and len(executor._outData) == 0:
                executor.inData = inputData
                executor.outData = outputData

        for executor in executorSet:
            self.addNode(executor)

        self._inputData = set(inputData)
        self._outputData = set(outputData)

        # It's forbidden for a transform to consume and produce the same datatype
        dataOverlap = self._inputData & self._outputData
        if len(dataOverlap) > 0:
            raise trfExceptions.TransformSetupException(
                trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'Transform definition error, you cannot produce and consume the same datatypes in a transform. Duplicated input/output types {0}.'
                .format(' '.join(dataOverlap)))

        # Add a pseudo-start/stop nodes, from which input data flows and output data finally arrives
        # This makes the graph 'concrete' for this job
        # This is useful as then data edges all connect properly to a pair of nodes
        # We add a node for every possible output as this enables topo sorting of the graph
        # nodes for any intermediate data end nodes as well
        pseudoNodes = dict()
        pseudoNodes['_start'] = graphNode(name='_start',
                                          inData=[],
                                          outData=self._inputData,
                                          weight=0)
        for node in itervalues(self._nodeDict):
            for dataType in node.outputDataTypes:
                endNodeName = '_end_{0}'.format(dataType)
                pseudoNodes[endNodeName] = graphNode(name=endNodeName,
                                                     inData=[dataType],
                                                     outData=[],
                                                     weight=0)
        self._nodeDict.update(pseudoNodes)

        # Toposort not yet done
        self._toposort = []
        self._toposortData = []

        # Now find connections between nodes
        self.findConnections()
Example #16
 def test_illegalName(self):
     cmd = ['Athena_tf.py', '--DBRelease', 'FailMeHarder']
     msg.info('Will run this transform: {0}'.format(cmd))
     p = subprocess.Popen(cmd, shell = False, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1, universal_newlines = True)
     while p.poll() is None:
         line = p.stdout.readline()
         sys.stdout.write(line)
     # Hoover up remaining buffered output lines
     for line in p.stdout:
         sys.stdout.write(line)
     self.assertEqual(p.returncode, trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'))
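
The poll-and-read loop in this test is a common way to stream a child process's output live. A minimal sketch using a harmless POSIX command in place of the transform under test:

import subprocess
import sys

# Minimal sketch of the stream-then-drain pattern above; echo stands in for
# the transform, so this assumes a POSIX environment.
p = subprocess.Popen(['echo', 'hello from child'], shell=False,
                     stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                     bufsize=1, universal_newlines=True)
while p.poll() is None:                 # child still running - stream its lines
    line = p.stdout.readline()
    sys.stdout.write(line)
for line in p.stdout:                   # hoover up remaining buffered lines
    sys.stdout.write(line)
print('child exited with', p.returncode)
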
Example #17
    def _tracePath(self):
        self._executorGraph.findExecutionPath()

        self._executorPath = self._executorGraph.execution
        if len(self._executorPath) == 0:
            raise trfExceptions.TransformSetupException(
                trfExit.nameToCode('TRF_SETUP'),
                'Execution path finding resulted in no substeps being executed '
                '(Did you correctly specify input data for this transform?)')
        # Tell the first executor that they are the first
        self._executorDictionary[self._executorPath[0]
                                 ['name']].conf.firstExecutor = True
Example #18
    def singleFilePython(self,
                         filename,
                         fast=False,
                         type='full',
                         basename=True):
        if filename not in self._fileArg.value:
            raise trfExceptions.TransformReportException(
                trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                'Unknown file ({0}) in the file report for {1}'.format(
                    filename, self._fileArg))
        if basename:
            entry = {'name': os.path.basename(filename)}
        else:
            entry = {'name': os.path.relpath(os.path.normpath(filename))}
        if type == 'name':
            # For 'name' we return only the GUID
            entry.update(
                self._fileArg.getMetadata(files=filename,
                                          populate=not fast,
                                          metadataKeys=['file_guid'
                                                        ])[filename])
        elif type == 'full':
            # Suppress io because it's the key at a higher level and _exists because it's internal
            entry.update(
                self._fileArg.getMetadata(files=filename,
                                          populate=not fast,
                                          maskMetadataKeys=[
                                              'io', '_exists', 'integrity',
                                              'file_type'
                                          ])[filename])
        else:
            raise trfExceptions.TransformReportException(
                trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                'Unknown file report type ({0}) in the file report for {1}'.
                format(type, self._fileArg))

        return entry
Example #19
def writeTranslate(runTranslate, runArgs, name, substep, first, output):
    msg.info('Writing options to file \"%s\"' % runTranslate)

    option = getOption(runArgs, name, substep, first, output)

    msg.info('Options set to: \"%s\":' % option)

    with open(runTranslate, 'w') as runTranslateFile:
        try:
            print(os.linesep, "option = ", option, file=runTranslateFile)
        except (IOError, OSError) as e:
            errMsg = 'Got an error when writing JO template {0}: {1}'.format(
                runTranslateFile, e)
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(
                trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
Example #20
    def writeJSONReport(self,
                        filename,
                        sort_keys=True,
                        indent=2,
                        fast=False,
                        fileReport=defaultFileReport):
        with open(filename, 'w') as report:
            try:
                if not self._dataDictionary:
                    self._dataDictionary = self.python(fast=fast,
                                                       fileReport=fileReport)

                json.dump(self._dataDictionary,
                          report,
                          sort_keys=sort_keys,
                          indent=indent)
            except TypeError as e:
                # TypeError means we had an unserialisable object - re-raise as a trf internal
                message = 'TypeError raised during JSON report output: {0!s}'.format(
                    e)
                msg.error(message)
                raise trfExceptions.TransformReportException(
                    trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), message)
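
json.dump raises TypeError at the first unserialisable object, which is exactly the case the report writer above converts into a transforms exception. A tiny demonstration (the report content is made up):

import json

# Demonstration that json.dump raises TypeError for unserialisable objects,
# the case the report writer above converts to a transforms exception.
try:
    with open('report.json', 'w') as report:
        json.dump({'exitCode': 0, 'argdict': {'maxEvents': set([10])}},
                  report, sort_keys=True, indent=2)
except TypeError as e:
    print('JSON report failed: {0!s}'.format(e))   # set is not serialisable
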
Example #21
        def funcWithTimeout(*args, **kwargs):
            ltimeout = timeout
            lretry = retry
            ltimefactor = timefactor
            lsleeptime = sleeptime
            ldefaultrc = defaultrc

            if 'timeout' in kwargs:
                ltimeout = kwargs.pop('timeout')
            if 'retry' in kwargs:
                lretry = kwargs.pop('retry')
            if 'timefactor' in kwargs:
                ltimefactor = kwargs.pop('timefactor')
            if 'sleeptime' in kwargs:
                lsleeptime = kwargs.pop('sleeptime')
            if 'defaultrc' in kwargs:
                ldefaultrc = kwargs.pop('defaultrc')

            if ltimeout is None:
                # Run function normally with no timeout wrapper
                msg.debug('Running {0}: {1} {2} without timeout'.format(
                    func, args, kwargs))
                return func(*args, **kwargs)

            n = 0
            while n <= lretry:
                msg.info('Try %i out of %i (time limit %s s) to call %s.',
                         n + 1, lretry + 1, ltimeout, func.__name__)
                starttime = time.time()
                q = mp.Queue(maxsize=1)
                nargs = (q, ) + args
                proc = mp.Process(target=funcWithQueue,
                                  args=nargs,
                                  kwargs=kwargs)
                proc.start()
                try:
                    # Wait for function to run and return, but with a timeout
                    flag, result = q.get(block=True, timeout=ltimeout)
                    proc.join(60)
                    msg.info('Executed call within %d s.',
                             time.time() - starttime)
                    if flag:
                        return result
                    else:
                        msg.warning(
                            'But an exception occurred in function %s.',
                            func.__name__)
                        msg.warning('Returning default return code %s.',
                                    ldefaultrc)
                        return ldefaultrc
                except queue.Empty:
                    # Our function did not run in time - kill the subprocess and increase the timeout
                    msg.warning(
                        'Timeout limit of %d s reached. Kill subprocess and its children.',
                        ltimeout)
                    parent = proc.pid
                    pids = [parent]
                    pids.extend(
                        trfUtils.listChildren(parent=parent,
                                              listOrphans=False))
                    trfUtils.infanticide(pids)
                    proc.join(60)  # Ensure cleanup
                    if n != lretry:
                        msg.info('Going to sleep for %d s.', lsleeptime)
                        time.sleep(lsleeptime)
                    n += 1
                    ltimeout *= ltimefactor
                    lsleeptime *= ltimefactor
                except IOError:
                    errMsg = "IOError while communicating with subprocess"
                    msg.error(errMsg)
                    raise TransformInternalException(
                        trfExit.nameToCode("TRF_EXTERNAL"), errMsg)

            msg.warning('All %i tries failed!', n)
            raise TransformTimeoutException(
                trfExit.nameToCode('TRF_EXEC_TIMEOUT'),
                'Timeout in function %s' % (func.__name__))
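
The core of the wrapper above is: run the function in a child process, post its result on a bounded queue, and treat queue.Empty from a timed get as the timeout. A self-contained sketch of just that core, with the success-flag, retry and child-tree cleanup machinery stripped out:

import multiprocessing as mp
import queue

def _worker(q, func, *args):
    # Child side: run the function and post its result on the queue
    q.put(func(*args))

def run_with_timeout(func, args=(), timeout=5):
    q = mp.Queue(maxsize=1)
    proc = mp.Process(target=_worker, args=(q, func) + tuple(args))
    proc.start()
    try:
        result = q.get(block=True, timeout=timeout)  # queue.Empty means we timed out
        proc.join(60)
        return result
    except queue.Empty:
        proc.terminate()  # simplistic - the original kills the whole child process tree
        proc.join(60)
        raise TimeoutError('{0} did not finish in {1} s'.format(func.__name__, timeout))

if __name__ == '__main__':
    print(run_with_timeout(sum, args=([1, 2, 3],)))  # -> 6
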
Example #22
    def _bestPath(self, data, dataAvailable, startNodeName = '_start', endNodeName = None):
        
        if endNodeName is None:
            endNodeName = '_end_{0}'.format(data)
        
        if endNodeName not in self._nodeDict:
            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                'Node {0} was not found - the transform data connection definition is broken'.format(endNodeName))

        
        # Set of all considered paths
        # Initialise this with our endNode name - algorithm works back to the start
        pathSet = [graphPath(endNodeName, data),]
        
        msg.debug('Started path finding with seed path {0}'.format(pathSet[0]))
        
        # Halting condition - only one path and its first element is startNodeName
        while len(pathSet) > 1 or pathSet[0].path[0] != startNodeName:
            msg.debug('Starting best path iteration with {0} paths in {1}'.format(len(pathSet), pathSet))
            # Copy the pathSet to do this, as we will update it
            for path in pathSet[:]:
                msg.debug('Continuing path finding with path {0}'.format(path))
                currentNodeName = path.path[0]
                if currentNodeName == startNodeName:
                    msg.debug('Path {0} has reached the start node - finished'.format(path))
                    continue
                # If there are no connections into this node then it's a dead end - kill it
                inboundNodeNames = list(self._nodeDict[currentNodeName].connections['in'])
                if len(inboundNodeNames) == 0:
                    msg.debug('Path {0} is a dead end - removing'.format(path))
                    pathSet.remove(path)
                    continue
                # If there is only one connection into this node, we extend along it
                if len(inboundNodeNames) == 1:
                    msg.debug('Single exit from path {0} - adding connection to {1}'.format(path, inboundNodeNames[0]))
                    self._extendPath(path, currentNodeName, inboundNodeNames[0])
                    continue
                # Else we need to clone the path for each possible exit
                msg.debug('Multiple exits from path {0} - will clone for each extra exit'.format([path]))
                for nextNodeName in inboundNodeNames[1:]:
                    newPath = copy.deepcopy(path)
                    msg.debug('Cloned exit from path {0} to {1}'.format(newPath, nextNodeName))
                    self._extendPath(newPath, currentNodeName, nextNodeName)
                    pathSet.append(newPath)
                # Finally, use the original path to extend along the first node exit
                msg.debug('Adding exit from original path {0} to {1}'.format(path, inboundNodeNames[0]))
                self._extendPath(path, currentNodeName, inboundNodeNames[0])

            # Now compare paths which made it to the end - only keep the shortest
            lowestCostPath = None
            for path in pathSet[:]:
                currentNodeName = path.path[0]
                if currentNodeName == startNodeName:
                    if lowestCostPath is None:
                        lowestCostPath = path
                        continue
                    if path.cost >= lowestCostPath.cost:
                        msg.debug('Path {0} is no cheaper than best path {1} - removing'.format(path, lowestCostPath))
                        pathSet.remove(path)
                    else:
                        msg.debug('Path {0} is cheaper than previous best path {1} - removing previous'.format(path, lowestCostPath))
                        pathSet.remove(lowestCostPath)
                        lowestCostPath = path
    
            # Emergency break
            if len(pathSet) == 0:
                raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                            'No path found between {0} and {1} for {2}'.format(startNodeName, endNodeName, data))
        return pathSet[0]
Example #23
    def parseCmdLineArgs(self, args):
        msg.info('Transform command line was: %s' %
                 ' '.join(shQuoteStrings(sys.argv)))

        try:
            # Use the argparse infrastructure to get the actual command line arguments
            self._argdict = vars(self.parser.parse_args(args))

            # Need to know if any input or output files were set - if so then we suppress the
            # corresponding parameters from AMI
            inputFiles = outputFiles = False
            for k, v in iteritems(self._argdict):
                if k.startswith('input') and isinstance(v, argFile):
                    inputFiles = True
                elif k.startswith('output') and isinstance(v, argFile):
                    outputFiles = True
            msg.debug("CLI Input files: {0}; Output files {1}".format(
                inputFiles, outputFiles))

            # Now look for special arguments, which expand out to other parameters
            # Note that the pickled argdict beats AMIConfig because dict.update() will overwrite
            # (However, we defend the real command line against updates from either source)
            extraParameters = {}
            # AMI configuration?
            if 'AMIConfig' in self._argdict:
                msg.debug('Given AMI tag configuration {0}'.format(
                    self._argdict['AMIConfig']))
                from PyJobTransforms.trfAMI import TagInfo
                tag = TagInfo(self._argdict['AMIConfig'].value)
                updateDict = {}
                for k, v in iteritems(dict(tag.trfs[0])):
                    # Convert to correct internal key form
                    k = cliToKey(k)
                    if inputFiles and k.startswith('input'):
                        msg.debug(
                            'Suppressing argument {0} from AMI'
                            ' because input files have been specified on the command line'
                            .format(k))
                        continue
                    if outputFiles and k.startswith('output'):
                        msg.debug(
                            'Suppressing argument {0} from AMI'
                            ' because output files have been specified on the command line'
                            .format(k))
                        continue
                    updateDict[k] = v
                extraParameters.update(updateDict)

            # JSON arguments?
            if 'argJSON' in self._argdict:
                try:
                    import json
                    msg.debug('Given JSON encoded arguments in {0}'.format(
                        self._argdict['argJSON']))
                    argfile = open(self._argdict['argJSON'], 'r')
                    jsonParams = json.load(argfile)
                    msg.debug('Read: {0}'.format(jsonParams))
                    extraParameters.update(convertToStr(jsonParams))
                    argfile.close()
                except Exception as e:
                    raise trfExceptions.TransformArgException(
                        trfExit.nameToCode('TRF_ARG_ERROR'),
                        'Error when deserialising JSON file {0} ({1})'.format(
                            self._argdict['argJSON'], e))

            # Event Service
            if 'eventService' in self._argdict and self._argdict[
                    'eventService'].value:
                updateDict = {}
                updateDict['athenaMPMergeTargetSize'] = '*:0'
                updateDict['checkEventCount'] = False
                updateDict['outputFileValidation'] = False
                extraParameters.update(updateDict)

            # Process anything we found
            for k, v in iteritems(extraParameters):
                msg.debug(
                    'Found this extra argument: {0} with value: {1} ({2})'.
                    format(k, v, type(v)))
                if k not in self.parser._argClass:
                    raise trfExceptions.TransformArgException(
                        trfExit.nameToCode('TRF_ARG_ERROR'),
                        'Argument "{0}" not known (try "--help")'.format(k))
                if k in self._argdict:
                    msg.debug(
                        'Ignored {0}={1} as extra parameter because this argument was given on the command line.'
                        .format(k, v))
                    continue
                # For callable classes we instantiate properly, otherwise we set the value for simple arguments
                if '__call__' in dir(self.parser._argClass[k]):
                    self._argdict[k] = self.parser._argClass[k](v)
                else:
                    self._argdict[k] = v
                msg.debug('Argument {0} set to {1}'.format(
                    k, self._argdict[k]))

            # Set the key name as an argument property - useful to be able to look back
            # at where this argument came from
            for k, v in iteritems(self._argdict):
                if isinstance(v, argument):
                    v.name = k

            # Now that all arguments are parsed, if a pickle dump is requested do it here and exit
            if 'dumpPickle' in self._argdict:
                msg.info('Now dumping pickled version of command line to {0}'.
                         format(self._argdict['dumpPickle']))
                pickledDump(self._argdict)
                sys.exit(0)

            # Likewise, if a JSON dump is requested do it here and exit
            if 'dumpJSON' in self._argdict:
                msg.info(
                    'Now dumping JSON version of command line to {0}'.format(
                        self._argdict['dumpJSON']))
                JSONDump(self._argdict)
                sys.exit(0)
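
The precedence rule implemented above is: the real command line always wins, and within the expanded sources the pickled/JSON argdict beats AMI because dict.update() is applied last. A small sketch of that merge order with plain dicts standing in for the argument classes:

# Sketch of the argument precedence above using plain dicts: AMI values are
# collected first, JSON values update (and so beat) them, and anything already
# given on the command line is never overwritten. All values are made up.
cli_args = {'maxEvents': 100}
ami_args = {'maxEvents': 50, 'preExec': 'from AMI'}
json_args = {'preExec': 'from JSON', 'skipEvents': 10}

extraParameters = {}
extraParameters.update(ami_args)
extraParameters.update(json_args)      # JSON beats AMI on collisions
for k, v in extraParameters.items():
    if k in cli_args:
        continue                       # the command line always wins
    cli_args[k] = v
print(cli_args)  # {'maxEvents': 100, 'preExec': 'from JSON', 'skipEvents': 10}
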
Example #24
    def classicPython(self, fast=False):
        # Things we can get directly from the transform
        trfDict = {
            'jobInputs': [],  # Always empty?
            'jobOutputs': [],  # Filled in below...
            'more': {
                'Machine': 'unknown'
            },
            'trfAcronym': trfExit.codeToName(self._trf.exitCode),
            'trfCode': self._trf.exitCode,
            'trfExitCode': self._trf.exitCode,
        }

        if self._trf.lastExecuted is not None:
            trfDict.update({
                'athAcronym': self._trf.lastExecuted.errMsg,
                'athCode': self._trf.lastExecuted.rc
            })

        # Emulate the NEEDCHECK behaviour
        if hasattr(self._trf, '_executorPath'):
            for executor in self._trf._executorPath:
                if hasattr(executor, '_logScan') and self._trf.exitCode == 0:
                    if executor._logScan._levelCounter[
                            'FATAL'] > 0 or executor._logScan._levelCounter[
                                'CRITICAL'] > 0:
                        # This should not happen!
                        msg.warning(
                            'Found FATAL/CRITICAL errors and exit code 0 - resetting to TRF_LOGFILE_FAIL'
                        )
                        self._trf.exitCode = trfExit.nameToCode(
                            'TRF_LOGFILE_FAIL')
                        trfDict['trfAcronym'] = 'TRF_LOGFILE_FAIL'
                    elif executor._logScan._levelCounter['ERROR'] > 0:
                        msg.warning(
                            'Found errors in logfile scan - changing exit acronym to NEEDCHECK.'
                        )
                        trfDict['trfAcronym'] = 'NEEDCHECK'

        # Now add files
        fileArgs = self._trf.getFiles(io='output')
        for fileArg in fileArgs:
            # N.B. In the original Tier 0 gpickles there was executor
            # information added for each file (such as autoConfiguration, preExec).
            # However, Luc tells me it is ignored, so let's not bother.
            trfDict['jobOutputs'].extend(
                trfFileReport(fileArg).classicPython(fast=fast))
            # AMITag and friends are added per-file, but they're known only to the transform, so set them here:
            for argdictKey in (
                    'AMITag',
                    'autoConfiguration',
            ):
                if argdictKey in self._trf.argdict:
                    trfDict['jobOutputs'][-1]['more']['metadata'][
                        argdictKey] = self._trf.argdict[argdictKey].value
            # Mangle substep arguments back to the old format
            for substepKey in ('preExec', 'postExec', 'preInclude',
                               'postInclude'):
                if substepKey in self._trf.argdict:
                    for substep, values in iteritems(
                            self._trf.argdict[substepKey].value):
                        if substep == 'all':
                            trfDict['jobOutputs'][-1]['more']['metadata'][
                                substepKey] = values
                        else:
                            trfDict['jobOutputs'][-1]['more']['metadata'][
                                substepKey + '_' + substep] = values

        # Now retrieve the input event count
        nentries = 'UNKNOWN'
        for fileArg in self._trf.getFiles(io='input'):
            thisArgNentries = fileArg.nentries
            if isinstance(thisArgNentries, int):
                if nentries == 'UNKNOWN':
                    nentries = thisArgNentries
                elif thisArgNentries != nentries:
                    msg.warning(
                        'Found a file with different event count than others: {0} != {1} for {2}'
                        .format(thisArgNentries, nentries, fileArg))
                    # Take highest number?
                    if thisArgNentries > nentries:
                        nentries = thisArgNentries
        trfDict['nevents'] = nentries

        # Tier 0 expects the report to be in a top level dictionary under the prodsys key
        return {'prodsys': trfDict}
Example #25
    def python(self, fast=False, type='full'):
        # First entity contains shared properties - same for all files in this argFile
        if type == 'name':
            fileArgProps = {
                'dataset': self._fileArg.dataset,
                'nentries': self._fileArg.getnentries(fast),
                'subFiles': []
            }
        elif type == 'full':
            fileArgProps = {
                'dataset': self._fileArg.dataset,
                'type': self._fileArg.type,
                'subFiles': [],
                'argName': self._fileArg.name,
            }
        else:
            raise trfExceptions.TransformReportException(
                trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                'Unknown file report type ({0}) in the file report for {1}'.
                format(type, self._fileArg))

        ## @note We try to strip off the path when there are multiple files to be reported on,
        #  however we should not do this if any of the files share a basename or anything is
        #  in a different directory
        uniqueBasenames = set(
            [os.path.basename(fname) for fname in self._fileArg.value])
        uniqueDirectories = set([
            os.path.dirname(os.path.relpath(os.path.normpath(fname)))
            for fname in self._fileArg.value
        ])
        if len(uniqueBasenames) != len(self._fileArg.value):
            msg.info(
                'Detected two files with the same basename in a file argument - report for file {0} will be produced with the path as a key'
                .format(self._fileArg))
            basenameReport = False
        elif len(uniqueDirectories) > 1:
            msg.warning(
                'Detected output files in different directories - report for file {0} will be produced with the path as a key'
                .format(self._fileArg))
            basenameReport = False
        else:
            basenameReport = True
        suppressed = []
        for fname in self._fileArg.value:
            subFile = None
            if basenameReport:
                subFile = self.singleFilePython(fname, fast=fast, type=type)
            else:
                subFile = self.singleFilePython(fname,
                                                fast=fast,
                                                type=type,
                                                basename=False)
            if subFile is not None:
                # if nentries == 0 for DRAW, suppress subfile from report
                if 'nentries' in subFile and subFile[
                        'nentries'] == 0 and isinstance(
                            self._fileArg, trfArgClasses.argBSFile):
                    msg.info('Suppressing file {0}, nentries is 0'.format(
                        subFile['name']))
                    suppressed.append(subFile['name'])
                else:
                    fileArgProps['subFiles'].append(subFile)

        return fileArgProps
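Why the uniqueness tests matter is easiest to see on a concrete file list. A minimal sketch (the file names are invented):

    import os

    fnames = ['run1/data.pool.root', 'run2/data.pool.root']
    uniqueBasenames = set(os.path.basename(f) for f in fnames)
    uniqueDirectories = set(
        os.path.dirname(os.path.relpath(os.path.normpath(f))) for f in fnames)
    # The two files share the basename 'data.pool.root', so the report
    # falls back to using the full path as the key (basenameReport = False)
    print(len(uniqueBasenames) != len(fnames))   # -> True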
Example #26
import ast
import json
import os
import traceback
from json import dumps

import logging
msg = logging.getLogger(__name__)

from PyJobTransforms.trfExceptions import TransformAMIException
from PyJobTransforms.trfDefaultFiles import getInputFileName, getOutputFileName
from PyJobTransforms.trfUtils import convertToStr

from PyJobTransforms.trfExitCodes import trfExit
AMIerrorCode = trfExit.nameToCode('TRF_AMI_ERROR')


## @brief Stores the configuration of a transform
class TrfConfig:
    def __init__(self):
        self.name = None
        self.release = None
        self.physics = {}
        self.inFiles = {}
        self.outFiles = {}
        self.outputs = {}
        self.inDS = None
        self.outfmts = []
        self.newTransform = False
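As a rough usage sketch, the holder is populated attribute by attribute, presumably before being exchanged with AMI (every field value below is invented, not from the source):

    cfg = TrfConfig()
    cfg.name = 'Reco_tf'                     # hypothetical transform name
    cfg.release = 'AtlasOffline-21.0.15'     # hypothetical release tag
    cfg.physics = {'maxEvents': 100}
    cfg.inFiles = {'inputAODFile': 'AOD.pool.root'}
    cfg.newTransform = True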
Example #27
    def writeRunArgs(self, input=dict(), output=dict()):
        msg.info('Writing runArgs to file \"%s\"', self._runArgsFile)

        ## Check consistency btw --CA flag and provided skeletons:
        if 'CA' in self._exe.conf.argdict:
            if self._exe._skeletonCA is None:
                errMsg = "Got the --CA option but this transform doesn't supply a ComponentAccumulator-based skeleton file"
                msg.error(errMsg)
                raise trfExceptions.TransformExecutionException(
                    trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
        else:  # 'CA' not in self._exe.conf.argdict
            if self._exe._skeleton is None:
                errMsg = "No --CA option given, but this transform doesn't supply old-style skeleton file"
                msg.error(errMsg)
                raise trfExceptions.TransformExecutionException(
                    trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)

        with open(self._runArgsFile, 'w') as runargsFile:
            try:
                # First write a little header
                print(os.linesep.join(
                    ("# Run arguments file auto-generated on {0} by:".format(
                        time.asctime()),
                     "# JobTransform: {0}".format(self._exe.name),
                     "# Version: {0}".format(self._version))),
                      file=runargsFile)

                # Now make sure we import the runArgs class for our job options
                print(os.linesep.join(
                    ("# Import runArgs class",
                     "from PyJobTransforms.trfJobOptions import RunArguments",
                     "{0} = RunArguments()".format(self._runArgsName))),
                      file=runargsFile)

                # Handy to write the substep name here as it can be used as (part of) a random seed
                # in some cases
                print('{0}.trfSubstepName = {1!r}'.format(
                    self._runArgsName, self._exe.name),
                      os.linesep,
                      file=runargsFile)

                # Now loop over the core argdict and see what needs to be given as a runArg
                declaredRunargs = []
                for k, v in iteritems(self._exe.conf.argdict):
                    # Check if this arg is supposed to be in runArgs
                    if isinstance(v, trfArgClasses.argument) and v.isRunarg:
                        # Files handled later
                        if isinstance(v, trfArgClasses.argFile):
                            continue

                        msg.debug(
                            'Argument {0} is a runarg, will be added to JO file (value {1})'
                            .format(k, v.value))

                        ## @note Substep type arguments are rather special, they apply to only named
                        #  executors or substeps. We use the returnMyValue() method to sort out what
                        #  specific value applies to us
                        if isinstance(v, trfArgClasses.argSubstep):
                            myValue = v.returnMyValue(exe=self._exe)
                            if myValue is not None:
                                print("{0}.{1!s} = {2!r}".format(
                                    self._runArgsName, k, myValue),
                                      file=runargsFile)
                                msg.debug(
                                    'Added substep type argument {0} as: {1}'.
                                    format(k, myValue))
                                declaredRunargs.append(k)
                        else:
                            print("{0}.{1!s} = {2!r}".format(
                                self._runArgsName, k, v.value),
                                  file=runargsFile)
                            declaredRunargs.append(k)
                    else:
                        msg.debug(
                            'Argument {0} is not a runarg - ignored'.format(k))

                # If we did not add maxEvents then set it explicitly to -1, which
                # avoids some strange defaults that would allow only 5 events to be processed
                if 'maxEvents' not in declaredRunargs:
                    print(os.linesep.join((
                        "",
                        "# Explicitly added to process all events in this step",
                        "{0}.maxEvents = -1".format(self._runArgsName),
                    )),
                          file=runargsFile)

                # Now deal with our input and output files
                print(os.linesep, "# Input data", file=runargsFile)
                for dataType, dataArg in iteritems(input):
                    print('{0}.input{1}File = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.value),
                          file=runargsFile)
                    print('{0}.input{1}FileType = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.type),
                          file=runargsFile)
                    # Add the input event count, if we know it
                    if dataArg.isCached(metadataKeys=['nentries']):
                        print('{0}.input{1}FileNentries = {2!r}'.format(
                            self._runArgsName, dataType, dataArg.nentries),
                              file=runargsFile)
                    print("{0}.{1}FileIO = {2!r}".format(
                        self._runArgsName, dataType,
                        self._exe.conf.dataDictionary[dataType].io),
                          file=runargsFile)

                print(os.linesep, "# Output data", file=runargsFile)
                for dataType, dataArg in iteritems(output):
                    # Need to be careful to write the _output_ filename as a string, not a list
                    print('{0}.output{1}File = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.value[0]),
                          file=runargsFile)
                    print('{0}.output{1}FileType = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.type),
                          file=runargsFile)

                # Process all of the tweaky special runtime arguments
                print(os.linesep, "# Extra runargs", file=runargsFile)
                ## @note extraRunargs are passed using repr, i.e., they should be constants
                for k, v in iteritems(self._exe._extraRunargs):
                    ## @note If the argument was also given on the CLI then, for list
                    #  arguments like preExec, we extend the existing list instead of replacing it
                    if k in declaredRunargs:
                        if isinstance(self._exe.conf.argdict[k].value, list):
                            msg.debug('Extending runarg {0!s}={1!r}'.format(
                                k, v))
                            print('{0}.{1!s}.extend({2!r})'.format(
                                self._runArgsName, k, v),
                                  file=runargsFile)
                    else:
                        msg.debug('Adding runarg {0!s}={1!r}'.format(k, v))
                        print('{0}.{1!s} = {2!r}'.format(
                            self._runArgsName, k, v),
                              file=runargsFile)

                ## @note runtime runargs are passed as strings, i.e., they can be evaluated
                print(os.linesep, '# Extra runtime runargs', file=runargsFile)
                for k, v in iteritems(self._exe._runtimeRunargs):
                    # These options are string converted, not repred, so they can write an option
                    # which is evaluated at runtime
                    # Protect this with try: except: for the Embedding use case
                    msg.debug('Adding runarg {0!s}={1!r}'.format(k, v))
                    print(os.linesep.join(
                        ('try:', '    {0}.{1!s} = {2!s}'.format(
                            self._runArgsName, k, v), 'except AttributeError:',
                         '    printfunc ("WARNING - AttributeError for {0}")'.
                         format(k))),
                          file=runargsFile)

                ## @note Now write the literals into the runargs file
                if self._exe._literalRunargs is not None:
                    print(os.linesep,
                          '# Literal runargs snippets',
                          file=runargsFile)
                    for line in self._exe._literalRunargs:
                        print(line, file=runargsFile)

                ## Another special option - dataArgs are always written to the runargs file
                for dataType in self._exe._dataArgs:
                    print(os.linesep,
                          '# Forced data value arguments',
                          file=runargsFile)
                    if dataType in self._exe.conf.dataDictionary:
                        print('{0}.data{1}arg = {2!r}'.format(
                            self._runArgsName, dataType,
                            self._exe.conf.dataDictionary[dataType].value),
                              file=runargsFile)
                    else:
                        print(
                            '# Warning: data type "{0}" is not part of this transform'
                            .format(dataType),
                            file=runargsFile)

                # This adds the correct JO fragment for an AthenaMP job, where we need to ask
                # the FileMgr to produce the requested log and report files
                # Also, aggregating the workers' logfiles into the mother's makes life
                # easier for debugging
                if self._exe._athenaMP:
                    print(os.linesep,
                          '# AthenaMP Options. nprocs = %d' %
                          self._exe._athenaMP,
                          file=runargsFile)
                    # Proxy for both options
                    print(os.linesep.join((
                        os.linesep,
                        'from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps',
                        'AthenaMPJobProps.AthenaMPFlags.WorkerTopDir="{0}"'.
                        format(self._exe._athenaMPWorkerTopDir),
                        'AthenaMPJobProps.AthenaMPFlags.OutputReportFile="{0}"'
                        .format(self._exe._athenaMPFileReport),
                        'AthenaMPJobProps.AthenaMPFlags.EventOrdersFile="{0}"'.
                        format(self._exe._athenaMPEventOrdersFile),
                        'AthenaMPJobProps.AthenaMPFlags.CollectSubprocessLogs=True'
                    )),
                          file=runargsFile)
                    if self._exe._athenaMPStrategy:
                        # Beware of clobbering a non default value (a feature used by EventService)
                        print(
                            'if AthenaMPJobProps.AthenaMPFlags.Strategy.isDefault():',
                            file=runargsFile)
                        print(
                            '\tAthenaMPJobProps.AthenaMPFlags.Strategy="{0}"'.
                            format(self._exe._athenaMPStrategy),
                            file=runargsFile)
                    if self._exe._athenaMPReadEventOrders:
                        if os.path.isfile(self._exe._athenaMPEventOrdersFile):
                            print(
                                'AthenaMPJobProps.AthenaMPFlags.ReadEventOrders=True',
                                file=runargsFile)
                        else:
                            raise trfExceptions.TransformExecutionException(
                                trfExit.nameToCode("TRF_EXEC_RUNARGS_ERROR"),
                                "Failed to find file: {0} required by athenaMP option: --athenaMPUseEventOrders true"
                                .format(self._exe._athenaMPEventOrdersFile))
                    if 'athenaMPEventsBeforeFork' in self._exe.conf.argdict:
                        print(
                            'AthenaMPJobProps.AthenaMPFlags.EventsBeforeFork={0}'
                            .format(self._exe.conf.
                                    argdict['athenaMPEventsBeforeFork'].value),
                            file=runargsFile)
                if 'CA' in self._exe.conf.argdict:
                    print(os.linesep, '# Threading flags', file=runargsFile)
                    #Pass the number of threads
                    threads = self._exe._athenaMT
                    concurrentEvents = self._exe._athenaConcurrentEvents
                    msg.debug('Adding runarg {0!s}={1!r}'.format(
                        'threads', threads))
                    print('{0}.{1!s} = {2!r}'.format(self._runArgsName,
                                                     'threads', threads),
                          file=runargsFile)
                    msg.debug('Adding runarg {0!s}={1!r}'.format(
                        'concurrentEvents', concurrentEvents))
                    print('{0}.{1!s} = {2!r}'.format(self._runArgsName,
                                                     'concurrentEvents',
                                                     concurrentEvents),
                          file=runargsFile)
                    #ComponentAccumulator based config, import skeleton here:
                    print(os.linesep,
                          '# Import skeleton and execute it',
                          file=runargsFile)
                    print('from {0} import fromRunArgs'.format(
                        self._exe._skeletonCA),
                          file=runargsFile)
                    print('fromRunArgs({0})'.format(self._runArgsName),
                          file=runargsFile)

                msg.info('Successfully wrote runargs file {0}'.format(
                    self._runArgsFile))

            except (IOError, OSError) as e:
                errMsg = 'Got an error when writing JO template {0}: {1}'.format(
                    self._runArgsFile, e)
                msg.error(errMsg)
                raise trfExceptions.TransformExecutionException(
                    trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
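For orientation, the file this method emits looks roughly like the following (a hand-assembled sketch, assuming the runargs object is named runArgs and a single BS input; every value is illustrative):

    # Run arguments file auto-generated on Mon Jan  1 00:00:00 2024 by:
    # JobTransform: RAWtoESD
    # Version: 1.0

    # Import runArgs class
    from PyJobTransforms.trfJobOptions import RunArguments
    runArgs = RunArguments()
    runArgs.trfSubstepName = 'RAWtoESD'

    runArgs.maxEvents = 25

    # Input data
    runArgs.inputBSFile = ['data.RAW']
    runArgs.inputBSFileType = 'BS'
    runArgs.BSFileIO = 'input'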
Example #28
def detectAthenaMTThreads(argdict={}):
    athenaMTThreads = 0
    athenaConcurrentEvents = 0

    # Try to detect whether AthenaMT has been enabled
    try:
        if 'athenaopts' in argdict:
            for substep in argdict['athenaopts'].value:
                threadArg = [
                    opt.replace("--threads=", "")
                    for opt in argdict['athenaopts'].value[substep]
                    if '--threads' in opt
                ]
                if len(threadArg) == 0:
                    athenaMTThreads = 0
                elif len(threadArg) == 1:
                    if 'multithreaded' in argdict:
                        raise ValueError(
                            "Detected conflicting methods to configure AthenaMT: --multithreaded and --threads=N (via athenaopts). Only one method must be used"
                        )
                    athenaMTThreads = int(threadArg[0])
                    if athenaMTThreads < -1:
                        raise ValueError(
                            "--threads was set to a value less than -1")
                else:
                    raise ValueError(
                        "--threads was set more than once in 'athenaopts'")
                msg.info(
                    'AthenaMT detected from "threads" setting with {0} threads for substep {1}'
                    .format(athenaMTThreads, substep))

                concurrentEventsArg = [
                    opt.replace("--concurrent-events=", "")
                    for opt in argdict['athenaopts'].value[substep]
                    if '--concurrent-events' in opt
                ]
                if len(concurrentEventsArg) == 1:
                    athenaConcurrentEvents = int(concurrentEventsArg[0])
                    if athenaConcurrentEvents < -1:
                        raise ValueError(
                            "--concurrent-events was set to a value less than -1"
                        )
                    msg.info(
                        'Custom concurrent event setting read from "concurrent-events" with {0} events for substep {1}'
                        .format(athenaConcurrentEvents, substep))
                else:
                    athenaConcurrentEvents = athenaMTThreads
        if (athenaMTThreads == 0 and 'ATHENA_CORE_NUMBER' in os.environ
                and 'multithreaded' in argdict):
            athenaMTThreads = int(os.environ['ATHENA_CORE_NUMBER'])
            if athenaMTThreads < -1:
                raise ValueError("ATHENA_CORE_NUMBER value was less than -1")
            msg.info(
                'AthenaMT detected from ATHENA_CORE_NUMBER with {0} threads'.
                format(athenaMTThreads))
            athenaConcurrentEvents = athenaMTThreads
    except ValueError as errMsg:
        myError = 'Problem discovering AthenaMT setup: {0}'.format(errMsg)
        raise trfExceptions.TransformExecutionException(
            trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), myError)

    return athenaMTThreads, athenaConcurrentEvents
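A minimal sketch of driving this detection (the stand-in argument class and substep key are invented; real transforms pass trfArgClasses instances):

    class FakeSubstepArg:
        def __init__(self, value):
            self.value = value   # dict: substep name -> list of athena options

    argdict = {'athenaopts': FakeSubstepArg({'all': ['--threads=4',
                                                     '--concurrent-events=2']})}
    # detectAthenaMTThreads(argdict) would return (4, 2) here; with no
    # '--concurrent-events' option, concurrent events default to the thread
    # count, and with no 'athenaopts' at all both values stay 0 unless
    # --multithreaded and ATHENA_CORE_NUMBER are set.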
Example #29
    def generateReport(self,
                       reportType=None,
                       fast=False,
                       fileReport=defaultFileReport):
        msg.debug('Transform report generator')

        if 'reportType' in self._argdict:
            if reportType is not None:
                msg.info(
                    'Transform requested report types {0} overridden by command line to {1}'
                    .format(reportType, self._argdict['reportType'].value))
            reportType = self._argdict['reportType'].value

        if reportType is None:
            reportType = [
                'json',
            ]
            # Only generate the Tier0 report at Tier0 ;-)
            # (It causes spurious warnings for some grid jobs with background files, e.g., digitisation.)
            if 'TZHOME' in os.environ:
                reportType.append('gpickle')

            if not isInteractiveEnv():
                reportType.append('text')
                msg.debug(
                    'Detected Non-Interactive environment. Enabled text report'
                )

        if 'reportName' in self._argdict:
            baseName = classicName = self._argdict['reportName'].value
        else:
            baseName = 'jobReport'
            classicName = 'metadata'

        try:
            # Text: Writes environment variables and machine report in text format.
            if reportType is None or 'text' in reportType:
                envName = baseName if 'reportName' in self._argdict else 'env'  # Use fallback name 'env.txt' if it's not specified.
                self._report.writeTxtReport(filename='{0}.txt'.format(envName),
                                            fast=fast,
                                            fileReport=fileReport)
            # JSON
            if reportType is None or 'json' in reportType:
                self._report.writeJSONReport(
                    filename='{0}.json'.format(baseName),
                    fast=fast,
                    fileReport=fileReport)
            # Classic XML
            if reportType is None or 'classic' in reportType:
                self._report.writeClassicXMLReport(
                    filename='{0}.xml'.format(classicName), fast=fast)
            # Classic gPickle
            if reportType is None or 'gpickle' in reportType:
                self._report.writeGPickleReport(
                    filename='{0}.gpickle'.format(baseName), fast=fast)
            # Pickled version of the JSON report for pilot
            if reportType is None or 'pilotPickle' in reportType:
                self._report.writePilotPickleReport(
                    filename='{0}Extract.pickle'.format(baseName),
                    fast=fast,
                    fileReport=fileReport)

        except trfExceptions.TransformTimeoutException as reportException:
            msg.error('Received timeout when writing report ({0})'.format(
                reportException))
            msg.error(
                'Report writing is aborted - sorry. Transform will exit with TRF_METADATA_CALL_FAIL status.'
            )
            if 'orphanKiller' in self._argdict:
                infanticide(message=True, listOrphans=True)
            else:
                infanticide(message=True)
            sys.exit(trfExit.nameToCode('TRF_METADATA_CALL_FAIL'))
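Assuming no reportName override, the default file names written by the branches above are:

    # Default report file names (no 'reportName' in argdict):
    reportFiles = {
        'text':        'env.txt',                  # fallback name 'env'
        'json':        'jobReport.json',
        'classic':     'metadata.xml',
        'gpickle':     'jobReport.gpickle',
        'pilotPickle': 'jobReportExtract.pickle',
    }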
Example #30
def athenaMPOutputHandler(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary, athenaMPworkers, skipFileChecks=False, argdict={}):
    msg.debug("MP output handler called for report {0} and workers in {1}, data types {2}".format(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary.keys()))
    outputHasBeenHandled = dict([ (dataType, False) for dataType in dataDictionary.keys() if dataDictionary[dataType] ])

    # if sharedWriter mode is active ignore athenaMPFileReport
    sharedWriter=False
    if 'sharedWriter' in argdict and argdict['sharedWriter'].value:
        sharedWriter=True
        skipFileChecks=True

    if not sharedWriter:
        # First, see what AthenaMP told us
        mpOutputs = ElementTree.ElementTree()
        try:
            mpOutputs.parse(athenaMPFileReport)
        except IOError:
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Missing AthenaMP outputs file {0} (probably athena crashed)".format(athenaMPFileReport))
        for filesElement in mpOutputs.getroot().iter(tag='Files'):
            msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib))
            originalArg = None 
            startName = filesElement.attrib['OriginalName']
            for dataType, fileArg in dataDictionary.items():
                if fileArg.value[0] == startName:
                    originalArg = fileArg
                    outputHasBeenHandled[dataType] = True
                    break
            if originalArg is None:
                msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(startName))
                continue
        
            msg.debug('Found matching argument {0}'.format(originalArg))
            fileNameList = []
            for fileElement in filesElement.iter(tag='File'):
                msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib))
                fileNameList.append(path.relpath(fileElement.attrib['name']))

            athenaMPoutputsLinkAndUpdate(fileNameList, originalArg)

    # Now look for additional outputs that have not yet been handled
    if not all(outputHasBeenHandled.values()):
        # OK, we have something we need to search for; cache the dirwalk here
        MPdirWalk = [ dirEntry for dirEntry in os.walk(athenaMPWorkerTopDir) ]

        for dataType, fileArg in dataDictionary.items():
            if outputHasBeenHandled[dataType]:
                continue
            if fileArg.io == "input":
                continue
            msg.info("Searching MP worker directories for {0}".format(dataType))
            startName = fileArg.value[0]
            fileNameList = []
            for entry in MPdirWalk:
                if "evt_count" in entry[0]:
                    continue
                if "range_scatterer" in entry[0]:
                    continue
                # N.B. AthenaMP may have made the output name unique for us, so 
                # we need to treat the original name as a prefix
                possibleOutputs = [ fname for fname in entry[2] if fname.startswith(startName) ]
                if len(possibleOutputs) == 0:
                    continue
                elif len(possibleOutputs) == 1:
                    fileNameList.append(path.join(entry[0], possibleOutputs[0]))
                elif skipFileChecks:
                    pass
                else:
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found multiple matching outputs for datatype {0} in {1}: {2}".format(dataType, entry[0], possibleOutputs))
            if skipFileChecks:
                pass
            elif len(fileNameList) != athenaMPworkers:
                raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found {0} output files for {1}, expected {2} (found: {3})".format(len(fileNameList), dataType, athenaMPworkers, fileNameList))

            # Found expected number of files - good!
            athenaMPoutputsLinkAndUpdate(fileNameList, fileArg)
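The prefix matching used in the worker-directory search can be illustrated with a single os.walk entry (paths and file names invented):

    from os import path

    # One (dirpath, dirnames, filenames) tuple as produced by os.walk
    entry = ('athenaMP-workers/worker_0', [],
             ['ESD.pool.root_000.worker1', 'AthenaMP.log'])
    startName = 'ESD.pool.root'
    possibleOutputs = [fname for fname in entry[2] if fname.startswith(startName)]
    # -> ['ESD.pool.root_000.worker1']; exactly one match, so it is collected as
    print(path.join(entry[0], possibleOutputs[0]))
    # athenaMP-workers/worker_0/ESD.pool.root_000.worker1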