def appendToExecutorSet(self, executors):
    """Add a single executor or an iterable of executors to this transform.

    Each executor gets its @c trf back-reference set to this transform and is
    registered in the name-indexed executor dictionary; executor names must be
    unique within a transform.

    @param executors: a single transformExecutor instance, or a list/tuple/set of them
    @raises trfExceptions.TransformInternalException: if @c executors is of an
    unsupported type, or if two executors share the same name
    """
    # Normalise to something iterable
    if isinstance(executors, transformExecutor):
        executors = [executors, ]
    elif not isinstance(executors, (list, tuple, set)):
        raise trfExceptions.TransformInternalException(trfExit.nameToCode('TRF_INTERNAL'),
                                                       'Transform was initialised with an executor which was not a simple executor or an executor set')

    # TRY TO DEPRECATE SETTING trf IN THE EXECUTOR - USE CONF!
    # Executor book keeping: set parent link back to me for all executors
    # Also setup a dictionary, indexed by executor name and check that name is unique
    ## Setting conf here not working - too early to get the dataDictionary
    for executor in executors:
        executor.trf = self
        if executor.name in self._executorDictionary:
            raise trfExceptions.TransformInternalException(trfExit.nameToCode('TRF_INTERNAL'),
                                                           'Transform has been initialised with two executors with the same name ({0})'
                                                           ' - executor names must be unique'.format(executor.name))
        self._executors.add(executor)
        self._executorDictionary[executor.name] = executor
def athenaMPoutputsLinkAndUpdate(newFullFilenames, fileArg):
    """Link or rename AthenaMP worker output files into the current directory.

    Worker outputs living in subdirectories get numbered link names in the
    current directory; when there is exactly one output it is instead renamed
    back to the originally requested filename of @c fileArg.

    @param newFullFilenames: list of output file paths produced by the workers
    @param fileArg: the output file argument these files belong to
    @raises trfExceptions.TransformExecutionException: if a rename or symlink fails
    """
    # Any files we link are numbered from 1, because we always set
    # the filename given to athena has _000 as a suffix so that the
    # mother process' file can be used without linking
    fileIndex = 1
    linkedNameList = []
    newFilenameValue = []
    for fname in newFullFilenames:
        if path.dirname(fname) == "":
            # Already in the current directory - no link needed
            linkedNameList.append(None)
            newFilenameValue.append(fname)
        else:
            # NOTE(review): rstrip('0') strips *all* trailing zeros, not just the
            # '_000' suffix - assumes worker filenames always end in the serial
            # suffix; confirm against the producer of these names
            linkName = "{0}{1:03d}".format(path.basename(fname).rstrip('0'), fileIndex)
            linkedNameList.append(linkName)
            newFilenameValue.append(linkName)
            fileIndex += 1
    # NOTE(review): newFilenameValue is built here but not applied to fileArg
    # within this view - confirm the remainder/caller consumes it
    for linkname, fname in zip(linkedNameList, newFullFilenames):
        if linkname:
            if len(newFullFilenames) == 1:
                # Single output - move it back to the originally requested name
                try:
                    os.rename(fname, fileArg.originalName)
                    newFilenameValue[0] = fileArg.originalName
                except OSError as e:  # 'as' form - required for Python 3
                    # Fixed: message now reports the real rename target
                    # (previously formatted 'linkname', which was not the target)
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"),
                                                                    "Failed to move {0} to {1}: {2}".format(fname, fileArg.originalName, e))
            else:
                # Multiple outputs - symlink each into the current directory
                try:
                    if path.lexists(linkname):
                        os.unlink(linkname)
                    os.symlink(fname, linkname)
                except OSError as e:
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"),
                                                                    "Failed to link {0} to {1}: {2}".format(fname, linkname, e))
def doToposort(self):
    """Topologically sort the graph nodes (Kahn's algorithm).

    Fills @c self._toposort with node names in dependency order and
    @c self._toposortData with the data types in the corresponding order
    (inputs of a node before its outputs, duplicates suppressed).

    @raises trfExceptions.TransformGraphException: if the graph has no start
    nodes or contains cycles (i.e. it is not a DAG)
    """
    # We will manipulate the graph, so deepcopy it
    graphCopy = copy.deepcopy(self._nodeDict)
    # Find all valid start nodes in this graph - ones with no data dependencies themselves
    startNodeNames = []
    for nodeName, node in iteritems(graphCopy):
        if len(node.connections['in']) == 0:
            startNodeNames.append(nodeName)
    if len(startNodeNames) == 0:
        raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                                                    'There are no starting nodes in this graph - non-DAG graphs are not supported')
    msg.debug('Found this list of start nodes for toposort: {0}'.format(startNodeNames))
    # The startNodeNames holds the list of nodes with their dependencies now satisfied (no input edges anymore)
    while len(startNodeNames) > 0:
        # Take the next startNodeName and zap it from the graph
        theNodeName = startNodeNames.pop()
        theNode = graphCopy[theNodeName]
        self._toposort.append(theNodeName)
        del graphCopy[theNodeName]
        # Now delete the edges this node was a source for
        msg.debug('Considering connections from node {0}'.format(theNodeName))
        for connectedNodeName in theNode.connections['out']:
            graphCopy[connectedNodeName].delConnection(toExe=theNodeName, direction='in')
            # Look for nodes which now have their dependencies satisfied
            if len(graphCopy[connectedNodeName].connections['in']) == 0:
                startNodeNames.append(connectedNodeName)
    # If there are nodes left then the graph has cycles, which means it's not a DAG
    if len(graphCopy) > 0:
        raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                                                    'Graph topological sort had no more start nodes, but nodes were left {0} - non-DAG graphs are not supported'.format(list(graphCopy)))
    msg.debug('Topologically sorted node order: {0}'.format(self._toposort))
    # Now toposort the input data for nodes
    self._toposortData = []
    for nodeName in self._toposort:
        # First add input data, then output data
        for dataType in self._nodeDict[nodeName].inputDataTypes:
            if dataType not in self._toposortData:
                self._toposortData.append(dataType)
        for dataType in self._nodeDict[nodeName].outputDataTypes:
            if dataType not in self._toposortData:
                self._toposortData.append(dataType)
    msg.debug('Topologically sorted data order: {0}'.format(self._toposortData))
def preExecute(self, input=None, output=None):
    """Prepare for BSJobSplitterExecutor execution.

    Unpacks an optional HI TAR file, resolves the bytestream input either
    directly (inputZeroBiasBSFile) or via an inputBSCONFIGFile + jobNumber
    pair, then delegates to the parent preExecute.

    @param input: set of input data types for this substep (default: empty set)
    @param output: set of output data types for this substep (default: empty set)
    @raises trfExceptions.TransformSetupException: on unpacking errors or
    inconsistent configuration
    """
    # Fixed: 'input=set()' was a shared mutable default that this method
    # mutates (input.add below) - use None and create a fresh set per call
    if input is None:
        input = set()
    if output is None:
        output = set()
    msg.info('Preparing for BSJobSplitterExecutor execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))
    # See if we need to unpack a TAR file
    if 'hitarFile' in self.conf.argdict:
        print ("Untarring inputHITARFile", self.conf.argdict['hitarFile'].value)
        try:
            f = tarfile.open(name=self.conf.argdict['hitarFile'].value[0])
            f.list()
            f.extractall()
            f.close()
        except Exception as e:
            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting HI input files for transform: {0}'.format(e))
    # There are two ways to configure this transform:
    # - Give an inputZeroBiasBSFile argument directly
    # - Give a inputBSCONFIGFile and jobNumber argument
    # Check now that we have a configuration that works
    if 'inputZeroBiasBSFile' in self.conf.argdict and 'inputBSCONFIGFile' in self.conf.argdict:
        #raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Both inputZeroBiasBSFile and inputBSCONFIGFile have been specified - please use only one.')
        del self.conf.argdict['inputZeroBiasBSFile']
        print ("WARNING - removed the inputZeroBiasBSFile argument, because inputZeroBiasBSFile and inputBSCONFIGFile were already specified")
    if 'inputBSCONFIGFile' in self.conf.argdict:
        if 'jobNumber' not in self.conf.argdict:
            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputBSCONFIGFile is specified, but no jobNumber was given.')
        # Job number has to wrap around from 500, dropping back to 1
        wrappedJobNumber = (self.conf.argdict['jobNumber'].value-1)%500 + 1
        self._inputFilelist = 'filelist_{0}.txt'.format(wrappedJobNumber)
        self._lbnList = 'lbn_anal_map_{0}.txt'.format(wrappedJobNumber)
        try:
            print (self.conf.argdict['inputBSCONFIGFile'].value)
            f = tarfile.open(name=self.conf.argdict['inputBSCONFIGFile'].value[0])
            f.extract('filelist_{0}.txt'.format(wrappedJobNumber))
            f.extract('lbn_anal_map_{0}.txt'.format(wrappedJobNumber))
            f.close()
            # First line of the filelist is a comma separated list of BS files
            bsInputs = open(self._inputFilelist).readline().rstrip().split(',')
            self.conf.addToArgdict('inputZeroBiasBSFile', trfArgClasses.argBSFile(bsInputs, io='input', type='BS', subtype='BS_ZeroBias'))
            self.conf.addToDataDictionary('ZeroBiasBS', self.conf.argdict['inputZeroBiasBSFile'])
            input.add('ZeroBiasBS')
            msg.info('Validating resolved input bytestream files')
            trfValidation.performStandardFileValidation({'ZeroBiasBS': self.conf.argdict['inputZeroBiasBSFile']}, io='input')
        except Exception as e:
            raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting input files for transform: {0}'.format(e))
        # Now setup correct input arguments
        self.conf.argdict['InputLbnMapFile'] = trfArgClasses.argString(self._lbnList)
        self.conf.argdict['InputFileMapFile'] = trfArgClasses.argString(self._inputFilelist)
    else:
        #if 'lumiBlockMapFile' not in self.conf.argdict:
        #    raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputZeroBiasBSFile is specified, but no lumiBlockMapFile was given.')
        #self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']
        if 'lumiBlockMapFile' in self.conf.argdict:
            self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']
    super(BSJobSplitterExecutor, self).preExecute(input=input, output=output)
def exception_wrapper(*args, **kwargs):
    """Outer exception handler wrapping the transform entry point @c func.

    Converts uncaught exceptions into clean process exits: keyboard
    interrupts, transform setup failures and general transform exceptions
    each map to a specific exit code; child processes are killed first.
    """
    # Setup imports which the wrapper needs
    import signal
    import traceback
    import PyJobTransforms.trfExceptions as trfExceptions
    try:
        return func(*args, **kwargs)
    except KeyboardInterrupt:
        msg.critical('Caught a keyboard interrupt - exiting at your request.')
        # Kill any child processes before exiting ourselves
        trfUtils.infanticide(message=True)
        # Conventional exit code for death by signal: 128 + signal number
        sys.exit(128 + signal.SIGINT)
    # This subclass is treated as a 'normal' exit condition
    # but it should never happen in production as it's a transform definition error
    except trfExceptions.TransformSetupException as e:
        msg.critical('Transform setup failed: {0}'.format(e.errMsg))
        msg.critical('To help you debug here is the stack trace:')
        msg.critical(traceback.format_exc(None))
        msg.critical('(Early exit - no job report is produced)')
        trfUtils.infanticide(message=True)
        sys.exit(e.errCode)
    except trfExceptions.TransformException as e:
        msg.critical('Got a transform exception in the outer exception handler: {0!s}'.format(e))
        msg.critical('Stack trace is...')
        msg.critical(traceback.format_exc(None))
        msg.critical('Job reports are likely to be missing or incomplete - sorry')
        msg.critical('Please report this as a transforms bug!')
        trfUtils.infanticide(message=True)
        sys.exit(trfExit.nameToCode('TRF_UNEXPECTED_TRF_EXCEPTION'))
    except Exception as e:
        msg.critical('Got a general exception in the outer exception handler: {0!s}'.format(e))
        msg.critical('Stack trace is...')
        msg.critical(traceback.format_exc(None))
        msg.critical('Job reports are likely to be missing or incomplete - sorry')
        msg.critical('Please report this as a transforms bug!')
        trfUtils.infanticide(message=True)
        sys.exit(trfExit.nameToCode('TRF_UNEXPECTED_OTHER_EXCEPTION'))
def detectAthenaMPProcs(argdict=None):
    """Detect whether AthenaMP is enabled and return the number of workers.

    Checks the ATHENA_PROC_NUMBER environment variable first, then any
    '--nprocs' flag present in the 'athenaopts' argument value.

    @param argdict: transform argument dictionary (defaults to an empty dict)
    @return: number of AthenaMP worker processes (0 means AthenaMP is not enabled)
    @raises trfExceptions.TransformExecutionException: for negative or
    multiply-set worker counts
    """
    # Fixed: was a shared mutable default argument ('argdict = {}')
    if argdict is None:
        argdict = {}
    athenaMPProcs = 0
    # Try and detect if any AthenaMP has been enabled
    try:
        if 'ATHENA_PROC_NUMBER' in os.environ:
            athenaMPProcs = int(os.environ['ATHENA_PROC_NUMBER'])
            if athenaMPProcs < 0:
                raise ValueError("ATHENA_PROC_NUMBER value was less than zero")
            msg.info('AthenaMP detected from ATHENA_PROC_NUMBER with {0} workers'.format(athenaMPProcs))
        elif 'athenaopts' in argdict:
            for substep in argdict['athenaopts'].value:
                procArg = [opt.replace("--nprocs=", "") for opt in argdict['athenaopts'].value[substep] if '--nprocs' in opt]
                if len(procArg) == 0:
                    athenaMPProcs = 0
                elif len(procArg) == 1:
                    athenaMPProcs = int(procArg[0])
                    if athenaMPProcs < 0:
                        raise ValueError("--nprocs was set to a value less than zero")
                else:
                    raise ValueError("--nprocs was set more than once in 'athenaopts'")
                msg.info('AthenaMP detected from "nprocs" setting with {0} workers for substep {1}'.format(athenaMPProcs, substep))
    except ValueError as errMsg:  # 'as' form - required for Python 3
        myError = 'Problem discovering AthenaMP setup: {0}'.format(errMsg)
        raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), myError)
    # Fixed: the detected worker count was computed but never returned
    return athenaMPProcs
def postExecute(self):
    """Rename the costmon 'trig_cost.root' output to the requested filename.

    costmon always writes 'trig_cost.root'; to be saved on panda the file
    must be renamed to the value of the outputNTUP_TRIGCOSTFile argument.

    @raises trfExceptions.TransformExecutionException: if the rename fails
    """
    msg.info("Check for trig_cost.root file")
    # costmon generates the file trig_cost.root
    # to save on panda it needs to be renamed via the outputNTUP_TRIGCOSTFile argument
    expectedFileName = 'trig_cost.root'
    # first check argument is in dict
    if 'outputNTUP_TRIGCOSTFile' in self.conf.argdict:
        # check file is created
        if os.path.isfile(expectedFileName):
            msg.info('Renaming %s to %s' % (expectedFileName, self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0]))
            try:
                os.rename(expectedFileName, self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0])
            except OSError as e:  # 'as' form - required for Python 3
                raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
                                                                'Exception raised when renaming {0} to {1}: {2}'.format(expectedFileName, self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0], e))
        else:
            msg.error('NTUP_TRIGCOST argument defined %s but %s not created' % (self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0], expectedFileName))
def checkFileList(filelist):
    """Converts list of files of type ds#filename into a list of filenames, meanwhile setting ds value. If check is true it also checks the existence of the files."""
    # Accept a bare filename as well as a list of filenames
    if not isinstance(filelist, list):
        filelist = [filelist]
    for idx, entry in enumerate(filelist):
        # extract ds, runnumber and svcclass
        fname = getDsFileName(entry)
        # Files living on castor skip the existence check entirely
        onCastor = fname.find('/castor', 0, 8) != -1
        if not onCastor and not fileutil.exists(fname):
            # Maybe the file exists with a numeric suffix appended
            candidate = fileutil.exists_suffix_number(fname + '.')
            if not candidate:
                errMsg = fname + ' not found'
                raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_INPUT_FILE_VALIDATION_FAIL'), errMsg)
            if candidate != fname:
                fname = candidate
        # correct filename in list
        filelist[idx] = fname
    return filelist
def classicSinglePython(self, filename, fast=False):
    """Return a classic gpickle-style dictionary describing one file.

    Primary metadata keys are mapped through _internalToGpickleMap; extra
    metadata goes under the 'more'/'metadata' sub-dictionary. Unknown
    ('UNDEFINED') values are dropped, except for the checksum which is
    reported as None by old convention.
    """
    if filename not in self._fileArg.value:
        raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                                                     'Unknown file ({0}) in the file report for {1}'.format(filename, self._fileArg))
    populate = not fast
    # Direct population of some keys
    report = {'lfn': filename, 'dataset': self._fileArg.dataset, }
    # Fill in the mapped 'primary' keys
    for internalKey, classicKey in iteritems(self._internalToGpickleMap):
        value = self._fileArg.getSingleMetadata(fname=filename, metadataKey=internalKey, populate=populate)
        if value == 'UNDEFINED':
            # Old style is that we give back None for an unknown checksum;
            # any other unknown value is simply suppressed
            if classicKey == 'checkSum':
                report[classicKey] = None
        else:
            report[classicKey] = value
    # Base 'more' stuff which is known by the argFile itself
    moreMetadata = {'fileType': self._fileArg.type}
    for internalKey, classicKey in iteritems(self._internalToGpickleMoreMap):
        value = self._fileArg.getSingleMetadata(fname=filename, metadataKey=internalKey, populate=populate)
        if value != 'UNDEFINED':
            moreMetadata[classicKey] = value
    report['more'] = {'metadata': moreMetadata}
    return report
def preExecute(self, input=set(), output=set()):
    """Build a fast run/event lookup table from the input RAW bytestream files.

    Runs AtlListBSEvents.exe over the input BS files and records every
    run/event pair found, so the (potentially huge) HI filter file can later
    be reduced to only events actually present in the RAW input.

    @raises trfExceptions.TransformExecutionException: if AtlListBSEvents.exe fails
    """
    # First we need to strip the filter file down to events that are present
    # in the RAW file we are going to skim. This is because the HI workflow
    # will provide millions of events in their filter file, more than acmd.py
    # can cope with.
    listEvtCommand = ['AtlListBSEvents.exe', '-l']
    listEvtCommand.extend(self.conf.argdict['inputBSFile'].value)
    # For best lookup speed, we store the runnumber/eventnumber in a dictionary (set would also
    # be fast)
    rawEventList = {}
    try:
        for line in subprocess.check_output(listEvtCommand).split("\n"):
            if line.startswith("Index="):
                try:
                    splitStrings = line.split(" ")
                    runprefix, runstr = splitStrings[1].split("=")
                    evtprefix, evtstr = splitStrings[2].split("=")
                    # Check sanity
                    if runprefix != "Run" or evtprefix != "Event":
                        msg.warning("Failed to understand this line from AtlListBSEvents: {0}".format(line))
                    else:
                        # int() conversions act as validation - a malformed
                        # number raises ValueError, caught just below
                        runnumber = int(runstr)
                        evtnumber = int(evtstr)
                        # We build up a string key as "RUN-EVENT", so that we can take advantage of
                        # the fast hash search against a dictionary
                        rawEventList[runstr + "-" + evtstr] = True
                        msg.debug("Identified run {0}, event {1} in input RAW files".format(runstr, evtstr))
                except ValueError as e:  # 'as' form - required for Python 3
                    msg.warning("Failed to understand this line from AtlListBSEvents: {0}".format(line))
    except subprocess.CalledProcessError as e:
        errMsg = "Call to AtlListBSEvents.exe failed: {0}".format(e)
        # Fixed: was 'msg.error(erMsg)' - a typo causing a NameError on this path
        msg.error(errMsg)
        raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_EXEC_SETUP_FAIL"), errMsg)
def findExecutionPath(self):
    """Work out which graph nodes need to be executed.

    Walks the output data types in topological order, finding the cheapest
    path that produces each one from the currently available data, enabling
    the nodes on that path and accumulating their input/output data types
    in @c self._execution.

    @raises trfExceptions.TransformGraphException: if a required data type
    cannot be produced
    """
    # Switch off all nodes, except if we have a single node which is not data driven...
    self._execution = {}
    for nodeName, node in self._nodeDict.items():
        # Fixed: the original tested 'inputDataTypes == set()' twice; the
        # second test must be on outputDataTypes for "not data driven" to
        # mean anything
        if len(self._nodeDict) == 1 and node.inputDataTypes == set() and node.outputDataTypes == set():
            self._execution[nodeName] = {'enabled': True, 'input': set(), 'output': set()}
        else:
            self._execution[nodeName] = {'enabled': False, 'input': set(), 'output': set()}
    dataToProduce = copy.deepcopy(self._outputData)
    dataAvailable = copy.deepcopy(self._inputData)
    # Consider the next data type in topo order
    while len(dataToProduce) > 0:
        nextDataType = None
        for dataType in self._toposortData:
            if dataType in dataToProduce:
                nextDataType = dataType
                dataToProduce.remove(nextDataType)
                dataAvailable.update([nextDataType])
                break
        if not nextDataType:
            msg.error('Still have to produce data type(s) {0}, but did not find anything in the toposorted data list ({1}).'
                      ' Transform parameters/graph are broken so aborting.'.format(dataToProduce, self._toposortData))
            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 'Data type graph error')
        msg.debug('Next data type to try is {0}'.format(nextDataType))
        bestPath = self._bestPath(nextDataType, dataAvailable)
        msg.debug('Found best path for {0}: {1}'.format(nextDataType, bestPath))
        ## @note Use @c modPath to construct an array which we iterate over in pairs of (currentNode, nextNode)
        modPath = bestPath.path + [None]
        for (nodeName, nextNodeName) in [(n, modPath[modPath.index(n)+1]) for n in bestPath.path]:
            self._execution[nodeName]['enabled'] = True
            # Add the necessary data types to the output of the first node and the input of the next
            if nodeName in bestPath.newData:
                self._execution[nodeName]['output'].update(bestPath.newData[nodeName])
                for newData in bestPath.newData[nodeName]:
                    if newData not in dataAvailable:
                        dataToProduce.update([newData])
                if nextNodeName:
                    self._execution[nextNodeName]['input'].update(bestPath.newData[nodeName])
                    if nextNodeName in bestPath.extraData:
                        # Fixed: index with nextNodeName (the key just tested
                        # for membership), not nodeName, to avoid a KeyError
                        self._execution[nextNodeName]['input'].update(bestPath.extraData[nextNodeName])
        # Add any extra data we need (from multi-exit nodes) to the data to produce list
        for extraNodeData in bestPath.extraData.values():
            for extra in extraNodeData:
                if extra not in dataAvailable:
                    dataToProduce.update([extra])
    # Now remove the fake data objects from activated nodes
    for node, props in self._execution.items():
        msg.debug('Removing fake data from node {0}'.format(node))
        props['input'] -= set(['inNULL', 'outNULL'])
        props['output'] -= set(['inNULL', 'outNULL'])
    msg.debug('Execution dictionary: {0}'.format(self._execution))
def classicSingleEltree(self, filename, fast=False):
    """Return a classic 'File' ElementTree element describing one file.

    The element carries the file GUID as 'ID', an lfn sub-element and one
    'metadata' sub-element per mapped key, plus fileType and (optionally)
    dataset metadata from the argFile itself.

    @param filename: file to report on (must be in the argFile's value list)
    @param fast: if True, do not (re)populate metadata
    @raises trfExceptions.TransformReportException: for an unknown file
    """
    if filename not in self._fileArg.value:
        raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
                                                     'Unknown file ({0}) in the file report for {1}'.format(filename, self._fileArg))
    tree = ElementTree.Element('File', ID=str(self._fileArg.getSingleMetadata(fname=filename, metadataKey='file_guid', populate=not fast)))
    logical = ElementTree.SubElement(tree, 'logical')
    lfn = ElementTree.SubElement(logical, 'lfn', name=filename)
    # Fixed: use .items() (py2 .iteritems() does not exist in py3) and '=='
    # instead of identity tests ('is') against literals below
    for myKey, classicKey in self._internalToClassicMap.items():
        # beam_type is tricky - we return only the first list value,
        # (but remember, protect against funny stuff!)
        if myKey == 'beam_type':
            beamType = self._fileArg.getSingleMetadata(fname=filename, metadataKey=myKey, populate=not fast)
            if isinstance(beamType, list):
                if len(beamType) == 0:
                    ElementTree.SubElement(tree, 'metadata', att_name=classicKey, att_value='')
                else:
                    ElementTree.SubElement(tree, 'metadata', att_name=classicKey, att_value=str(beamType[0]))
            else:
                # This is really not normal, but best we can do is str conversion
                ElementTree.SubElement(tree, 'metadata', att_name=classicKey, att_value=str(beamType))
        else:
            ElementTree.SubElement(tree, 'metadata', att_name=classicKey,
                                   att_value=str(self._fileArg.getSingleMetadata(fname=filename, metadataKey=myKey, populate=not fast)))
    # Now add the metadata which is stored at the whole argument level
    ElementTree.SubElement(tree, 'metadata', att_name='fileType', att_value=str(self._fileArg.type))
    if self._fileArg.dataset is not None:
        ElementTree.SubElement(tree, 'metadata', att_name='dataset', att_value=self._fileArg.dataset)
    return tree
def exitCode(self):
    """Return the transform exit code, or TRF_UNKNOWN if it was never set."""
    # Fixed: 'is None' identity test instead of '== None'
    if self._exitCode is None:
        msg.warning('Transform exit code getter: _exitCode is unset, returning "TRF_UNKNOWN"')
        return trfExit.nameToCode('TRF_UNKNOWN')
    else:
        return self._exitCode
def _doSteering(self, steeringDict=None):
    """Apply steering directives to the executors' input/output data sets.

    Steering consists of (in/out, +/-, datatype) tuples per substep; '+'
    adds the datatype to the executor's in/out data, '-' removes it.

    @param steeringDict: steering dictionary; defaults to the transform's
    'steering' argument value
    @raises trfExceptions.TransformSetupException: if a substep is unknown
    or an add/remove has no effect (datatype already/never present)
    """
    if not steeringDict:
        steeringDict = self._argdict['steering'].value
    # Fixed: .items() instead of py2-only .iteritems()
    for substep, steeringValues in steeringDict.items():
        foundSubstep = False
        for executor in self._executors:
            if executor.name == substep or executor.substep == substep:
                foundSubstep = True
                msg.debug('Updating {0} with {1}'.format(executor.name, steeringValues))
                # Steering consists of tuples with (in/out, +/-, datatype)
                for steeringValue in steeringValues:
                    if steeringValue[0] == 'in':
                        startSet = executor.inData
                    else:
                        startSet = executor.outData
                    origLen = len(startSet)
                    msg.debug('Data values to be modified are: {0}'.format(startSet))
                    # Fixed: equality test instead of identity ('is') against
                    # the '+' string literal
                    if steeringValue[1] == '+':
                        startSet.add(steeringValue[2])
                        if len(startSet) != origLen + 1:
                            raise trfExceptions.TransformSetupException(
                                trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
                                'Attempting to add data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype already there?'.format(steeringValue[2], executor.name, steeringValue[1], startSet))
                    else:
                        startSet.discard(steeringValue[2])
                        if len(startSet) != origLen - 1:
                            raise trfExceptions.TransformSetupException(
                                trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
                                'Attempting to remove data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype even present?'.format(steeringValue[2], executor.name, steeringValue[1], startSet))
                    msg.debug('Updated data values to: {0}'.format(startSet))
        if not foundSubstep:
            raise trfExceptions.TransformSetupException(
                trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
                'This transform has no executor/substep {0}'.format(substep))
def __init__(self, executorSet, inputData=set([]), outputData=set([])):
    """Build the executor graph from a set of executors plus transform I/O data.

    Adds one graph node per executor, plus pseudo '_start' and '_end_<type>'
    nodes so that every data edge connects a real pair of nodes, then finds
    the node connections.

    NOTE(review): the mutable default arguments are assigned directly to
    executor.inData/outData for the single-executor case - confirm callers
    always pass explicit sets.

    @param executorSet: set of transform executors
    @param inputData: data types entering the transform
    @param outputData: data types the transform must produce
    @raises trfExceptions.TransformSetupException: if a datatype is both
    consumed and produced by the transform
    """
    # Set basic node list
    self._nodeDict = {}
    msg.info('Transform graph input data: {0}; output data {1}'.format(inputData, outputData))
    if len(executorSet) == 1:
        # Single executor - in this case inData/outData is not mandatory, so we set them to the
        # input/output data of the transform
        executor = list(executorSet)[0]
        if len(executor._inData) == 0 and len(executor._outData) == 0:
            executor.inData = inputData
            executor.outData = outputData
    for executor in executorSet:
        self.addNode(executor)
    self._inputData = set(inputData)
    self._outputData = set(outputData)
    # It's forbidden for a transform to consume and produce the same datatype
    dataOverlap = self._inputData & self._outputData
    if len(dataOverlap) > 0:
        raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                                                    'Transform definition error, you cannot produce and consume the same datatypes in a transform. Duplicated input/output types {0}.'.format(' '.join(dataOverlap)))
    # Add a pseudo-start/stop nodes, from which input data flows and output data finally arrives
    # This makes the graph 'concrete' for this job
    # This is useful as then data edges all connect properly to a pair of nodes
    # We add a node for every possible output as this enables topo sorting of the graph
    # nodes for any intermediate data end nodes as well
    pseudoNodes = dict()
    pseudoNodes['_start'] = graphNode(name='_start', inData=[], outData=self._inputData, weight=0)
    for node in itervalues(self._nodeDict):
        for dataType in node.outputDataTypes:
            endNodeName = '_end_{0}'.format(dataType)
            pseudoNodes[endNodeName] = graphNode(name=endNodeName, inData=[dataType], outData=[], weight=0)
    self._nodeDict.update(pseudoNodes)
    # Toposort not yet done
    self._toposort = []
    self._toposortData = []
    # Now find connections between nodes
    self.findConnections()
def test_illegalName(self):
    """Check that an illegal --DBRelease value exits with TRF_DBRELEASE_PROBLEM."""
    cmd = ['Athena_tf.py', '--DBRelease', 'FailMeHarder']
    msg.info('Will run this transform: {0}'.format(cmd))
    proc = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1)
    # Echo the transform's output while it is still running
    while proc.poll() is None:
        sys.stdout.write(proc.stdout.readline())
    # Hoover up remaining buffered output lines
    for remaining in proc.stdout:
        sys.stdout.write(remaining)
    self.assertEqual(proc.returncode, trfExit.nameToCode('TRF_DBRELEASE_PROBLEM'))
def _tracePath(self): self._executorGraph.findExecutionPath() self._executorPath = self._executorGraph.execution if len(self._executorPath) is 0: raise trfExceptions.TransformSetupException( trfExit.nameToCode('TRF_SETUP'), 'Execution path finding resulted in no substeps being executed' '(Did you correctly specify input data for this transform?)') # Tell the first executor that they are the first self._executorDictionary[self._executorPath[0] ['name']].conf.firstExecutor = True
def singleFilePython(self, filename, fast=False, type='full', basename=True):
    """Return a python dictionary report for a single file.

    @param filename: file to report on (must be in the argFile's value list)
    @param fast: if True, do not (re)populate metadata
    @param type: 'full' for all metadata (minus internal keys), 'name' for GUID only
    @param basename: report the file under its basename instead of its path
    @raises trfExceptions.TransformReportException: for an unknown file or report type
    """
    if filename not in self._fileArg.value:
        raise trfExceptions.TransformReportException(
            trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
            'Unknown file ({0}) in the file report for {1}'.format(filename, self._fileArg))
    # Report under the basename or a normalised relative path
    reportedName = os.path.basename(filename) if basename else os.path.relpath(os.path.normpath(filename))
    entry = {'name': reportedName}
    populate = not fast
    if type == 'name':
        # For 'name' we return only the GUID
        entry.update(self._fileArg.getMetadata(files=filename, populate=populate, metadataKeys=['file_guid'])[filename])
    elif type == 'full':
        # Suppress io because it's the key at a higher level and _exists because it's internal
        entry.update(self._fileArg.getMetadata(files=filename, populate=populate,
                                               maskMetadataKeys=['io', '_exists', 'integrity', 'file_type'])[filename])
    else:
        raise trfExceptions.TransformReportException(
            trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
            'Unknown file report type ({0}) in the file report for {1}'.format(type, self._fileArg))
    return entry
def writeTranslate(runTranslate, runArgs, name, substep, first, output):
    """Write the substep's job option translation ('option = ...') to a file.

    @param runTranslate: path of the translation file to write
    @param runArgs: transform run arguments passed to getOption
    @param name, substep, first, output: substep context forwarded to getOption
    @raises trfExceptions.TransformExecutionException: if writing fails
    """
    msg.info('Writing options to file \"%s\"' % runTranslate)
    option = getOption(runArgs, name, substep, first, output)
    msg.info('Options set to: \"%s\":' % option)
    with open(runTranslate, 'w') as runTranslateFile:
        try:
            # Fixed: 'print >> file, ...' is py2-only syntax; write the same
            # bytes explicitly (space-separated fields plus trailing newline)
            runTranslateFile.write('{0} {1} {2}\n'.format(os.linesep, "option = ", option))
        except (IOError, OSError) as e:
            errMsg = 'Got an error when writing JO template {0}: {1}'.format(runTranslateFile, e)
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
def writeJSONReport(self, filename, sort_keys=True, indent=2, fast=False, fileReport=defaultFileReport):
    """Serialise this report as JSON to @c filename.

    The python report dictionary is built lazily on first use.

    @raises trfExceptions.TransformReportException: if the report contains
    an object that cannot be serialised to JSON
    """
    with open(filename, 'w') as jsonFile:
        try:
            # Build the python-side report dictionary only once
            if not self._dataDictionary:
                self._dataDictionary = self.python(fast=fast, fileReport=fileReport)
            json.dump(self._dataDictionary, jsonFile, sort_keys=sort_keys, indent=indent)
        except TypeError as e:
            # An unserialisable object surfaces as TypeError - re-raise as a trf internal
            message = 'TypeError raised during JSON report output: {0!s}'.format(e)
            msg.error(message)
            raise trfExceptions.TransformReportException(trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'), message)
def funcWithTimeout(*args, **kwargs):
    """Run the wrapped @c func in a subprocess with timeout and retries.

    Per-call keyword overrides (timeout, retry, timefactor, sleeptime,
    defaultrc) are popped from kwargs; otherwise the decorator's closure
    defaults apply. On each timeout the subprocess tree is killed, the
    timeout and sleep are scaled by the time factor, and the call is
    retried up to the retry limit.

    @return: the wrapped function's result; @c defaultrc if it raised
    @raises TransformInternalException: on IPC IOError with the subprocess
    @raises TransformTimeoutException: when every try timed out
    """
    # Local copies of the decorator defaults, overridable per call
    ltimeout = timeout
    lretry = retry
    ltimefactor = timefactor
    lsleeptime = sleeptime
    ldefaultrc = defaultrc
    if 'timeout' in kwargs:
        ltimeout = kwargs.pop('timeout')
    if 'retry' in kwargs:
        lretry = kwargs.pop('retry')
    if 'timefactor' in kwargs:
        ltimefactor = kwargs.pop('timefactor')
    if 'sleeptime' in kwargs:
        lsleeptime = kwargs.pop('sleeptime')
    if 'defaultrc' in kwargs:
        ldefaultrc = kwargs.pop('defaultrc')
    if ltimeout is None:
        # Run function normally with no timeout wrapper
        msg.debug('Running {0}: {1} {2} without timeout'.format(func, args, kwargs))
        return func(*args, **kwargs)
    n = 0
    while n <= lretry:
        # NOTE(review): 'retry + 1' uses the closure default rather than the
        # effective lretry override - confirm this is intentional
        msg.info('Try %i out of %i (time limit %s s) to call %s.', n + 1, retry + 1, ltimeout, func.__name__)
        starttime = time.time()
        # Results come back over a single-slot queue as (flag, result)
        q = mp.Queue(maxsize=1)
        nargs = (q, ) + args
        proc = mp.Process(target=funcWithQueue, args=nargs, kwargs=kwargs)
        proc.start()
        try:
            # Wait for function to run and return, but with a timeout
            flag, result = q.get(block=True, timeout=ltimeout)
            proc.join(60)
            msg.info('Executed call within %d s.', time.time() - starttime)
            if flag:
                return result
            else:
                msg.warning('But an exception occurred in function %s.', func.__name__)
                msg.warning('Returning default return code %s.', ldefaultrc)
                return ldefaultrc
        except queue.Empty:
            # Our function did not run in time - kill increase timeout
            msg.warning('Timeout limit of %d s reached. Kill subprocess and its children.', ltimeout)
            parent = proc.pid
            pids = [parent]
            pids.extend(trfUtils.listChildren(parent=parent, listOrphans=False))
            trfUtils.infanticide(pids)
            proc.join(60)  # Ensure cleanup
            if n != lretry:
                msg.info('Going to sleep for %d s.', lsleeptime)
                time.sleep(lsleeptime)
            n += 1
            # Back off: scale the timeout and sleep for the next attempt
            ltimeout *= ltimefactor
            lsleeptime *= ltimefactor
        except IOError:
            errMsg = "IOError while communicating with subprocess"
            msg.error(errMsg)
            raise TransformInternalException(trfExit.nameToCode("TRF_EXTERNAL"), errMsg)
    msg.warning('All %i tries failed!', n)
    raise TransformTimeoutException(trfExit.nameToCode('TRF_EXEC_TIMEOUT'), 'Timeout in function %s' % (func.__name__))
def _bestPath(self, data, dataAvailable, startNodeName='_start', endNodeName=None):
    """Find the cheapest path through the graph that produces @c data.

    Works backwards from the data's end node towards @c startNodeName,
    cloning candidate paths at every multi-exit node and finally keeping
    the lowest-cost path that reached the start.

    @param data: data type to produce
    @param dataAvailable: set of data types already available
    @return: the cheapest graphPath from start to the data's end node
    @raises trfExceptions.TransformGraphException: if the end node is missing
    or no path exists
    """
    if endNodeName is None:
        endNodeName = '_end_{0}'.format(data)
    if endNodeName not in self._nodeDict:
        raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                                                    'Node {0} was not found - the transform data connection definition is broken'.format(endNodeName))
    # Set of all considered paths
    # Initialise this with our endNode name - algorithm works back to the start
    pathSet = [graphPath(endNodeName, data), ]
    msg.debug('Started path finding with seed path {0}'.format(pathSet[0]))
    # Halting condition - only one path and its first element is startNodeName
    # Fixed throughout: equality tests ('==', '!=') replace identity tests
    # ('is', 'is not') against strings/ints, and list(...) wraps dict.keys()
    # (which is not subscriptable in Python 3)
    while len(pathSet) > 1 or pathSet[0].path[0] != startNodeName:
        msg.debug('Starting best path iteration with {0} paths in {1}'.format(len(pathSet), pathSet))
        # Copy the pathSet to do this, as we will update it
        for path in pathSet[:]:
            msg.debug('Continuing path finding with path {0}'.format(path))
            currentNodeName = path.path[0]
            if currentNodeName == startNodeName:
                msg.debug('Path {0} has reached the start node - finished'.format(path))
                continue
            # If there are no paths out of this node then it's a dead end - kill it
            if len(self._nodeDict[currentNodeName].connections['in']) == 0:
                msg.debug('Path {0} is a dead end - removing'.format(path))
                pathSet.remove(path)
                continue
            # If there is only one path out of this node, we extend it
            if len(self._nodeDict[currentNodeName].connections['in']) == 1:
                msg.debug('Single exit from path {0} - adding connection to {1}'.format(path, list(self._nodeDict[currentNodeName].connections['in'])[0]))
                self._extendPath(path, currentNodeName, list(self._nodeDict[currentNodeName].connections['in'])[0])
                continue
            # Else we need to clone the path for each possible exit
            msg.debug('Multiple exits from path {0} - will clone for each extra exit'.format([path]))
            for nextNodeName in list(self._nodeDict[currentNodeName].connections['in'])[1:]:
                newPath = copy.deepcopy(path)
                msg.debug('Cloned exit from path {0} to {1}'.format(newPath, nextNodeName))
                self._extendPath(newPath, currentNodeName, nextNodeName)
                pathSet.append(newPath)
            # Finally, use the original path to extend along the first node exit
            msg.debug('Adding exit from original path {0} to {1}'.format(path, list(self._nodeDict[currentNodeName].connections['in'])[0]))
            self._extendPath(path, currentNodeName, list(self._nodeDict[currentNodeName].connections['in'])[0])
    # Now compare paths which made it to the end - only keep the shortest
    lowestCostPath = None
    for path in pathSet[:]:
        currentNodeName = path.path[0]
        if currentNodeName == startNodeName:
            if lowestCostPath is None:
                lowestCostPath = path
                continue
            if path.cost >= lowestCostPath.cost:
                msg.debug('Path {0} is no cheaper than best path {1} - removing'.format(path, lowestCostPath))
                pathSet.remove(path)
            else:
                msg.debug('Path {0} is cheaper than previous best path {1} - removing previous'.format(path, lowestCostPath))
                pathSet.remove(lowestCostPath)
                lowestCostPath = path
    # Emergency break
    if len(pathSet) == 0:
        raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                                                    'No path found between {0} and {1} for {2}'.format(startNodeName, endNodeName, data))
    return pathSet[0]
def parseCmdLineArgs(self, args): msg.info('Transform command line was: %s' % ' '.join(shQuoteStrings(sys.argv))) try: # Use the argparse infrastructure to get the actual command line arguments self._argdict = vars(self.parser.parse_args(args)) # Need to know if any input or output files were set - if so then we suppress the # corresponding parameters from AMI inputFiles = outputFiles = False for k, v in self._argdict.iteritems(): if k.startswith('input') and isinstance(v, argFile): inputFiles = True elif k.startswith('output') and isinstance(v, argFile): outputFiles = True msg.debug("CLI Input files: {0}; Output files {1}".format( inputFiles, outputFiles)) # Now look for special arguments, which expand out to other parameters # Note that the pickled argdict beats AMIConfig because dict.update() will overwrite # (However, we defend the real command line against updates from either source) extraParameters = {} # AMI configuration? if 'AMIConfig' in self._argdict: msg.debug('Given AMI tag configuration {0}'.format( self._argdict['AMIConfig'])) from PyJobTransforms.trfAMI import TagInfo tag = TagInfo(self._argdict['AMIConfig'].value) updateDict = {} for k, v in dict(tag.trfs[0]).iteritems(): # Convert to correct internal key form k = cliToKey(k) if inputFiles and k.startswith('input'): msg.debug( 'Suppressing argument {0} from AMI' ' because input files have been specified on the command line' .format(k)) continue if outputFiles and k.startswith('output'): msg.debug( 'Suppressing argument {0} from AMI' ' because output files have been specified on the command line' .format(k)) continue updateDict[k] = v extraParameters.update(updateDict) # JSON arguments? 
if 'argJSON' in self._argdict: try: import json msg.debug('Given JSON encoded arguments in {0}'.format( self._argdict['argJSON'])) argfile = open(self._argdict['argJSON'], 'r') jsonParams = json.load(argfile) msg.debug('Read: {0}'.format(jsonParams)) extraParameters.update(convertToStr(jsonParams)) argfile.close() except Exception, e: raise trfExceptions.TransformArgException( trfExit.nameToCode('TRF_ARG_ERROR'), 'Error when deserialising JSON file {0} ({1})'.format( self._argdict['argJSON'], e)) # Event Service if 'eventService' in self._argdict and self._argdict[ 'eventService'].value: updateDict = {} updateDict['athenaMPMergeTargetSize'] = '*:0' updateDict['checkEventCount'] = False updateDict['outputFileValidation'] = False extraParameters.update(updateDict) # Process anything we found for k, v in extraParameters.iteritems(): msg.debug( 'Found this extra argument: {0} with value: {1} ({2})'. format(k, v, type(v))) if k not in self.parser._argClass: raise trfExceptions.TransformArgException( trfExit.nameToCode('TRF_ARG_ERROR'), 'Argument "{0}" not known (try "--help")'.format(k)) if k in self._argdict: msg.debug( 'Ignored {0}={1} as extra parameter because this argument was given on the command line.' .format(k, v)) continue # For callable classes we instantiate properly, otherwise we set the value for simple arguments if '__call__' in dir(self.parser._argClass[k]): self._argdict[k] = self.parser._argClass[k](v) else: self._argdict[k] = v msg.debug('Argument {0} set to {1}'.format( k, self._argdict[k])) # Set the key name as an argument property - useful to be able to look bask at where this # argument came from for k, v in self._argdict.iteritems(): if isinstance(v, argument): v.name = k # Now we parsed all arguments, if a pickle/json dump is requested do it here and exit if 'dumpPickle' in self._argdict: msg.info('Now dumping pickled version of command line to {0}'. 
format(self._argdict['dumpPickle'])) pickledDump(self._argdict) sys.exit(0) # Now we parsed all arguments, if a pickle/json dump is requested do it here and exit if 'dumpJSON' in self._argdict: msg.info( 'Now dumping JSON version of command line to {0}'.format( self._argdict['dumpJSON'])) JSONDump(self._argdict) sys.exit(0)
def classicPython(self, fast=False):
    """Generate the classic Tier 0 'prodsys' python report dictionary.

    Builds the legacy report structure from the parent transform's exit
    status, log scan results, output file reports and input event counts.

    @param fast Passed through to the per-file reports; presumably skips
           expensive metadata retrieval - TODO confirm against trfFileReport
    @return Dictionary with the whole report nested under the 'prodsys' key
    """
    # Things we can get directly from the transform
    trfDict = {
        'jobInputs': [],   # Always empty?
        'jobOutputs': [],  # Filled in below...
        'more': {'Machine': 'unknown'},
        'trfAcronym': trfExit.codeToName(self._trf.exitCode),
        'trfCode': self._trf.exitCode,
        'trfExitCode': self._trf.exitCode,
    }
    # Only add athena status words if something was actually executed
    if self._trf.lastExecuted is not None:
        trfDict.update({'athAcronym': self._trf.lastExecuted.errMsg,
                        'athCode': self._trf.lastExecuted.rc})

    # Emulate the NEEDCHECK behaviour
    if hasattr(self._trf, '_executorPath'):
        for executor in self._trf._executorPath:
            if hasattr(executor, '_logScan') and self._trf.exitCode == 0:
                if executor._logScan._levelCounter['FATAL'] > 0 or executor._logScan._levelCounter['CRITICAL'] > 0:
                    # This should not happen!
                    msg.warning('Found FATAL/CRITICAL errors and exit code 0 - reseting to TRF_LOGFILE_FAIL')
                    self._trf.exitCode = trfExit.nameToCode('TRF_LOGFILE_FAIL')
                    trfDict['trfAcronym'] = 'TRF_LOGFILE_FAIL'
                elif executor._logScan._levelCounter['ERROR'] > 0:
                    msg.warning('Found errors in logfile scan - changing exit acronymn to NEEDCHECK.')
                    trfDict['trfAcronym'] = 'NEEDCHECK'

    # Now add files
    fileArgs = self._trf.getFiles(io='output')
    for fileArg in fileArgs:
        # N.B. In the original Tier 0 gpickles there was executor
        # information added for each file (such as autoConfiguration, preExec).
        # However, Luc tells me it is ignored, so let's not bother.
        trfDict['jobOutputs'].extend(trfFileReport(fileArg).classicPython(fast=fast))
        # AMITag and friends is added per-file, but it's known only to the transform, so set it here:
        # NOTE(review): only the LAST subfile appended for this argument gets the
        # extra metadata ([-1]); this mirrors the historic Tier 0 behaviour
        for argdictKey in ('AMITag', 'autoConfiguration',):
            if argdictKey in self._trf.argdict:
                trfDict['jobOutputs'][-1]['more']['metadata'][argdictKey] = self._trf.argdict[argdictKey].value
        # Mangle substep arguments back to the old format (e.g. preExec_RAWtoESD)
        for substepKey in ('preExec', 'postExec', 'preInclude', 'postInclude'):
            if substepKey in self._trf.argdict:
                for substep, values in iteritems(self._trf.argdict[substepKey].value):
                    if substep == 'all':
                        trfDict['jobOutputs'][-1]['more']['metadata'][substepKey] = values
                    else:
                        trfDict['jobOutputs'][-1]['more']['metadata'][substepKey + '_' + substep] = values

    # Now retrieve the input event count
    nentries = 'UNKNOWN'
    for fileArg in self._trf.getFiles(io='input'):
        thisArgNentries = fileArg.nentries
        if isinstance(thisArgNentries, int):
            if nentries == 'UNKNOWN':
                nentries = thisArgNentries
            elif thisArgNentries != nentries:
                msg.warning('Found a file with different event count than others: {0} != {1} for {2}'.format(thisArgNentries, nentries, fileArg))
                # Take highest number?
                if thisArgNentries > nentries:
                    nentries = thisArgNentries
    trfDict['nevents'] = nentries

    # Tier 0 expects the report to be in a top level dictionary under the prodsys key
    return {'prodsys': trfDict}
def python(self, fast=False, type='full'):
    """Build the python report dictionary for this file argument.

    Produces the argument-level properties plus one sub-report per file,
    keyed by basename when that is unambiguous, otherwise by path.

    @param fast Passed through to the per-file reports
    @param type Report flavour: 'name' or 'full'
    @return Dictionary with shared properties and a 'subFiles' list
    @raise trfExceptions.TransformReportException For an unknown report type
    """
    # First entity contains shared properties - same for all files in this argFile
    if type == 'name':
        fileArgProps = {'dataset': self._fileArg.dataset,
                        'nentries': self._fileArg.getnentries(fast),
                        'subFiles': []}
    elif type == 'full':
        fileArgProps = {'dataset': self._fileArg.dataset,
                        'type': self._fileArg.type,
                        'subFiles': [],
                        'argName': self._fileArg.name,
                        }
    else:
        raise trfExceptions.TransformReportException(
            trfExit.nameToCode('TRF_INTERNAL_REPORT_ERROR'),
            'Unknown file report type ({0}) in the file report for {1}'.format(type, self._fileArg))

    ## @note We try to strip off the path when there are multiple files to be reported on,
    #  however we should not do this if any of the files share a basename or anything is
    #  in a different directory
    basenames = {os.path.basename(fname) for fname in self._fileArg.value}
    directories = {os.path.dirname(os.path.relpath(os.path.normpath(fname))) for fname in self._fileArg.value}

    basenameReport = True
    if len(basenames) != len(self._fileArg.value):
        msg.info('Detected two files with the same basename in a file argument - report for file {0} will be produced with the path as a key'.format(self._fileArg))
        basenameReport = False
    elif len(directories) > 1:
        msg.warning('Detected output files in different directories - report for file {0} will be produced with the path as a key'.format(self._fileArg))
        basenameReport = False

    suppressed = []
    for fname in self._fileArg.value:
        if basenameReport:
            subFile = self.singleFilePython(fname, fast=fast, type=type)
        else:
            subFile = self.singleFilePython(fname, fast=fast, type=type, basename=False)
        if subFile is None:
            continue
        # if nentries == 0 for DRAW, suppress subfile from report
        if ('nentries' in subFile and subFile['nentries'] == 0
                and isinstance(self._fileArg, trfArgClasses.argBSFile)):
            msg.info('Suppressing file {0}, nentries is 0'.format(subFile['name']))
            suppressed.append(subFile['name'])
        else:
            fileArgProps['subFiles'].append(subFile)

    return fileArgProps
import ast
import json
import os
import traceback
from json import dumps

import logging
msg = logging.getLogger(__name__)

from PyJobTransforms.trfExceptions import TransformAMIException
from PyJobTransforms.trfDefaultFiles import getInputFileName, getOutputFileName
from PyJobTransforms.trfUtils import convertToStr

from PyJobTransforms.trfExitCodes import trfExit

# Error code used for all AMI related failures raised by this module
AMIerrorCode = trfExit.nameToCode('TRF_AMI_ERROR')


## @brief Stores the configuration of a transform
class TrfConfig:
    def __init__(self):
        """Initialise an empty transform configuration holder."""
        # Transform name and the release it belongs to
        self.name = None
        self.release = None
        # Non-file configuration parameters (presumably keyed by argument
        # name - TODO confirm against the code that fills these dicts)
        self.physics = {}
        # Input and output file arguments
        self.inFiles = {}
        self.outFiles = {}
        self.outputs = {}
        # Input dataset name and list of output formats
        self.inDS = None
        self.outfmts = []
        # NOTE(review): looks like this flags a new-style transform - confirm
        self.newTransform = False
def writeRunArgs(self, input=dict(), output=dict()):
    """Write the runArgs python snippet for this substep.

    Serialises the executor's argument dictionary, input/output file
    arguments and the AthenaMP/AthenaMT special options into a python file
    that defines a RunArguments instance for the job options to consume.
    The order of the print statements determines the layout of the
    generated file and must not be changed casually.

    @param input Dictionary of input data type -> file argument
    @param output Dictionary of output data type -> file argument
    @raise trfExceptions.TransformExecutionException If --CA and the
           available skeletons are inconsistent, if a required event orders
           file is missing, or if the runArgs file cannot be written
    """
    msg.info('Writing runArgs to file \"%s\"', self._runArgsFile)

    ## Check consistency btw --CA flag and provided skeletons:
    if 'CA' in self._exe.conf.argdict:
        if self._exe._skeletonCA is None:
            errMsg = "Got the --CA option but this transform doesn't supply a ComponentAccumulator-based skeleton file"
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
    else:  # 'CA' not in self._exe.conf.argdict
        if self._exe._skeleton is None:
            errMsg = "No --CA option given, but this transform doesn't supply old-style skeleton file"
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)

    with open(self._runArgsFile, 'w') as runargsFile:
        try:
            # First write a little header
            print(os.linesep.join(("# Run arguments file auto-generated on {0} by:".format(time.asctime()),
                                   "# JobTransform: {0}".format(self._exe.name),
                                   "# Version: {0}".format(self._version))), file=runargsFile)

            # Now make sure we import the runArgs class for out job options
            print(os.linesep.join(("# Import runArgs class",
                                   "from PyJobTransforms.trfJobOptions import RunArguments",
                                   "{0} = RunArguments()".format(self._runArgsName))), file=runargsFile)

            # Handy to write the substep name here as it can be used as (part of) a random seed
            # in some cases
            print('{0}.trfSubstepName = {1!r}'.format(self._runArgsName, self._exe.name), os.linesep, file=runargsFile)

            # Now loop over the core argdict and see what needs to be given as a runArg
            declaredRunargs = []
            for k, v in iteritems(self._exe.conf.argdict):
                # Check if this arg is supposed to be in runArgs
                if isinstance(v, trfArgClasses.argument) and v.isRunarg:
                    # Files handled later
                    if isinstance(v, trfArgClasses.argFile):
                        continue

                    msg.debug('Argument {0} is a runarg, will be added to JO file (value {1})'.format(k, v.value))

                    ## @note Substep type arguments are rather special, they apply to only named
                    #  executors or substeps. We use the returnMyValue() method to sort out what
                    #  specific value applies to us
                    if isinstance(v, trfArgClasses.argSubstep):
                        myValue = v.returnMyValue(exe=self._exe)
                        if myValue is not None:
                            print("{0}.{1!s} = {2!r}".format(self._runArgsName, k, myValue), file=runargsFile)
                            msg.debug('Added substep type argument {0} as: {1}'.format(k, myValue))
                            declaredRunargs.append(k)
                    else:
                        print("{0}.{1!s} = {2!r}".format(self._runArgsName, k, v.value), file=runargsFile)
                        declaredRunargs.append(k)
                else:
                    msg.debug('Argument {0} is not a runarg - ignored'.format(k))

            # Now make sure that if we did not add maxEvents then we set this to -1, which
            # avoids some strange defaults that only allow 5 events to be processed
            if 'maxEvents' not in declaredRunargs:
                print(os.linesep.join(("",
                                       "# Explicitly added to process all events in this step",
                                       "{0}.maxEvents = -1".format(self._runArgsName),)), file=runargsFile)

            # Now deal with our input and output files
            print(os.linesep, "# Input data", file=runargsFile)
            for dataType, dataArg in iteritems(input):
                print('{0}.input{1}File = {2!r}'.format(self._runArgsName, dataType, dataArg.value), file=runargsFile)
                print('{0}.input{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArg.type), file=runargsFile)
                # Add the input event count, if we know it
                if dataArg.isCached(metadataKeys=['nentries']):
                    print('{0}.input{1}FileNentries = {2!r}'.format(self._runArgsName, dataType, dataArg.nentries), file=runargsFile)
                print("{0}.{1}FileIO = {2!r}".format(self._runArgsName, dataType, self._exe.conf.dataDictionary[dataType].io), file=runargsFile)

            print(os.linesep, "# Output data", file=runargsFile)
            for dataType, dataArg in iteritems(output):
                # Need to be careful to convert _output_ filename as a strings, not a list
                print('{0}.output{1}File = {2!r}'.format(self._runArgsName, dataType, dataArg.value[0]), file=runargsFile)
                print('{0}.output{1}FileType = {2!r}'.format(self._runArgsName, dataType, dataArg.type), file=runargsFile)

            # Process all of the tweaky special runtime arguments
            print(os.linesep, "# Extra runargs", file=runargsFile)
            ## @note extraRunargs are passed using repr, i.e., they should be constants
            for k, v in iteritems(self._exe._extraRunargs):
                ## @note: What to do if this is a CLI argument as well, in particular
                #  for arguments like preExec we want to add to the list, not replace it
                if k in declaredRunargs:
                    if isinstance(self._exe.conf.argdict[k].value, list):
                        msg.debug('Extending runarg {0!s}={1!r}'.format(k, v))
                        print('{0}.{1!s}.extend({2!r})'.format(self._runArgsName, k, v), file=runargsFile)
                else:
                    msg.debug('Adding runarg {0!s}={1!r}'.format(k, v))
                    print('{0}.{1!s} = {2!r}'.format(self._runArgsName, k, v), file=runargsFile)

            ## @note runtime runargs are passed as strings, i.e., they can be evaluated
            print(os.linesep, '# Extra runtime runargs', file=runargsFile)
            for k, v in iteritems(self._exe._runtimeRunargs):
                # These options are string converted, not repred, so they can write an option
                # which is evaluated at runtime
                # Protect this with try: except: for the Embedding use case
                msg.debug('Adding runarg {0!s}={1!r}'.format(k, v))
                print(os.linesep.join(('try:',
                                       ' {0}.{1!s} = {2!s}'.format(self._runArgsName, k, v),
                                       'except AttributeError:',
                                       ' printfunc ("WARNING - AttributeError for {0}")'.format(k))), file=runargsFile)

            ## @note Now write the literals into the runargs file
            if self._exe._literalRunargs is not None:
                print(os.linesep, '# Literal runargs snippets', file=runargsFile)
                for line in self._exe._literalRunargs:
                    print(line, file=runargsFile)

            ## Another special option - dataArgs are always written to the runargs file
            for dataType in self._exe._dataArgs:
                print(os.linesep, '# Forced data value arguments', file=runargsFile)
                if dataType in self._exe.conf.dataDictionary:
                    print('{0}.data{1}arg = {2!r}'.format(self._runArgsName, dataType,
                                                          self._exe.conf.dataDictionary[dataType].value), file=runargsFile)
                else:
                    print('# Warning: data type "{0}" is not part of this transform'.format(dataType), file=runargsFile)

            # This adds the correct JO fragment for AthenaMP job, where we need to ask
            # the FileMgr to produce the requested log and report files
            # Also, aggregating the workers' logfiles into the mother's makes life
            # easier for debugging
            if self._exe._athenaMP:
                print(os.linesep, '# AthenaMP Options. nprocs = %d' % self._exe._athenaMP, file=runargsFile)
                # Proxy for both options
                print(os.linesep.join((os.linesep,
                                       'from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps',
                                       'AthenaMPJobProps.AthenaMPFlags.WorkerTopDir="{0}"'.format(self._exe._athenaMPWorkerTopDir),
                                       'AthenaMPJobProps.AthenaMPFlags.OutputReportFile="{0}"'.format(self._exe._athenaMPFileReport),
                                       'AthenaMPJobProps.AthenaMPFlags.EventOrdersFile="{0}"'.format(self._exe._athenaMPEventOrdersFile),
                                       'AthenaMPJobProps.AthenaMPFlags.CollectSubprocessLogs=True')), file=runargsFile)
                if self._exe._athenaMPStrategy:
                    # Beware of clobbering a non default value (a feature used by EventService)
                    print('if AthenaMPJobProps.AthenaMPFlags.Strategy.isDefault():', file=runargsFile)
                    print('\tAthenaMPJobProps.AthenaMPFlags.Strategy="{0}"'.format(self._exe._athenaMPStrategy), file=runargsFile)
                if self._exe._athenaMPReadEventOrders:
                    if os.path.isfile(self._exe._athenaMPEventOrdersFile):
                        print('AthenaMPJobProps.AthenaMPFlags.ReadEventOrders=True', file=runargsFile)
                    else:
                        raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_EXEC_RUNARGS_ERROR"),
                                                                        "Failed to find file: {0} required by athenaMP option: --athenaMPUseEventOrders true".format(self._exe._athenaMPEventOrdersFile))
                if 'athenaMPEventsBeforeFork' in self._exe.conf.argdict:
                    print('AthenaMPJobProps.AthenaMPFlags.EventsBeforeFork={0}'.format(self._exe.conf.argdict['athenaMPEventsBeforeFork'].value), file=runargsFile)

            if 'CA' in self._exe.conf.argdict:
                print(os.linesep, '# Threading flags', file=runargsFile)
                # Pass the number of threads
                threads = self._exe._athenaMT
                concurrentEvents = self._exe._athenaConcurrentEvents
                msg.debug('Adding runarg {0!s}={1!r}'.format('threads', threads))
                print('{0}.{1!s} = {2!r}'.format(self._runArgsName, 'threads', threads), file=runargsFile)
                msg.debug('Adding runarg {0!s}={1!r}'.format('concurrentEvents', concurrentEvents))
                print('{0}.{1!s} = {2!r}'.format(self._runArgsName, 'concurrentEvents', concurrentEvents), file=runargsFile)
                # ComponentAccumulator based config, import skeleton here:
                print(os.linesep, '# Import skeleton and execute it', file=runargsFile)
                print('from {0} import fromRunArgs'.format(self._exe._skeletonCA), file=runargsFile)
                print('fromRunArgs({0})'.format(self._runArgsName), file=runargsFile)

            msg.info('Successfully wrote runargs file {0}'.format(self._runArgsFile))

        except (IOError, OSError) as e:
            errMsg = 'Got an error when writing JO template {0}: {1}'.format(self._runArgsFile, e)
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
def detectAthenaMTThreads(argdict=None):
    """Detect the AthenaMT setup: number of threads and concurrent events.

    Inspects per-substep '--threads=N' and '--concurrent-events=N' settings
    in 'athenaopts' and, when no thread count was given there, falls back to
    the ATHENA_CORE_NUMBER environment variable if the 'multithreaded' flag
    is present.

    @param argdict Transform argument dictionary (default: empty dict; the
           old mutable-default signature is preserved behaviourally but the
           shared-default hazard is removed)
    @return Tuple of (athenaMTThreads, athenaConcurrentEvents)
    @raise trfExceptions.TransformExecutionException On conflicting or
           invalid thread/event settings
    """
    # Avoid the shared mutable default argument antipattern
    if argdict is None:
        argdict = {}

    athenaMTThreads = 0
    athenaConcurrentEvents = 0

    # Try and detect if any AthenaMT has been enabled
    try:
        if 'athenaopts' in argdict:
            for substep in argdict['athenaopts'].value:
                threadArg = [opt.replace("--threads=", "") for opt in argdict['athenaopts'].value[substep] if '--threads' in opt]
                if len(threadArg) == 0:
                    athenaMTThreads = 0
                elif len(threadArg) == 1:
                    # Two competing ways to ask for MT is an error
                    if 'multithreaded' in argdict:
                        raise ValueError("Detected conflicting methods to configure AthenaMT: --multithreaded and --threads=N (via athenaopts). Only one method must be used")
                    athenaMTThreads = int(threadArg[0])
                    if athenaMTThreads < -1:
                        raise ValueError("--threads was set to a value less than -1")
                else:
                    raise ValueError("--threads was set more than once in 'athenaopts'")
                msg.info('AthenaMT detected from "threads" setting with {0} threads for substep {1}'.format(athenaMTThreads, substep))

                concurrentEventsArg = [opt.replace("--concurrent-events=", "") for opt in argdict['athenaopts'].value[substep] if '--concurrent-events' in opt]
                if len(concurrentEventsArg) == 1:
                    athenaConcurrentEvents = int(concurrentEventsArg[0])
                    if athenaConcurrentEvents < -1:
                        raise ValueError("--concurrent-events was set to a value less than -1")
                    msg.info('Custom concurrent event setting read from "concurrent-events" with {0} events for substep {1}'.format(athenaConcurrentEvents, substep))
                else:
                    # Default: one concurrent event per thread
                    athenaConcurrentEvents = athenaMTThreads

        # Fallback: environment-driven configuration for the 'multithreaded' flag
        if (athenaMTThreads == 0 and 'ATHENA_CORE_NUMBER' in os.environ and 'multithreaded' in argdict):
            athenaMTThreads = int(os.environ['ATHENA_CORE_NUMBER'])
            if athenaMTThreads < -1:
                raise ValueError("ATHENA_CORE_NUMBER value was less than -1")
            msg.info('AthenaMT detected from ATHENA_CORE_NUMBER with {0} threads'.format(athenaMTThreads))
            athenaConcurrentEvents = athenaMTThreads
    except ValueError as errMsg:
        myError = 'Problem discovering AthenaMT setup: {0}'.format(errMsg)
        raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), myError)

    return athenaMTThreads, athenaConcurrentEvents
def generateReport(self, reportType=None, fast=False, fileReport=defaultFileReport):
    """Write out the transform reports in the requested formats.

    @param reportType List of report types to produce; overridden by the
           'reportType' command line argument when given. Defaults to
           ['json'], plus 'gpickle' at Tier 0 (TZHOME set) and 'text' in
           non-interactive environments
    @param fast Passed to the report writers; presumably skips expensive
           metadata retrieval - TODO confirm against the report class
    @param fileReport File report levels dictionary, passed to the writers
    """
    msg.debug('Transform report generator')

    if 'reportType' in self._argdict:
        if reportType is not None:
            msg.info('Transform requested report types {0} overridden by command line to {1}'.format(reportType, self._argdict['reportType'].value))
        reportType = self._argdict['reportType'].value

    if reportType is None:
        reportType = ['json', ]
        # Only generate the Tier0 report at Tier0 ;-)
        # (It causes spurious warnings for some grid jobs with background files (e.g., digitisation)
        if 'TZHOME' in os.environ:
            reportType.append('gpickle')

        if not isInteractiveEnv():
            reportType.append('text')
            msg.debug('Detected Non-Interactive environment. Enabled text report')

    if 'reportName' in self._argdict:
        baseName = classicName = self._argdict['reportName'].value
    else:
        baseName = 'jobReport'
        classicName = 'metadata'

    try:
        # Text: Writes environment variables and machine report in text format.
        # (N.B. reportType can no longer be None here - it was defaulted
        # above - the 'is None' halves of these tests are kept for safety)
        if reportType is None or 'text' in reportType:
            envName = baseName if 'reportName' in self._argdict else 'env'  # Use fallback name 'env.txt' if it's not specified.
            self._report.writeTxtReport(filename='{0}.txt'.format(envName), fast=fast, fileReport=fileReport)
        # JSON
        if reportType is None or 'json' in reportType:
            self._report.writeJSONReport(filename='{0}.json'.format(baseName), fast=fast, fileReport=fileReport)
        # Classic XML
        if reportType is None or 'classic' in reportType:
            self._report.writeClassicXMLReport(filename='{0}.xml'.format(classicName), fast=fast)
        # Classic gPickle
        if reportType is None or 'gpickle' in reportType:
            self._report.writeGPickleReport(filename='{0}.gpickle'.format(baseName), fast=fast)
        # Pickled version of the JSON report for pilot
        if reportType is None or 'pilotPickle' in reportType:
            self._report.writePilotPickleReport(filename='{0}Extract.pickle'.format(baseName), fast=fast, fileReport=fileReport)
    # 'as' form replaces the python2-only 'except X, e' syntax used previously,
    # consistent with the rest of this file
    except trfExceptions.TransformTimeoutException as reportException:
        msg.error('Received timeout when writing report ({0})'.format(reportException))
        msg.error('Report writing is aborted - sorry. Transform will exit with TRF_METADATA_CALL_FAIL status.')
        if ('orphanKiller' in self._argdict):
            infanticide(message=True, listOrphans=True)
        else:
            infanticide(message=True)
        sys.exit(trfExit.nameToCode('TRF_METADATA_CALL_FAIL'))
def athenaMPOutputHandler(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary, athenaMPworkers, skipFileChecks = False, argdict = None):
    """Handle AthenaMP outputs: match worker output files to the transform's
    file arguments and link/update them via athenaMPoutputsLinkAndUpdate().

    First processes the AthenaMP XML file report (skipped in sharedWriter
    mode), then walks the worker directories for any output data types the
    report did not cover.

    @param athenaMPFileReport XML output report written by AthenaMP
    @param athenaMPWorkerTopDir Top directory of the AthenaMP worker dirs
    @param dataDictionary Dictionary of data type -> file argument
    @param athenaMPworkers Expected number of AthenaMP workers
    @param skipFileChecks If True, do not enforce one output file per worker
    @param argdict Transform argument dictionary, used to detect sharedWriter
           (default: empty dict)
    @raise trfExceptions.TransformExecutionException If the report is missing,
           or output files are ambiguous or of unexpected number
    """
    # Avoid the shared mutable default argument antipattern
    if argdict is None:
        argdict = {}

    msg.debug("MP output handler called for report {0} and workers in {1}, data types {2}".format(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary.keys()))
    outputHasBeenHandled = dict([ (dataType, False) for dataType in dataDictionary.keys() if dataDictionary[dataType] ])

    # if sharedWriter mode is active ignore athenaMPFileReport
    sharedWriter = False
    if 'sharedWriter' in argdict and argdict['sharedWriter'].value:
        sharedWriter = True
        skipFileChecks = True

    if not sharedWriter:
        # First, see what AthenaMP told us
        mpOutputs = ElementTree.ElementTree()
        try:
            mpOutputs.parse(athenaMPFileReport)
        except IOError:
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"),
                                                            "Missing AthenaMP outputs file {0} (probably athena crashed)".format(athenaMPFileReport))
        for filesElement in mpOutputs.getroot().getiterator(tag='Files'):
            msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib))
            originalArg = None
            startName = filesElement.attrib['OriginalName']
            for dataType, fileArg in dataDictionary.iteritems():
                if fileArg.value[0] == startName:
                    originalArg = fileArg
                    outputHasBeenHandled[dataType] = True
                    break
            if originalArg is None:
                msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(startName))
                continue

            msg.debug('Found matching argument {0}'.format(originalArg))
            fileNameList = []
            for fileElement in filesElement.getiterator(tag='File'):
                msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib))
                fileNameList.append(path.relpath(fileElement.attrib['name']))

            # Use the matched argument explicitly (the old code relied on the
            # leaked inner loop variable, which happened to hold the same object)
            athenaMPoutputsLinkAndUpdate(fileNameList, originalArg)

    # Now look for additional outputs that have not yet been handled
    if len([dataType for dataType in outputHasBeenHandled if outputHasBeenHandled[dataType] is False]):
        # OK, we have something we need to search for; cache the dirwalk here
        MPdirWalk = [dirEntry for dirEntry in os.walk(athenaMPWorkerTopDir)]

        for dataType, fileArg in dataDictionary.iteritems():
            if outputHasBeenHandled[dataType]:
                continue
            # Value comparison, not 'is': string identity only works by
            # accident of CPython interning
            if fileArg.io == "input":
                continue
            msg.info("Searching MP worker directories for {0}".format(dataType))
            startName = fileArg.value[0]
            fileNameList = []
            for entry in MPdirWalk:
                if "evt_count" in entry[0]:
                    continue
                if "range_scatterer" in entry[0]:
                    continue
                # N.B. AthenaMP may have made the output name unique for us, so
                # we need to treat the original name as a prefix
                possibleOutputs = [fname for fname in entry[2] if fname.startswith(startName)]
                if len(possibleOutputs) == 0:
                    continue
                elif len(possibleOutputs) == 1:
                    fileNameList.append(path.join(entry[0], possibleOutputs[0]))
                elif skipFileChecks:
                    pass
                else:
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"),
                                                                    "Found multiple matching outputs for datatype {0} in {1}: {2}".format(dataType, entry[0], possibleOutputs))
            if skipFileChecks:
                pass
            elif len(fileNameList) != athenaMPworkers:
                raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"),
                                                                "Found {0} output files for {1}, expected {2} (found: {3})".format(len(fileNameList), dataType, athenaMPworkers, fileNameList))

            # Found expected number of files - good!
            athenaMPoutputsLinkAndUpdate(fileNameList, fileArg)