Esempio n. 1
0
    def preExecute(self, input = None, output = None):
        """Resolve the bytestream input configuration before execution.

        Steps performed, in order:

        1. If a ``hitarFile`` argument is present, its first file is opened
           as a tar archive, listed and fully extracted into the current
           working directory.
        2. If both ``inputZeroBiasBSFile`` and ``inputBSCONFIGFile`` are
           present, ``inputZeroBiasBSFile`` is removed (with a printed
           warning) so that the config file drives the job.
        3. If ``inputBSCONFIGFile`` is present, ``jobNumber`` is required.
           ``filelist_<N>.txt`` and ``lbn_anal_map_<N>.txt`` (with N the job
           number wrapped into 1..500) are extracted from the archive, the
           first line of the file list is split on commas to build the
           ``inputZeroBiasBSFile`` argument, and that argument is validated.
           ``InputLbnMapFile`` and ``InputFileMapFile`` are then set.
        4. Otherwise, ``lumiBlockMapFile`` (if given) is copied to
           ``InputLbnMapFile``.

        Finally the parent class ``preExecute`` is called.

        :param input: set of input data type names; ``'ZeroBiasBS'`` is added
            to it when the inputs are resolved from ``inputBSCONFIGFile``.
            A fresh empty set is used when omitted.
        :param output: set of output data type names, forwarded to the parent
            class. A fresh empty set is used when omitted.
        :raises trfExceptions.TransformSetupException: if an archive cannot
            be unpacked, if ``jobNumber`` is missing while
            ``inputBSCONFIGFile`` is given, or if resolving/validating the
            bytestream inputs fails.
        """
        # Default arguments are evaluated once, and this method adds to
        # ``input`` below; build new sets per call so state never leaks
        # between calls or instances.
        if input is None:
            input = set()
        if output is None:
            output = set()

        msg.info('Preparing for BSJobSplitterExecutor execution of {0} with inputs {1} and outputs {2}'.format(self.name, input, output))

        # See if we need to unpack a TAR file
        if 'hitarFile' in self.conf.argdict:
            print ("Untarring inputHITARFile", self.conf.argdict['hitarFile'].value)
            try:
                # The context manager closes the archive even if listing or
                # extraction fails part-way through.
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive - confirm hitarFile always comes from a trusted
                # source, or filter the members before extracting.
                with tarfile.open(name=self.conf.argdict['hitarFile'].value[0]) as f:
                    f.list()
                    f.extractall()
            except Exception as e:
                raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting HI input files for transform: {0}'.format(e))

        # There are two ways to configure this transform:
        # - Give an inputZeroBiasBSFile argument directly
        # - Give a inputBSCONFIGFile and jobNumber argument
        # Check now that we have a configuration that works

        if 'inputZeroBiasBSFile' in self.conf.argdict and 'inputBSCONFIGFile' in self.conf.argdict:
            # Specifying both is tolerated rather than treated as an error:
            # the directly given files are dropped and the config file wins.
            del self.conf.argdict['inputZeroBiasBSFile']
            print ("WARNING - removed the inputZeroBiasBSFile argument, because inputZeroBiasBSFile and inputBSCONFIGFile were already specified")

        if 'inputBSCONFIGFile' in self.conf.argdict:
            if 'jobNumber' not in self.conf.argdict:
                raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'inputBSCONFIGFile is specified, but no jobNumber was given.')
            # Job number has to wrap around from 500, dropping back to 1
            wrappedJobNumber = (self.conf.argdict['jobNumber'].value-1)%500 + 1

            self._inputFilelist = 'filelist_{0}.txt'.format(wrappedJobNumber)
            self._lbnList = 'lbn_anal_map_{0}.txt'.format(wrappedJobNumber)

            try:
                print (self.conf.argdict['inputBSCONFIGFile'].value)
                # Only the two per-job members are pulled out of the archive.
                with tarfile.open(name=self.conf.argdict['inputBSCONFIGFile'].value[0]) as f:
                    f.extract(self._inputFilelist)
                    f.extract(self._lbnList)
                # The first line of the file list holds the comma-separated
                # bytestream inputs for this job.
                with open(self._inputFilelist) as fileList:
                    bsInputs = fileList.readline().rstrip().split(',')
                self.conf.addToArgdict('inputZeroBiasBSFile', trfArgClasses.argBSFile(bsInputs, io='input', type='BS', subtype='BS_ZeroBias'))
                self.conf.addToDataDictionary('ZeroBiasBS', self.conf.argdict['inputZeroBiasBSFile'])
                input.add('ZeroBiasBS')
                msg.info('Validating resolved input bytestream files')
                trfValidation.performStandardFileValidation({'ZeroBiasBS': self.conf.argdict['inputZeroBiasBSFile']}, io='input')
            except Exception as e:
                raise trfExceptions.TransformSetupException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), 'Error while unpacking and extracting input files for transform: {0}'.format(e))

            # Now setup correct input arguments
            self.conf.argdict['InputLbnMapFile'] = trfArgClasses.argString(self._lbnList)
            self.conf.argdict['InputFileMapFile'] = trfArgClasses.argString(self._inputFilelist)

        else:
            # A lumiBlockMapFile is optional in the direct-input configuration;
            # when given it is exposed under the InputLbnMapFile key.
            if 'lumiBlockMapFile' in self.conf.argdict:
                self.conf.argdict['InputLbnMapFile'] = self.conf.argdict['lumiBlockMapFile']
        super(BSJobSplitterExecutor, self).preExecute(input=input, output=output)
Esempio n. 2
0
 def _doSteering(self, steeringDict=None):
     """Apply steering modifications to the executors' input/output data sets.

     ``steeringDict`` maps a substep (matched against each executor's
     ``name`` or ``substep``) to an iterable of steering tuples of the form
     ``(in/out, +/-, datatype)``. For each tuple the datatype is added to
     (``'+'``) or removed from (anything else) the executor's ``inData``
     (``'in'``) or ``outData`` (anything else) set.

     :param steeringDict: steering to apply; when empty or ``None`` the
         value of the ``steering`` entry in ``self._argdict`` is used.
     :raises trfExceptions.TransformSetupException: if an addition or
         removal does not change the size of the set by exactly one, or if
         no executor matches a substep.
     """
     if not steeringDict:
         steeringDict = self._argdict['steering'].value
     # items() exists on both Python 2 and Python 3 dictionaries, whereas
     # iteritems() is Python 2 only.
     for substep, steeringValues in steeringDict.items():
         foundSubstep = False
         for executor in self._executors:
             if executor.name == substep or executor.substep == substep:
                 foundSubstep = True
                 msg.debug('Updating {0} with {1}'.format(
                     executor.name, steeringValues))
                 # Steering consists of tuples with (in/out, +/-, datatype)
                 for steeringValue in steeringValues:
                     if steeringValue[0] == 'in':
                         startSet = executor.inData
                     else:
                         startSet = executor.outData
                     origLen = len(startSet)
                     msg.debug('Data values to be modified are: {0}'.format(
                         startSet))
                     # Compare by value: identity ("is") against a string
                     # literal depends on interpreter string interning.
                     if steeringValue[1] == '+':
                         startSet.add(steeringValue[2])
                         if len(startSet) != origLen + 1:
                             raise trfExceptions.TransformSetupException(
                                 trfExit.nameToCode(
                                     'TRF_GRAPH_STEERING_ERROR'),
                                 'Attempting to add data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype already there?'
                                 .format(steeringValue[2], executor.name,
                                         steeringValue[1], startSet))
                     else:
                         startSet.discard(steeringValue[2])
                         if len(startSet) != origLen - 1:
                             raise trfExceptions.TransformSetupException(
                                 trfExit.nameToCode(
                                     'TRF_GRAPH_STEERING_ERROR'),
                                 'Attempting to remove data type {0} from {1} {2} fails (original set of data: {3}). Was this datatype even present?'
                                 .format(steeringValue[2], executor.name,
                                         steeringValue[1], startSet))
                     # Logged per steering value so that startSet is always
                     # bound here, even when steeringValues is empty.
                     msg.debug('Updated data values to: {0}'.format(startSet))
         if not foundSubstep:
             raise trfExceptions.TransformSetupException(
                 trfExit.nameToCode('TRF_GRAPH_STEERING_ERROR'),
                 'This transform has no executor/substep {0}'.format(
                     substep))
Esempio n. 3
0
    def __init__(self, executorSet, inputData=None, outputData=None):
        """Build the graph of executor nodes plus pseudo start/end nodes.

        Every executor in ``executorSet`` is added as a node. A ``_start``
        pseudo-node that outputs the input data types and one
        ``_end_<datatype>`` pseudo-node per node output data type are then
        added, and ``findConnections()`` is called.

        When there is exactly one executor and both its ``_inData`` and
        ``_outData`` are empty, its ``inData``/``outData`` are set to
        ``inputData``/``outputData``.

        :param executorSet: collection of executors to add as graph nodes.
        :param inputData: iterable of input data type names; treated as empty
            when omitted.
        :param outputData: iterable of output data type names; treated as
            empty when omitted.
        :raises trfExceptions.TransformSetupException: if a data type appears
            in both ``inputData`` and ``outputData``.
        """
        # Default arguments are evaluated once; a shared default set could be
        # handed to an executor below and then leak into later graphs, so
        # create new sets per call instead.
        if inputData is None:
            inputData = set()
        if outputData is None:
            outputData = set()

        # Set basic node list
        self._nodeDict = {}

        msg.info('Transform graph input data: {0}; output data {1}'.format(
            inputData, outputData))

        if len(executorSet) == 1:
            # Single executor - in this case inData/outData is not mandatory, so we set them to the
            # input/output data of the transform
            executor = list(executorSet)[0]
            if len(executor._inData) == 0 and len(executor._outData) == 0:
                executor.inData = inputData
                executor.outData = outputData

        for executor in executorSet:
            self.addNode(executor)

        self._inputData = set(inputData)
        self._outputData = set(outputData)

        # It's forbidden for a transform to consume and produce the same datatype
        dataOverlap = self._inputData & self._outputData
        if dataOverlap:
            raise trfExceptions.TransformSetupException(
                trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'Transform definition error, you cannot produce and consume the same datatypes in a transform. Duplicated input/output types {0}.'
                .format(' '.join(dataOverlap)))

        # Add a pseudo-start/stop nodes, from which input data flows and output data finally arrives
        # This makes the graph 'concrete' for this job
        # This is useful as then data edges all connect properly to a pair of nodes
        # We add a node for every possible output as this enables topo sorting of the graph
        # nodes for any intermediate data end nodes as well
        pseudoNodes = dict()
        pseudoNodes['_start'] = graphNode(name='_start',
                                          inData=[],
                                          outData=self._inputData,
                                          weight=0)
        for node in itervalues(self._nodeDict):
            for dataType in node.outputDataTypes:
                endNodeName = '_end_{0}'.format(dataType)
                pseudoNodes[endNodeName] = graphNode(name=endNodeName,
                                                     inData=[dataType],
                                                     outData=[],
                                                     weight=0)
        self._nodeDict.update(pseudoNodes)

        # Toposort not yet done
        self._toposort = []
        self._toposortData = []

        # Now find connections between nodes
        self.findConnections()
Esempio n. 4
0
    def _tracePath(self):
        """Find the execution path and flag its first executor.

        Asks the executor graph to find the execution path, stores the
        graph's ``execution`` attribute as ``self._executorPath`` and sets
        ``conf.firstExecutor = True`` on the executor looked up by the
        ``'name'`` entry of the first path element.

        :raises trfExceptions.TransformSetupException: if the execution path
            is empty.
        """
        self._executorGraph.findExecutionPath()

        self._executorPath = self._executorGraph.execution
        # Compare by value: "is 0" tests object identity, which only works
        # by virtue of interpreter-specific small-integer caching.
        if len(self._executorPath) == 0:
            raise trfExceptions.TransformSetupException(
                trfExit.nameToCode('TRF_SETUP'),
                'Execution path finding resulted in no substeps being executed'
                '(Did you correctly specify input data for this transform?)')
        # Tell the first executor that they are the first
        firstExecutorName = self._executorPath[0]['name']
        self._executorDictionary[firstExecutorName].conf.firstExecutor = True