Esempio n. 1
0
def athenaMPoutputsLinkAndUpdate(newFullFilenames, fileArg):
    """Make AthenaMP output files available in the current directory.

    Entries of ``newFullFilenames`` without a directory component are left
    alone. Entries with a directory component are handled as follows:

    * if ``newFullFilenames`` holds exactly one file, that file is renamed
      to ``fileArg.originalName``;
    * otherwise a symbolic link to the file is created in the current
      directory. The link name is the file's basename with trailing ``0``
      characters stripped, followed by a three digit index starting at 1.
      An existing entry with the link name is removed first.

    Args:
        newFullFilenames: list of output file paths.
        fileArg: object providing the ``originalName`` attribute used as
            the rename target in the single file case.

    Raises:
        trfExceptions.TransformExecutionException: with exit code
            ``TRF_OUTPUT_FILE_ERROR`` if the rename or the link fails.
    """
    # Any files we link are numbered from 1, because we always set
    # the filename given to athena has _000 as a suffix so that the
    # mother process' file can be used without linking
    fileIndex = 1
    linkedNameList = []
    # NOTE(review): newFilenameValue is filled in but not consumed by the
    # code visible here - presumably it is meant to update fileArg; confirm.
    newFilenameValue = []
    for fname in newFullFilenames:
        if path.dirname(fname) == "":
            # Already in the current directory: nothing to link
            linkedNameList.append(None)
            newFilenameValue.append(fname)
        else:
            linkName = "{0}{1:03d}".format(path.basename(fname).rstrip('0'), fileIndex)
            linkedNameList.append(linkName)
            newFilenameValue.append(linkName)
            fileIndex += 1

    singleOutput = len(newFullFilenames) == 1
    for linkname, fname in zip(linkedNameList, newFullFilenames):
        if not linkname:
            continue
        if singleOutput:
            # A lone output is simply moved to the originally requested name
            try:
                os.rename(fname, fileArg.originalName)
                newFilenameValue[0] = fileArg.originalName
            except OSError as e:
                # Report the real destination of the rename, not the unused link name
                raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Failed to move {0} to {1}: {2}".format(fname, fileArg.originalName, e))
        else:
            try:
                # lexists() is also true for dangling links, which must be replaced too
                if path.lexists(linkname):
                    os.unlink(linkname)
                os.symlink(fname, linkname)
            except OSError as e:
                raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Failed to link {0} to {1}: {2}".format(fname, linkname, e))
Esempio n. 2
0
def detectAthenaMPProcs(argdict = {}):
    """Detect how many AthenaMP worker processes have been requested.

    The ``ATHENA_PROC_NUMBER`` environment variable takes precedence. If it
    is not set, the ``--nprocs=N`` option is searched for in each substep of
    ``argdict['athenaopts'].value``; when several substeps are present the
    value found for the last substep iterated is the one kept.

    Args:
        argdict: transform argument dictionary. It is only read, never
            modified, so the shared default dictionary is harmless.

    Returns:
        The number of workers, or 0 if AthenaMP was not requested.

    Raises:
        trfExceptions.TransformExecutionException: with exit code
            ``TRF_EXEC_SETUP_FAIL`` if the setting is not an integer, is
            negative, or ``--nprocs`` appears more than once in a substep.
    """
    athenaMPProcs = 0

    # Try and detect if any AthenaMP has been enabled
    try:
        if 'ATHENA_PROC_NUMBER' in os.environ:
            athenaMPProcs = int(os.environ['ATHENA_PROC_NUMBER'])
            if athenaMPProcs < 0:
                raise ValueError("ATHENA_PROC_NUMBER value was less than zero")
            msg.info('AthenaMP detected from ATHENA_PROC_NUMBER with {0} workers'.format(athenaMPProcs))
        elif 'athenaopts' in argdict:
            for substep in argdict['athenaopts'].value:
                procArg = [opt.replace("--nprocs=", "") for opt in argdict['athenaopts'].value[substep] if '--nprocs' in opt]
                if len(procArg) == 0:
                    athenaMPProcs = 0
                elif len(procArg) == 1:
                    athenaMPProcs = int(procArg[0])
                    if athenaMPProcs < 0:
                        raise ValueError("--nprocs was set to a value less than zero")
                else:
                    raise ValueError("--nprocs was set more than once in 'athenaopts'")
                msg.info('AthenaMP detected from "nprocs" setting with {0} workers for substep {1}'.format(athenaMPProcs,substep))
    except ValueError as errMsg:
        # int() failures and the explicit sanity checks above all end up here
        myError = 'Problem discovering AthenaMP setup: {0}'.format(errMsg)
        raise trfExceptions.TransformExecutionException(trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), myError)

    # Hand the result back to the caller (previously it was computed and dropped)
    return athenaMPProcs
Esempio n. 3
0
    def postExecute(self):
        """Rename the trig_cost.root output to the requested file name.

        If the ``outputNTUP_TRIGCOSTFile`` argument is present in
        ``self.conf.argdict`` and a file called ``trig_cost.root`` exists in
        the current directory, that file is renamed to the first value of the
        argument. If the argument is present but the file is missing, an
        error is logged and no exception is raised.

        Raises:
            trfExceptions.TransformExecutionException: with exit code
                ``TRF_OUTPUT_FILE_ERROR`` if the rename fails.
        """

        msg.info("Check for trig_cost.root file")
        #costmon generates the file trig_cost.root
        #to save on panda it needs to be renamed via the outputNTUP_TRIGCOSTFile argument
        expectedFileName = 'trig_cost.root'
        #first check argument is in dict
        if 'outputNTUP_TRIGCOSTFile' in self.conf.argdict:
            requestedFileName = self.conf.argdict['outputNTUP_TRIGCOSTFile'].value[0]
            #check file is created
            if os.path.isfile(expectedFileName):
                msg.info('Renaming %s to %s' %
                         (expectedFileName, requestedFileName))
                try:
                    os.rename(expectedFileName, requestedFileName)
                except OSError as e:
                    raise trfExceptions.TransformExecutionException(
                        trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
                        'Exception raised when renaming {0} to {1}: {2}'.
                        format(expectedFileName, requestedFileName, e))
            else:
                msg.error(
                    'NTUP_TRIGCOST argument defined %s but %s not created' %
                    (requestedFileName, expectedFileName))
Esempio n. 4
0
 def preExecute(self, input = set(), output = set()):
     """Collect the run/event numbers present in the input RAW files.

     Runs ``AtlListBSEvents.exe -l`` on the files listed in
     ``self.conf.argdict['inputBSFile'].value`` and parses every output line
     starting with ``Index=``. Each such line is expected to carry a
     ``Run=<int>`` token followed by an ``Event=<int>`` token; the pair is
     stored in ``rawEventList`` under the key ``"<run>-<event>"``. Lines that
     cannot be understood are reported with a warning and skipped.

     Args:
         input: not used by the code visible here.
         output: not used by the code visible here.

     Raises:
         trfExceptions.TransformExecutionException: with exit code
             ``TRF_EXEC_SETUP_FAIL`` if AtlListBSEvents.exe exits with a
             non-zero status.
     """
     # First we need to strip the filter file down to events that are present
     # in the RAW file we are going to skim. This is because the HI workflow
     # will provide millions of events in their filter file, more than acmd.py
     # can cope with.
     listEvtCommand = ['AtlListBSEvents.exe', '-l']
     listEvtCommand.extend(self.conf.argdict['inputBSFile'].value)
     # For best lookup speed, we store the runnumber/eventnumber in a dictionary (set would also
     # be fast)
     rawEventList = {}
     try:
         # universal_newlines=True makes check_output return text on Python 3 as
         # well, so that the split on "\n" below keeps working
         listing = subprocess.check_output(listEvtCommand, universal_newlines=True)
         for line in listing.split("\n"):
             if not line.startswith("Index="):
                 continue
             try:
                 splitStrings = line.split(" ")
                 runprefix, runstr = splitStrings[1].split("=")
                 evtprefix, evtstr = splitStrings[2].split("=")
                 # Check sanity
                 if runprefix != "Run" or evtprefix != "Event":
                     msg.warning("Failed to understand this line from AtlListBSEvents: {0}".format(line))
                 else:
                     # The conversions are only a validity check: a non-integer
                     # value raises ValueError and the line is rejected below
                     int(runstr)
                     int(evtstr)
                     # We build up a string key as "RUN-EVENT", so that we can take advantage of
                     # the fast hash search against a dictionary
                     rawEventList[runstr + "-" + evtstr] = True
                     msg.debug("Identified run {0}, event {1} in input RAW files".format(runstr, evtstr))
             except (ValueError, IndexError):
                 # ValueError: bad token format; IndexError: too few tokens on the line
                 msg.warning("Failed to understand this line from AtlListBSEvents: {0}".format(line))
     except subprocess.CalledProcessError as e:
         errMsg = "Call to AtlListBSEvents.exe failed: {0}".format(e)
         msg.error(errMsg)
         raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_EXEC_SETUP_FAIL"), errMsg)
Esempio n. 5
0
def writeTranslate(runTranslate, runArgs, name, substep, first, output):
    """Write the translated option line to the file ``runTranslate``.

    The option value is obtained from ``getOption(runArgs, name, substep,
    first, output)`` and written, preceded by an empty line, as
    ``option =  <value>``. Any previous content of the file is overwritten.

    Args:
        runTranslate: path of the file to write.
        runArgs, name, substep, first, output: passed unchanged to
            ``getOption``.

    Raises:
        trfExceptions.TransformExecutionException: with exit code
            ``TRF_EXEC_RUNARGS_ERROR`` if writing to the file fails.
    """
    msg.info('Writing options to file \"%s\"' % runTranslate)

    option = getOption(runArgs, name, substep, first, output)

    msg.info('Options set to: \"%s\":' % option)

    with open(runTranslate, 'w') as runTranslateFile:
        try:
            # Produces the same bytes as the former Python 2 statement
            # ``print >> runTranslateFile, os.linesep, "option = ", option``:
            # no separator after the line break, one separating space between
            # the last two items and a final newline. Unlike the print
            # statement, this also works on Python 3.
            optionLine = " ".join(("option = ", str(option)))
            runTranslateFile.write(os.linesep + optionLine + "\n")
        except (IOError, OSError) as e:
            # Report the file path rather than the repr of the file object
            errMsg = 'Got an error when writing JO template {0}: {1}'.format(
                runTranslate, e)
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(
                trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
Esempio n. 6
0
    def writeRunArgs(self, input=dict(), output=dict()):
        """Write the runArgs job options file ``self._runArgsFile``.

        Before anything is written the ``CA`` argument is checked against the
        skeletons of the executor ``self._exe``: with ``CA`` present
        ``_skeletonCA`` must be set, without it ``_skeleton`` must be set.

        The file is then filled, in this order, with:
          * a header comment and the creation of the ``RunArguments`` object
            named ``self._runArgsName``;
          * the substep name (``trfSubstepName``);
          * every non-file argument of the executor's argdict that is flagged
            ``isRunarg`` (substep arguments are resolved with
            ``returnMyValue``), plus ``maxEvents = -1`` if ``maxEvents`` was
            not among them;
          * the input and output data arguments;
          * the executor's extra, runtime and literal runargs and its forced
            data arguments;
          * AthenaMP flags if ``self._exe._athenaMP`` is set;
          * threading settings and the skeleton import/call if ``CA`` is in
            the argdict.

        Args:
            input: mapping of data type to input file argument; ``value``,
                ``type``, ``isCached`` and ``nentries`` of each argument are
                used.
            output: mapping of data type to output file argument; only the
                first entry of ``value`` and ``type`` are written.

        Raises:
            trfExceptions.TransformExecutionException: with exit code
                ``TRF_EXEC_RUNARGS_ERROR`` if the skeleton check fails, if
                the AthenaMP event orders file is requested but missing, or
                if an IOError/OSError occurs while writing.
        """
        msg.info('Writing runArgs to file \"%s\"', self._runArgsFile)

        ## Check consistency btw --CA flag and provided skeletons:
        if 'CA' in self._exe.conf.argdict:
            if self._exe._skeletonCA is None:
                errMsg = "Got the --CA option but this transform doesn't supply a ComponentAccumulator-based skeleton file"
                msg.error(errMsg)
                raise trfExceptions.TransformExecutionException(
                    trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
        else:  # 'CA' not in self._exe.conf.argdict
            if self._exe._skeleton is None:
                errMsg = "No --CA option given, but this transform doesn't supply old-style skeleton file"
                msg.error(errMsg)
                raise trfExceptions.TransformExecutionException(
                    trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)

        with open(self._runArgsFile, 'w') as runargsFile:
            try:
                # First write a little header
                print(os.linesep.join(
                    ("# Run arguments file auto-generated on {0} by:".format(
                        time.asctime()),
                     "# JobTransform: {0}".format(self._exe.name),
                     "# Version: {0}".format(self._version))),
                      file=runargsFile)

                # Now make sure we import the runArgs class for out job options
                print(os.linesep.join(
                    ("# Import runArgs class",
                     "from PyJobTransforms.trfJobOptions import RunArguments",
                     "{0} = RunArguments()".format(self._runArgsName))),
                      file=runargsFile)

                # Handy to write the substep name here as it can be used as (part of) a random seed
                # in some cases
                print('{0}.trfSubstepName = {1!r}'.format(
                    self._runArgsName, self._exe.name),
                      os.linesep,
                      file=runargsFile)

                # Now loop over the core argdict and see what needs to be given as a runArg
                # declaredRunargs records the names already written, it is consulted
                # below for maxEvents and for the extra runargs
                declaredRunargs = []
                for k, v in iteritems(self._exe.conf.argdict):
                    # Check if this arg is supposed to be in runArgs
                    if isinstance(v, trfArgClasses.argument) and v.isRunarg:
                        # Files handled later
                        if isinstance(v, trfArgClasses.argFile):
                            continue

                        msg.debug(
                            'Argument {0} is a runarg, will be added to JO file (value {1})'
                            .format(k, v.value))

                        ## @note Substep type arguments are rather special, they apply to only named
                        #  executors or substeps. We use the returnMyValue() method to sort out what
                        #  specific value applies to us
                        if isinstance(v, trfArgClasses.argSubstep):
                            myValue = v.returnMyValue(exe=self._exe)
                            if myValue is not None:
                                print("{0}.{1!s} = {2!r}".format(
                                    self._runArgsName, k, myValue),
                                      file=runargsFile)
                                msg.debug(
                                    'Added substep type argument {0} as: {1}'.
                                    format(k, myValue))
                                declaredRunargs.append(k)
                        else:
                            print("{0}.{1!s} = {2!r}".format(
                                self._runArgsName, k, v.value),
                                  file=runargsFile)
                            declaredRunargs.append(k)
                    else:
                        msg.debug(
                            'Argument {0} is not a runarg - ignored'.format(k))

                # Now make sure that if we did not add maxEvents  then we set this to -1, which
                # avoids some strange defaults that only allow 5 events to be processed
                if 'maxEvents' not in declaredRunargs:
                    print(os.linesep.join((
                        "",
                        "# Explicitly added to process all events in this step",
                        "{0}.maxEvents = -1".format(self._runArgsName),
                    )),
                          file=runargsFile)

                # Now deal with our input and output files
                print(os.linesep, "# Input data", file=runargsFile)
                for dataType, dataArg in iteritems(input):
                    print('{0}.input{1}File = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.value),
                          file=runargsFile)
                    print('{0}.input{1}FileType = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.type),
                          file=runargsFile)
                    # Add the input event count, if we know it
                    if dataArg.isCached(metadataKeys=['nentries']):
                        print('{0}.input{1}FileNentries = {2!r}'.format(
                            self._runArgsName, dataType, dataArg.nentries),
                              file=runargsFile)
                    print("{0}.{1}FileIO = {2!r}".format(
                        self._runArgsName, dataType,
                        self._exe.conf.dataDictionary[dataType].io),
                          file=runargsFile)

                print(os.linesep, "# Output data", file=runargsFile)
                for dataType, dataArg in iteritems(output):
                    # Need to be careful to convert _output_ filename as a strings, not a list
                    print('{0}.output{1}File = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.value[0]),
                          file=runargsFile)
                    print('{0}.output{1}FileType = {2!r}'.format(
                        self._runArgsName, dataType, dataArg.type),
                          file=runargsFile)

                # Process all of the tweaky special runtime arguments
                print(os.linesep, "# Extra runargs", file=runargsFile)
                ## @note extraRunargs are passed using repr, i.e., they should be constants
                for k, v in iteritems(self._exe._extraRunargs):
                    ## @note: What to do if this is a CLI argument as well, in particular
                    #  for arguments like preExec we want to add to the list, not replace it
                    if k in declaredRunargs:
                        # An already declared runarg is only extended when its value is a
                        # list; for any other type the extra value is not written at all
                        if isinstance(self._exe.conf.argdict[k].value, list):
                            msg.debug('Extending runarg {0!s}={1!r}'.format(
                                k, v))
                            print('{0}.{1!s}.extend({2!r})'.format(
                                self._runArgsName, k, v),
                                  file=runargsFile)
                    else:
                        msg.debug('Adding runarg {0!s}={1!r}'.format(k, v))
                        print('{0}.{1!s} = {2!r}'.format(
                            self._runArgsName, k, v),
                              file=runargsFile)

                ## @note runtime runargs are passed as strings, i.e., they can be evaluated
                print(os.linesep, '# Extra runtime runargs', file=runargsFile)
                for k, v in iteritems(self._exe._runtimeRunargs):
                    # These options are string converted, not repred, so they can write an option
                    # which is evaluated at runtime
                    # Protect this with try: except: for the Embedding use case
                    msg.debug('Adding runarg {0!s}={1!r}'.format(k, v))
                    print(os.linesep.join(
                        ('try:', '    {0}.{1!s} = {2!s}'.format(
                            self._runArgsName, k, v), 'except AttributeError:',
                         '    printfunc ("WARNING - AttributeError for {0}")'.
                         format(k))),
                          file=runargsFile)

                ## @note Now write the literals into the runargs file
                if self._exe._literalRunargs is not None:
                    print(os.linesep,
                          '# Literal runargs snippets',
                          file=runargsFile)
                    for line in self._exe._literalRunargs:
                        print(line, file=runargsFile)

                ## Another special option - dataArgs are always written to the runargs file
                # NOTE(review): the header comment below sits inside the loop, so it is
                # written once per data type
                for dataType in self._exe._dataArgs:
                    print(os.linesep,
                          '# Forced data value arguments',
                          file=runargsFile)
                    if dataType in self._exe.conf.dataDictionary:
                        print('{0}.data{1}arg = {2!r}'.format(
                            self._runArgsName, dataType,
                            self._exe.conf.dataDictionary[dataType].value),
                              file=runargsFile)
                    else:
                        print(
                            '# Warning: data type "{0}" is not part of this transform'
                            .format(dataType),
                            file=runargsFile)

                # This adds the correct JO fragment for AthenaMP job, where we need to ask
                # the FileMgr to produce the requested log and report files
                # Also, aggregating the workers' logfiles into the mother's makes life
                # easier for debugging
                if self._exe._athenaMP:
                    print(os.linesep,
                          '# AthenaMP Options. nprocs = %d' %
                          self._exe._athenaMP,
                          file=runargsFile)
                    # Proxy for both options
                    print(os.linesep.join((
                        os.linesep,
                        'from AthenaMP.AthenaMPFlags import jobproperties as AthenaMPJobProps',
                        'AthenaMPJobProps.AthenaMPFlags.WorkerTopDir="{0}"'.
                        format(self._exe._athenaMPWorkerTopDir),
                        'AthenaMPJobProps.AthenaMPFlags.OutputReportFile="{0}"'
                        .format(self._exe._athenaMPFileReport),
                        'AthenaMPJobProps.AthenaMPFlags.EventOrdersFile="{0}"'.
                        format(self._exe._athenaMPEventOrdersFile),
                        'AthenaMPJobProps.AthenaMPFlags.CollectSubprocessLogs=True'
                    )),
                          file=runargsFile)
                    if self._exe._athenaMPStrategy:
                        # Beware of clobbering a non default value (a feature used by EventService)
                        print(
                            'if AthenaMPJobProps.AthenaMPFlags.Strategy.isDefault():',
                            file=runargsFile)
                        print(
                            '\tAthenaMPJobProps.AthenaMPFlags.Strategy="{0}"'.
                            format(self._exe._athenaMPStrategy),
                            file=runargsFile)
                    if self._exe._athenaMPReadEventOrders:
                        # Reading event orders is only possible if the orders file exists
                        if os.path.isfile(self._exe._athenaMPEventOrdersFile):
                            print(
                                'AthenaMPJobProps.AthenaMPFlags.ReadEventOrders=True',
                                file=runargsFile)
                        else:
                            raise trfExceptions.TransformExecutionException(
                                trfExit.nameToCode("TRF_EXEC_RUNARGS_ERROR"),
                                "Failed to find file: {0} required by athenaMP option: --athenaMPUseEventOrders true"
                                .format(self._exe._athenaMPEventOrdersFile))
                    if 'athenaMPEventsBeforeFork' in self._exe.conf.argdict:
                        print(
                            'AthenaMPJobProps.AthenaMPFlags.EventsBeforeFork={0}'
                            .format(self._exe.conf.
                                    argdict['athenaMPEventsBeforeFork'].value),
                            file=runargsFile)
                if 'CA' in self._exe.conf.argdict:
                    print(os.linesep, '# Threading flags', file=runargsFile)
                    #Pass the number of threads
                    threads = self._exe._athenaMT
                    concurrentEvents = self._exe._athenaConcurrentEvents
                    msg.debug('Adding runarg {0!s}={1!r}'.format(
                        'threads', threads))
                    print('{0}.{1!s} = {2!r}'.format(self._runArgsName,
                                                     'threads', threads),
                          file=runargsFile)
                    msg.debug('Adding runarg {0!s}={1!r}'.format(
                        'concurrentEvents', concurrentEvents))
                    print('{0}.{1!s} = {2!r}'.format(self._runArgsName,
                                                     'concurrentEvents',
                                                     concurrentEvents),
                          file=runargsFile)
                    #ComponentAccumulator based config, import skeleton here:
                    print(os.linesep,
                          '# Import skeleton and execute it',
                          file=runargsFile)
                    print('from {0} import fromRunArgs'.format(
                        self._exe._skeletonCA),
                          file=runargsFile)
                    print('fromRunArgs({0})'.format(self._runArgsName),
                          file=runargsFile)

                msg.info('Successfully wrote runargs file {0}'.format(
                    self._runArgsFile))

            except (IOError, OSError) as e:
                errMsg = 'Got an error when writing JO template {0}: {1}'.format(
                    self._runArgsFile, e)
                msg.error(errMsg)
                raise trfExceptions.TransformExecutionException(
                    trfExit.nameToCode('TRF_EXEC_RUNARGS_ERROR'), errMsg)
Esempio n. 7
0
def athenaMPOutputHandler(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary, athenaMPworkers, skipFileChecks = False, argdict = {}):
    """Find the files written by AthenaMP workers and link them to their arguments.

    Two sources are used:

    1. Unless ``argdict['sharedWriter']`` has a true value, the XML report
       ``athenaMPFileReport`` is parsed. Every ``Files`` element whose
       ``OriginalName`` attribute equals the first value of an argument in
       ``dataDictionary`` has the ``name`` attributes of its ``File`` children
       passed, as relative paths, to ``athenaMPoutputsLinkAndUpdate``.
    2. For every data type not matched that way whose ``io`` is not
       ``"input"``, the tree below ``athenaMPWorkerTopDir`` is searched for
       files whose name starts with the argument's first value. Directories
       with ``evt_count`` or ``range_scatterer`` in their path are ignored.

    Args:
        athenaMPFileReport: path of the AthenaMP outputs report (XML).
        athenaMPWorkerTopDir: top directory of the worker directories.
        dataDictionary: mapping of data type to file argument.
        athenaMPworkers: number of files expected per data type in step 2.
        skipFileChecks: if true, multiple matches in one directory and an
            unexpected number of files are tolerated. Forced to true in
            shared writer mode.
        argdict: transform argument dictionary; only read, never modified.

    Raises:
        trfExceptions.TransformExecutionException: with exit code
            ``TRF_OUTPUT_FILE_ERROR`` if the report cannot be read, if a
            worker directory holds several candidates for one data type, or
            if the number of files found differs from ``athenaMPworkers``
            (the last two only when file checks are enabled).
    """
    msg.debug("MP output handler called for report {0} and workers in {1}, data types {2}".format(athenaMPFileReport, athenaMPWorkerTopDir, dataDictionary.keys()))
    outputHasBeenHandled = {dataType: False for dataType in dataDictionary if dataDictionary[dataType]}

    # if sharedWriter mode is active ignore athenaMPFileReport
    sharedWriter=False
    if 'sharedWriter' in argdict and argdict['sharedWriter'].value:
        sharedWriter=True
        skipFileChecks=True

    if not sharedWriter:
        # First, see what AthenaMP told us
        mpOutputs = ElementTree.ElementTree()
        try:
            mpOutputs.parse(athenaMPFileReport)
        except IOError:
            raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Missing AthenaMP outputs file {0} (probably athena crashed)".format(athenaMPFileReport))
        # iter() replaces getiterator(), which no longer exists in Python 3.9+
        for filesElement in mpOutputs.getroot().iter(tag='Files'):
            msg.debug('Examining element {0} with attributes {1}'.format(filesElement, filesElement.attrib))
            originalArg = None
            startName = filesElement.attrib['OriginalName']
            for dataType, fileArg in dataDictionary.items():
                if fileArg.value[0] == startName:
                    originalArg = fileArg
                    outputHasBeenHandled[dataType] = True
                    break
            if originalArg is None:
                msg.warning('Found AthenaMP output with name {0}, but no matching transform argument'.format(startName))
                continue

            msg.debug('Found matching argument {0}'.format(originalArg))
            fileNameList = []
            for fileElement in filesElement.iter(tag='File'):
                msg.debug('Examining element {0} with attributes {1}'.format(fileElement, fileElement.attrib))
                fileNameList.append(path.relpath(fileElement.attrib['name']))

            # Use the matched argument explicitly rather than the leaked loop variable
            athenaMPoutputsLinkAndUpdate(fileNameList, originalArg)

    # Now look for additional outputs that have not yet been handled
    if not all(outputHasBeenHandled.values()):
        # OK, we have something we need to search for; cache the dirwalk here
        MPdirWalk = list(os.walk(athenaMPWorkerTopDir))

        for dataType, fileArg in dataDictionary.items():
            # Arguments that evaluate as false were never registered above;
            # treat them as nothing to do instead of failing with a KeyError
            if outputHasBeenHandled.get(dataType, True):
                continue
            # Compare by value: identity comparison against a string literal is unreliable
            if fileArg.io == "input":
                continue
            msg.info("Searching MP worker directories for {0}".format(dataType))
            startName = fileArg.value[0]
            fileNameList = []
            for entry in MPdirWalk:
                if "evt_count" in entry[0]:
                    continue
                if "range_scatterer" in entry[0]:
                    continue
                # N.B. AthenaMP may have made the output name unique for us, so
                # we need to treat the original name as a prefix
                possibleOutputs = [ fname for fname in entry[2] if fname.startswith(startName) ]
                if len(possibleOutputs) == 0:
                    continue
                elif len(possibleOutputs) == 1:
                    fileNameList.append(path.join(entry[0], possibleOutputs[0]))
                elif skipFileChecks:
                    # Ambiguous directory: with checks disabled it is simply not used
                    pass
                else:
                    raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found multiple matching outputs for datatype {0} in {1}: {2}".format(dataType, entry[0], possibleOutputs))
            if skipFileChecks:
                pass
            elif len(fileNameList) != athenaMPworkers:
                raise trfExceptions.TransformExecutionException(trfExit.nameToCode("TRF_OUTPUT_FILE_ERROR"), "Found {0} output files for {1}, expected {2} (found: {3})".format(len(fileNameList), dataType, athenaMPworkers, fileNameList))

            # Found expected number of files - good!
            athenaMPoutputsLinkAndUpdate(fileNameList, fileArg)
Esempio n. 8
0
def detectAthenaMTThreads(argdict={}):
    """Work out the AthenaMT thread count and the number of concurrent events.

    For each substep of ``argdict['athenaopts'].value`` the options are
    scanned for ``--threads=N`` and ``--concurrent-events=N``. Without a
    single ``--concurrent-events`` option the number of concurrent events
    follows the number of threads. If no threads were found that way, and
    both the ``multithreaded`` argument and the ``ATHENA_CORE_NUMBER``
    environment variable are present, the environment value is used for both
    results.

    Args:
        argdict: transform argument dictionary (only read).

    Returns:
        Tuple ``(threads, concurrent events)``; ``(0, 0)`` if nothing was
        detected.

    Raises:
        trfExceptions.TransformExecutionException: with exit code
            ``TRF_EXEC_SETUP_FAIL`` for non-integer values, values below -1,
            a repeated ``--threads`` option, or ``--threads`` combined with
            the ``multithreaded`` argument.
    """
    nThreads = 0
    nConcurrent = 0

    try:
        if 'athenaopts' in argdict:
            optsPerSubstep = argdict['athenaopts'].value
            for substep in optsPerSubstep:
                # Collect the values of all --threads options of this substep
                threadValues = []
                for opt in optsPerSubstep[substep]:
                    if '--threads' in opt:
                        threadValues.append(opt.replace("--threads=", ""))

                if len(threadValues) > 1:
                    raise ValueError(
                        "--threads was set more than once in 'athenaopts'")
                if not threadValues:
                    nThreads = 0
                else:
                    if 'multithreaded' in argdict:
                        raise ValueError(
                            "Detected conflicting methods to configure AthenaMT: --multithreaded and --threads=N (via athenaopts). Only one method must be used"
                        )
                    nThreads = int(threadValues[0])
                    if nThreads < -1:
                        raise ValueError(
                            "--threads was set to a value less than -1")
                msg.info(
                    'AthenaMT detected from "threads" setting with {0} threads for substep {1}'
                    .format(nThreads, substep))

                # Same scan for the concurrent events option
                concurrentValues = []
                for opt in optsPerSubstep[substep]:
                    if '--concurrent-events' in opt:
                        concurrentValues.append(
                            opt.replace("--concurrent-events=", ""))

                if len(concurrentValues) != 1:
                    # Not given (or ambiguous): follow the thread count
                    nConcurrent = nThreads
                else:
                    nConcurrent = int(concurrentValues[0])
                    if nConcurrent < -1:
                        raise ValueError(
                            "--concurrent-events was set to a value less than -1"
                        )
                    msg.info(
                        'Custom concurrent event setting read from "concurrent-events" with {0} events for substep {1}'
                        .format(nConcurrent, substep))

        # Fall back to the environment only if nothing was found in athenaopts
        useEnvironment = (nThreads == 0
                          and 'ATHENA_CORE_NUMBER' in os.environ
                          and 'multithreaded' in argdict)
        if useEnvironment:
            nThreads = int(os.environ['ATHENA_CORE_NUMBER'])
            if nThreads < -1:
                raise ValueError("ATHENA_CORE_NUMBER value was less than -1")
            msg.info(
                'AthenaMT detected from ATHENA_CORE_NUMBER with {0} threads'.
                format(nThreads))
            nConcurrent = nThreads
    except ValueError as err:
        problem = 'Problem discovering AthenaMT setup: {0}'.format(err)
        raise trfExceptions.TransformExecutionException(
            trfExit.nameToCode('TRF_EXEC_SETUP_FAIL'), problem)

    return nThreads, nConcurrent
Esempio n. 9
0
        #first check argument is in dict
        if 'outputNTUP_TRIGRATEFile' in self.conf.argdict:
            #check file is created
            if (os.path.isfile(expectedFileName)):
                msg.info(
                    'Renaming %s to %s' %
                    (expectedFileName,
                     self.conf.argdict['outputNTUP_TRIGRATEFile'].value[0]))
                try:
                    os.rename(
                        expectedFileName,
                        self.conf.argdict['outputNTUP_TRIGRATEFile'].value[0])
                except OSError, e:
                    raise trfExceptions.TransformExecutionException(
                        trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
                        'Exception raised when renaming {0} to {1}: {2}'.
                        format(
                            expectedFileName, self.conf.
                            argdict['outputNTUP_TRIGRATEFile'].value[0], e))
            else:
                msg.error(
                    'NTUP_TRIGRATE argument defined %s but %s not created' %
                    (self.conf.argdict['outputNTUP_TRIGRATEFile'].value[0],
                     expectedFileName))
        else:
            msg.info('NTUP_TRIGRATE argument not defined so skip %s check' %
                     expectedFileName)

        msg.info("Check for trig_ebweight.root file")
        #costmon generates the file trig_ebweight.root
        #to save on panda it needs to be renamed via the outputNTUP_TRIGEBWGHTFile argument
        expectedFileName = 'trig_ebweight.root'
Esempio n. 10
0
    def preExecute(self, input=set(), output=set()):
        """Prepare this substep for execution and write its athena wrapper.

        Steps, in order: check that skipEvents leaves events to process, build
        the top options from the skeleton (if any), record inputs/outputs in
        the extra metadata, resolve the asetup and DBRelease settings, prepare
        the athena command line, apply the athenaHLT specific command line
        rewrites (runtranslate file, option dump, debug_stream pre-run), call
        the preExecute of the parent of athenaExecutor and finally write the
        wrapper script.

        @param input Collection of input data type names for this substep
        @param output Collection of output data type names for this substep
        @throws trfExceptions.TransformExecutionException (TRF_NOEVENTS) if
                skipEvents is >= the number of events of a countable input
        """
        msg.debug(
            'Preparing for execution of {0} with inputs {1} and outputs {2}'.
            format(self.name, input, output))
        # NOTE: AthenaMP detection (self._detectAthenaMP) and the related
        # worker directory / file report settings are disabled in this executor

        # Check we actually have events to process!
        if self._inputEventTest and 'skipEvents' in self.conf.argdict:
            # Resolve the skipEvents value for this substep once; it is used for
            # the test, the comparison and the error message
            skipEvents = self.conf.argdict['skipEvents'].returnMyValue(
                name=self._name,
                substep=self._substep,
                first=self.conf.firstExecutor)
            if skipEvents is not None:
                msg.debug('Will test for events to process')
                for dataType in input:
                    inputEvents = self.conf.dataDictionary[dataType].nentries
                    msg.debug('Got {0} events for {1}'.format(
                        inputEvents, dataType))
                    if not isinstance(inputEvents, (int, long)):
                        msg.warning(
                            'Are input events countable? Got nevents={0} so disabling event count check for this input'
                            .format(inputEvents))
                    elif skipEvents >= inputEvents:
                        raise trfExceptions.TransformExecutionException(
                            trfExit.nameToCode('TRF_NOEVENTS'),
                            'No events to process: {0} (skipEvents) >= {1} (inputEvents of {2}'
                            .format(skipEvents, inputEvents, dataType))

        # Map output data types onto their file arguments. This is done
        # unconditionally because the athenaHLT branch below passes it to
        # writeTranslate even when no skeleton is configured
        outputFiles = dict()
        for dataType in output:
            outputFiles[dataType] = self.conf.dataDictionary[dataType]

        ## Write the skeleton file and prep athena
        if self._skeleton is not None:
            inputFiles = dict()
            for dataType in input:
                inputFiles[dataType] = self.conf.dataDictionary[dataType]

            # See if we have any 'extra' file arguments
            for dataType, dataArg in self.conf.dataDictionary.iteritems():
                if dataArg.io == 'input' and self._name in dataArg.executor:
                    inputFiles[dataArg.subtype] = dataArg

            msg.info('Input Files: {0}; Output Files: {1}'.format(
                inputFiles, outputFiles))

            # Get the list of top options files that will be passed to athena (=runargs file + all skeletons)
            self._topOptionsFiles = self._jobOptionsTemplate.getTopOptions(
                input=inputFiles, output=outputFiles)

        ## Add input/output file information - this can't be done in __init__ as we don't know what our
        #  inputs and outputs will be then
        if len(input) > 0:
            self._extraMetadata['inputs'] = list(input)
        if len(output) > 0:
            self._extraMetadata['outputs'] = list(output)

        ## Do we need to run asetup first?
        asetupString = None
        if 'asetup' in self.conf.argdict:
            asetupString = self.conf.argdict['asetup'].returnMyValue(
                name=self._name,
                substep=self._substep,
                first=self.conf.firstExecutor)
        else:
            msg.info('Asetup report: {0}'.format(asetupReport()))

        ## DBRelease configuration
        dbrelease = dbsetup = None
        if 'DBRelease' in self.conf.argdict:
            dbrelease = self.conf.argdict['DBRelease'].returnMyValue(
                name=self._name,
                substep=self._substep,
                first=self.conf.firstExecutor)
            if dbrelease:
                # Classic tarball - filename format is DBRelease-X.Y.Z.tar.gz
                dbdMatch = re.match(r'DBRelease-([\d\.]+)\.tar\.gz',
                                    os.path.basename(dbrelease))
                if dbdMatch:
                    msg.debug(
                        'DBRelease setting {0} matches classic tarball file'.
                        format(dbrelease))
                    if not os.access(dbrelease, os.R_OK):
                        # Tarball is not readable: fall back to the version
                        # number extracted from its file name
                        msg.warning(
                            'Transform was given tarball DBRelease file {0}, but this is not there'
                            .format(dbrelease))
                        msg.warning(
                            'I will now try to find DBRelease {0} in cvmfs'.
                            format(dbdMatch.group(1)))
                        dbrelease = dbdMatch.group(1)
                        dbsetup = cvmfsDBReleaseCheck(dbrelease)
                    else:
                        # Check if the DBRelease is setup
                        unpacked, dbsetup = unpackDBRelease(
                            tarball=dbrelease, dbversion=dbdMatch.group(1))
                        if unpacked:
                            # Now run the setup.py script to customise the paths to the current location...
                            setupDBRelease(dbsetup)
                # For cvmfs we want just the X.Y.Z release string (and also support 'current')
                else:
                    dbsetup = cvmfsDBReleaseCheck(dbrelease)

        # Look for environment updates and prepare the athena command line.
        # The environmentUpdate object is needed by _prepAthenaCommandLine, but
        # setStandardEnvironment is deliberately not called so that imf and
        # tcmalloc are not included
        self._envUpdate = trfEnv.environmentUpdate()
        self._prepAthenaCommandLine()

        # To get athenaHLT to read in the relevant parts from the runargs file we have to add the -F option
        if 'athenaHLT' in self._exe:
            translateOption = '-F runtranslate.BSRDOtoRAW.py'
            self._cmd = [
                translateOption if x == 'runargs.BSRDOtoRAW.py' else x
                for x in self._cmd
            ]

            # Write runTranslate file to be used by athenaHLT
            writeTranslate('runtranslate.BSRDOtoRAW.py',
                           self.conf.argdict,
                           name=self._name,
                           substep=self._substep,
                           first=self.conf.firstExecutor,
                           output=outputFiles)

            # Instead of running athenaHLT we can dump the options it has loaded
            # Note the -D needs to go after the -F in the command
            if 'dumpOptions' in self.conf.argdict:
                self._cmd = [
                    translateOption + ' -D' if x == translateOption else x
                    for x in self._cmd
                ]

            # Run preRun step debug_stream analysis if debug_stream=True
            if 'debug_stream' in self.conf.argdict:
                # Only the declared inputs are used here (no 'extra' file
                # arguments), so this map is built separately from the one
                # handed to the skeleton
                dbgInputFiles = dict()
                for dataType in input:
                    dbgInputFiles[dataType] = self.conf.dataDictionary[dataType]

                # Set default file name for debug_stream analysis output
                fileNameDbg = ['debug-stream-monitoring.root']
                if 'HIST_DEBUGSTREAMMON' in output:
                    fileNameDbg = outputFiles['HIST_DEBUGSTREAMMON'].value

                # If the file exists then rename it to *_old.root to keep as backup
                if os.path.isfile(fileNameDbg[0]):
                    oldOutputFileNameDbg = fileNameDbg[0].replace(
                        ".root", "_old.root")
                    msg.info('Renaming %s to %s' %
                             (fileNameDbg[0], oldOutputFileNameDbg))
                    os.rename(fileNameDbg[0], oldOutputFileNameDbg)

                # Do debug_stream preRun step and get asetup string from debug_stream input files
                dbgAsetupString = dbgStream.dbgPreRun(dbgInputFiles['BS_RDO'],
                                                      fileNameDbg)
                # Setup asetup from debug_stream if no --asetup r2b:string was given and is not running with tzero/software/patches as TestArea
                if asetupString is None and dbgAsetupString is not None:
                    asetupString = dbgAsetupString
                    msg.info(
                        'Will use asetup string for debug_stream analsys %s' %
                        dbgAsetupString)

        # Call athenaExecutor parent as the above overrides what athenaExecutor would have done
        super(athenaExecutor, self).preExecute(input, output)

        # Now we always write a wrapper, because it's very convenient for re-running individual substeps
        # This will have asetup and/or DB release setups in it
        # Do this last in this preExecute as the _cmd needs to be finalised
        msg.info('Now writing wrapper for substep executor {0}'.format(
            self._name))
        self._writeAthenaWrapper(asetup=asetupString, dbsetup=dbsetup)
        msg.info('Athena will be executed in a subshell via {0}'.format(
            self._cmd))
Esempio n. 11
0
                         (argInDict.value[0], matchedOutputFileNames))
                argInDict.multipleOK = True
                argInDict.value = matchedOutputFileNames
                argInDict._dataset = dataset_argInDict
            elif (len(matchedOutputFileNames)):
                msg.info('Single BS file found: will rename file')
                msg.info('Renaming BS file from %s to %s' %
                         (matchedOutputFileNames[0], argInDict.value[0]))
                try:
                    os.rename(matchedOutputFileNames[0], argInDict.value[0])
                except OSError, e:
                    msg.error(
                        'Exception raised when renaming {0} #to {1}: {2}'.
                        format(expectedInput, inputFile, e))
                    raise trfExceptions.TransformExecutionException(
                        trfExit.nameToCode('TRF_OUTPUT_FILE_ERROR'),
                        'Exception raised when renaming {0} #to {1}: {2}'.
                        format(expectedInput, inputFile, e))
            else:
                msg.error('no BS files created with expected name: %s' %
                          expectedOutputFileName)
        else:
            msg.info(
                'BS output filetype not defined so skip BS filename check')

        #Run PostRun step debug_stream analysis if debug_stream=True
        if 'debug_stream' in self.conf.argdict:
            msg.info("debug_stream analysis in postExecute")

            #set default file name for debug_stream analysis output
            fileNameDbg = ['debug-stream-monitoring.root']
            if "outputHIST_DEBUGSTREAMMONFile" in self.conf.argdict:
Esempio n. 12
0
    def preExecute(self, input=set(), output=set()):
        """Slim the master filter file and build the acmd.py skim command.

        The filter file is first stripped down to the events that are present
        in the RAW input files, because the HI workflow provides millions of
        events in its filter file, more than acmd.py can cope with. The
        slimmed list is written to slimmedFilterFile.<pid> and used to build
        the 'acmd.py filter-files' command line.

        @param input Unused here; kept for interface compatibility
        @param output Unused here; kept for interface compatibility
        @throws trfExceptions.TransformExecutionException (TRF_EXEC_SETUP_FAIL)
                if the call to AtlListBSEvents fails
        """
        listEvtCommand = ['AtlListBSEvents', '-l']
        listEvtCommand.extend(self.conf.argdict['inputBSFile'].value)

        try:
            eventListing = subprocess.check_output(listEvtCommand)
        except subprocess.CalledProcessError as e:
            errMsg = "Call to AtlListBSEvents failed: {0}".format(e)
            msg.error(errMsg)
            raise trfExceptions.TransformExecutionException(
                trfExit.nameToCode("TRF_EXEC_SETUP_FAIL"), errMsg)

        # For best lookup speed the "RUN-EVENT" keys are stored in a set, which
        # gives a fast hash search when scanning the master filter file
        rawEventList = set()
        for line in eventListing.split("\n"):
            if not line.startswith("Index="):
                continue
            try:
                splitStrings = line.split(" ")
                runprefix, runstr = splitStrings[1].split("=")
                evtprefix, evtstr = splitStrings[2].split("=")
                # Check sanity
                if runprefix != "Run" or evtprefix != "Event":
                    msg.warning(
                        "Failed to understand this line from AtlListBSEvents: {0}"
                        .format(line))
                    continue
                # int() is used purely as validation that both fields are
                # numeric; a failure is reported as a malformed line below
                int(runstr)
                int(evtstr)
            except (ValueError, IndexError):
                # ValueError: wrong number of '=' fields or non-numeric value
                # IndexError: fewer space separated fields than expected
                msg.warning(
                    "Failed to understand this line from AtlListBSEvents: {0}"
                    .format(line))
                continue
            rawEventList.add(runstr + "-" + evtstr)
            msg.debug(
                "Identified run {0}, event {1} in input RAW files"
                .format(runstr, evtstr))
        msg.info("Found {0} events as skim candidates in RAW inputs".format(
            len(rawEventList)))

        # Now open the list of filter events, and check through them
        slimmedFilterFile = "slimmedFilterFile.{0}".format(os.getpid())
        with open(slimmedFilterFile, "w") as slimFF, open(
                self.conf.argdict['filterFile'].value) as masterFF:
            count = 0
            for line in masterFF:
                try:
                    runstr, evtstr = line.split()
                except ValueError as e:
                    msg.warning(
                        "Failed to understand this line from master filter file: {0} {1}"
                        .format(line, e))
                    continue
                if runstr + "-" + evtstr in rawEventList:
                    msg.debug(
                        "Found run {0}, event {1} in master filter list".
                        format(runstr, evtstr))
                    slimFF.write(line)
                    count += 1
            if count == 0:
                # If there are no matched events, write a bogus request for run
                # and event 0 so that the filter file is not empty
                # (presumably required by the AtlCopyBSEvent.exe CLI - TODO confirm)
                msg.info(
                    "No events matched in this input file - empty RAW file output will be made"
                )
                slimFF.write("0 0\n")
        msg.info(
            "Matched {0} lines from the master filter file against input events; wrote these to {1}"
            .format(count, slimmedFilterFile))

        # Build up the right command line for acmd.py
        self._cmd = ['acmd.py', 'filter-files']
        self._cmd.extend(
            ('-o', self.conf.argdict['outputBS_SKIMFile'].value[0]))
        self._cmd.extend(('-s', slimmedFilterFile))
        self._cmd.extend(self.conf.argdict['inputBSFile'].value)

        super(skimRawExecutor, self).preExecute()