def execute(self, emulator = None):
    """
    _execute_

    """
    #Are we using emulators again?
    if (emulator != None):
        return emulator.emulate( self.step, self.job )

    overrides = {}
    if hasattr(self.step, 'override'):
        overrides = self.step.override.dictionary_()

    # Set wait to over an hour
    waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

    logging.info("StageOut override is: %s " % self.step)

    # Pull out StageOutMgr Overrides
    # switch between old stageOut behavior and new, fancy stage out behavior
    useNewStageOutCode = False
    if overrides.has_key('newStageOut') and overrides.get('newStageOut'):
        useNewStageOutCode = True

    stageOutCall = {}
    if overrides.has_key("command") and overrides.has_key("option") \
            and overrides.has_key("se-name") and overrides.has_key("lfn-prefix"):
        logging.critical('using override in StageOut')
        stageOutCall['command'] = overrides.get('command')
        stageOutCall['option'] = overrides.get('option')
        stageOutCall['se-name'] = overrides.get('se-name')
        stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

    # naw man, this is real
    # iterate over all the incoming files
    if not useNewStageOutCode:
        # old style
        manager = StageOutMgr.StageOutMgr(**stageOutCall)
        manager.numberOfRetries = self.step.retryCount
        manager.retryPauseTime = self.step.retryDelay
    else:
        # new style
        logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
        print "STAGEOUT IS USING NEW STAGEOUT CODE"
        manager = WMCore.Storage.FileManager.StageOutMgr(
            retryPauseTime = self.step.retryDelay,
            numberOfRetries = self.step.retryCount,
            **stageOutCall)

    # We need to find a list of steps in our task
    # And eventually a list of jobReports for our steps

    # Search through steps for report files
    filesTransferred = []

    for step in self.stepSpace.taskSpace.stepSpaces():
        if step == self.stepName:
            #Don't try to parse your own report; it's not there yet
            continue

        stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
        logging.info("Beginning report processing for step %s" % (step))
        reportLocation = os.path.join(stepLocation, 'Report.pkl')
        if not os.path.isfile(reportLocation):
            logging.error("Cannot find report for step %s in space %s" \
                          % (step, stepLocation))
            continue

        # First, get everything from a file and 'unpersist' it
        stepReport = Report()
        stepReport.unpersist(reportLocation, step)
        taskID = getattr(stepReport.data, 'id', None)

        # Don't stage out files from bad steps.
        if not stepReport.stepSuccessful(step):
            continue

        # Okay, time to start using stuff
        # Now I'm a bit confused about this; each report should ONLY
        # Have the results of that particular step in it,
        # So getting all the files should get ONLY the files
        # for that step; or so I hope
        files = stepReport.getAllFileRefsFromStep(step = step)
        for file in files:
            if not hasattr(file, 'lfn') and hasattr(file, 'pfn'):
                # Then we're truly hosed on this file; ignore it
                msg = "Not a file: %s" % file
                logging.error(msg)
                continue
            # Support direct-to-merge
            # This requires pulling a bunch of stuff from everywhere
            # First check if it's needed
            if hasattr(self.step.output, 'minMergeSize') \
                    and hasattr(file, 'size') \
                    and not getattr(file, 'merged', False):
                # We need both of those to continue, and we don't
                # direct-to-merge
                if getattr(self.step.output, 'doNotDirectMerge', False):
                    # Then we've been told explicitly not to do direct-to-merge
                    continue
                if file.size >= self.step.output.minMergeSize:
                    # Then this goes direct to merge
                    try:
                        file = self.handleLFNForMerge(mergefile = file, step = step)
                    except Exception, ex:
                        logging.error("Encountered error while handling LFN for merge due to size.\n")
                        logging.error(str(ex))
                        logging.debug(file)
                        logging.debug("minMergeSize: %s" % self.step.output.minMergeSize)
                        manager.cleanSuccessfulStageOuts()
                        stepReport.addError(self.stepName, 60401, "DirectToMergeFailure", str(ex))
            elif getattr(self.step.output, 'maxMergeEvents', None) != None \
                    and getattr(file, 'events', None) != None \
                    and not getattr(file, 'merged', False):
                # Then direct-to-merge due to events if
                # the file is large enough:
                if file.events >= self.step.output.maxMergeEvents:
                    # straight to merge
                    try:
                        file = self.handleLFNForMerge(mergefile = file, step = step)
                    except Exception, ex:
                        logging.error("Encountered error while handling LFN for merge due to events.\n")
                        logging.error(str(ex))
                        logging.debug(file)
                        logging.debug("maxMergeEvents: %s" % self.step.output.maxMergeEvents)
                        manager.cleanSuccessfulStageOuts()
                        stepReport.addError(self.stepName, 60402, "DirectToMergeFailure", str(ex))

            # Save the input PFN in case we need it
            # Undecided whether to move file.pfn to the output PFN
            file.InputPFN = file.pfn
            lfn = getattr(file, 'lfn')
            fileSource = getattr(file, 'Source', None)
            if fileSource in ['TFileService', 'UserDefined']:
                userLfnRegEx(lfn)
            else:
                lfnRegEx(lfn)

            fileForTransfer = {'LFN': lfn,
                               'PFN': getattr(file, 'pfn'),
                               'SEName': None,
                               'StageOutCommand': None}

            signal.signal(signal.SIGALRM, alarmHandler)
            signal.alarm(waitTime)
            try:
                manager(fileForTransfer)
                #Afterwards, the file should have updated info.
                filesTransferred.append(fileForTransfer)
                file.StageOutCommand = fileForTransfer['StageOutCommand']
                file.location = fileForTransfer['SEName']
                file.OutputPFN = fileForTransfer['PFN']
            except Alarm:
                msg = "Indefinite hang during stageOut of logArchive"
                logging.error(msg)
                manager.cleanSuccessfulStageOuts()
                stepReport.addError(self.stepName, 60403, "StageOutTimeout", msg)
                stepReport.persist("Report.pkl")
            except Exception, ex:
                manager.cleanSuccessfulStageOuts()
                stepReport.addError(self.stepName, 60307, "StageOutFailure", str(ex))
                stepReport.setStepStatus(self.stepName, 1)
                stepReport.persist("Report.pkl")
                raise
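

# --- Illustrative sketch (not part of the original module) -------------------
# Every variant of execute() in this file arms a SIGALRM before calling the
# stage-out manager and catches a custom Alarm exception when the transfer
# hangs past waitTime.  Neither alarmHandler nor Alarm is defined in this
# section; the minimal implementation below is only an assumption that is
# consistent with how the names are used at the call sites.

import signal

class Alarm(Exception):
    """Raised by the SIGALRM handler to abort a hung stage-out attempt."""
    pass

def alarmHandler(signum, frame):
    """Handler registered via signal.signal(signal.SIGALRM, alarmHandler)."""
    raise Alarm()

# Typical usage, mirroring the try/except Alarm blocks in execute():
#
#     signal.signal(signal.SIGALRM, alarmHandler)
#     signal.alarm(waitTime)
#     try:
#         manager(fileForTransfer)
#     except Alarm:
#         ...report the timeout...
#     finally:
#         signal.alarm(0)   # always cancel the pending alarm
# ------------------------------------------------------------------------------
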
def execute(self, emulator = None):
    """
    _execute_

    """
    #Are we using emulators again?
    if (emulator != None):
        return emulator.emulate( self.step, self.job )

    overrides = {}
    if hasattr(self.step, 'override'):
        overrides = self.step.override.dictionary_()

    # Set wait to two hours per retry
    # this alarm leaves a subprocess behind that may cause trouble, see #6273
    waitTime = overrides.get('waitTime', 7200 * self.step.retryCount)

    logging.info("StageOut override is: %s ", self.step)

    # Pull out StageOutMgr Overrides
    # switch between old stageOut behavior and new, fancy stage out behavior
    useNewStageOutCode = False
    if getattr(self.step, 'newStageout', False) or \
            ('newStageOut' in overrides and overrides.get('newStageOut')):
        useNewStageOutCode = True

    stageOutCall = {}
    if "command" in overrides and "option" in overrides \
            and "phedex-node" in overrides \
            and "lfn-prefix" in overrides:
        logging.critical('using override in StageOut')
        stageOutCall['command'] = overrides.get('command')
        stageOutCall['option'] = overrides.get('option')
        stageOutCall['phedex-node'] = overrides.get('phedex-node')
        stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

    # naw man, this is real
    # iterate over all the incoming files
    if not useNewStageOutCode:
        # old style
        manager = StageOutMgr(**stageOutCall)
        manager.numberOfRetries = self.step.retryCount
        manager.retryPauseTime = self.step.retryDelay
    else:
        # new style
        logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
        print("STAGEOUT IS USING NEW STAGEOUT CODE")
        manager = FMStageOutMgr(retryPauseTime = self.step.retryDelay,
                                numberOfRetries = self.step.retryCount,
                                **stageOutCall)

    # We need to find a list of steps in our task
    # And eventually a list of jobReports for our steps

    # Search through steps for report files
    filesTransferred = []

    for step in self.stepSpace.taskSpace.stepSpaces():
        if step == self.stepName:
            #Don't try to parse your own report; it's not there yet
            continue

        stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
        logging.info("Beginning report processing for step %s", step)
        reportLocation = os.path.join(stepLocation, 'Report.pkl')
        if not os.path.isfile(reportLocation):
            logging.error("Cannot find report for step %s in space %s", step, stepLocation)
            continue

        # First, get everything from a file and 'unpersist' it
        stepReport = Report()
        stepReport.unpersist(reportLocation, step)

        # Don't stage out files from bad steps.
        if not stepReport.stepSuccessful(step):
            continue

        # Okay, time to start using stuff
        # Now I'm a bit confused about this; each report should ONLY
        # Have the results of that particular step in it,
        # So getting all the files should get ONLY the files
        # for that step; or so I hope
        files = stepReport.getAllFileRefsFromStep(step = step)
        for fileName in files:
            # make sure the file information is consistent
            if hasattr(fileName, 'pfn') and (not hasattr(fileName, 'lfn') or
                                             not hasattr(fileName, 'module_label')):
                msg = "Not a valid file: %s" % fileName
                logging.error(msg)
                continue

            # Figuring out if we should do straight to merge
            # - should we do straight to merge at all ?
            # - is straight to merge disabled for this output ?
            # - are we over the size threshold ?
            # - are we over the event threshold ?
            straightToMerge = False
            if not getattr(fileName, 'merged', False) and hasattr(self.step.output, 'minMergeSize'):
                if fileName.module_label not in getattr(self.step.output, 'forceUnmergedOutputs', []):
                    if getattr(fileName, 'size', 0) >= self.step.output.minMergeSize:
                        straightToMerge = True
                    if getattr(fileName, 'events', 0) >= getattr(self.step.output, 'maxMergeEvents', sys.maxsize):
                        straightToMerge = True

            if straightToMerge:
                try:
                    fileName = self.handleLFNForMerge(mergefile = fileName, step = step)
                except Exception as ex:
                    logging.info("minMergeSize: %s", getattr(self.step.output, 'minMergeSize', None))
                    logging.info("maxMergeEvents: %s", getattr(self.step.output, 'maxMergeEvents', None))
                    logging.error("Encountered error while handling LFN for merge %s", fileName)
                    logging.error(str(ex))
                    manager.cleanSuccessfulStageOuts()
                    stepReport.addError(self.stepName, 60401, "DirectToMergeFailure", str(ex))

            # Save the input PFN in case we need it
            # Undecided whether to move fileName.pfn to the output PFN
            fileName.InputPFN = fileName.pfn
            lfn = getattr(fileName, 'lfn')
            fileSource = getattr(fileName, 'Source', None)
            if fileSource in ['TFileService', 'UserDefined']:
                userLfnRegEx(lfn)
            else:
                lfnRegEx(lfn)

            fileForTransfer = {'LFN': lfn,
                               'PFN': getattr(fileName, 'pfn'),
                               'PNN': None,
                               'StageOutCommand': None,
                               'Checksums': getattr(fileName, 'checksums', None)}

            signal.signal(signal.SIGALRM, alarmHandler)
            signal.alarm(waitTime)
            try:
                manager(fileForTransfer)
                #Afterwards, the file should have updated info.
                filesTransferred.append(fileForTransfer)
                fileName.StageOutCommand = fileForTransfer['StageOutCommand']
                fileName.location = fileForTransfer['PNN']
                fileName.OutputPFN = fileForTransfer['PFN']
            except Alarm:
                msg = "Indefinite hang during stageOut of logArchive"
                logging.error(msg)
                manager.cleanSuccessfulStageOuts()
                stepReport.addError(self.stepName, 60403, "StageOutTimeout", msg)
                stepReport.setStepStatus(self.stepName, 1)
                # well, if it fails for one file, it fails for the whole job...
                break
            except Exception as ex:
                manager.cleanSuccessfulStageOuts()
                stepReport.addError(self.stepName, 60307, "StageOutFailure", str(ex))
                stepReport.setStepStatus(self.stepName, 1)
                stepReport.persist(reportLocation)
                raise

            signal.alarm(0)

        # Am DONE with report. Persist it
        stepReport.persist(reportLocation)

    #Done with all steps, and should have a list of
    #stagedOut files in fileForTransfer

    logging.info("Transferred %i files", len(filesTransferred))
    return
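

# --- Illustrative sketch (not part of the original module) -------------------
# The straightToMerge decision above is easy to misread inline.  The helper
# below reproduces the same thresholds as a stand-alone function for
# reference only; the function name and the plain-dict arguments are
# assumptions made for this example, not part of the executor's API.

import sys

def should_go_straight_to_merge(fileAttrs, outputConfig):
    """
    Mirror the direct-to-merge decision used in execute():
    only unmerged files are considered, outputs listed in
    forceUnmergedOutputs are excluded, and either the size or the
    event threshold can trigger a direct merge.
    """
    if fileAttrs.get('merged', False):
        return False
    if 'minMergeSize' not in outputConfig:
        return False
    if fileAttrs.get('module_label') in outputConfig.get('forceUnmergedOutputs', []):
        return False
    if fileAttrs.get('size', 0) >= outputConfig['minMergeSize']:
        return True
    if fileAttrs.get('events', 0) >= outputConfig.get('maxMergeEvents', sys.maxsize):
        return True
    return False

# Example: a 3 GB unmerged file against a 2 GB threshold goes straight to merge.
# print(should_go_straight_to_merge({'size': 3 * 1024**3, 'events': 100},
#                                   {'minMergeSize': 2 * 1024**3}))
# ------------------------------------------------------------------------------
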
def execute(self, emulator = None):
    """
    _execute_

    """
    #Are we using emulators again?
    if (emulator != None):
        return emulator.emulate( self.step, self.job )

    overrides = {}
    if hasattr(self.step, 'override'):
        overrides = self.step.override.dictionary_()

    # Set wait to over an hour
    waitTime = overrides.get('waitTime', 3600 + (self.step.retryDelay * self.step.retryCount))

    logging.info("StageOut override is: %s " % self.step)

    # Pull out StageOutMgr Overrides
    # switch between old stageOut behavior and new, fancy stage out behavior
    useNewStageOutCode = False
    if getattr(self.step, 'newStageout', False) or \
            ('newStageOut' in overrides and overrides.get('newStageOut')):
        useNewStageOutCode = True

    stageOutCall = {}
    if "command" in overrides and "option" in overrides \
            and "se-name" in overrides and "phedex-node" in overrides \
            and "lfn-prefix" in overrides:
        logging.critical('using override in StageOut')
        stageOutCall['command'] = overrides.get('command')
        stageOutCall['option'] = overrides.get('option')
        stageOutCall['se-name'] = overrides.get('se-name')
        stageOutCall['phedex-node'] = overrides.get('phedex-node')
        stageOutCall['lfn-prefix'] = overrides.get('lfn-prefix')

    # naw man, this is real
    # iterate over all the incoming files
    if not useNewStageOutCode:
        # old style
        manager = StageOutMgr.StageOutMgr(**stageOutCall)
        manager.numberOfRetries = self.step.retryCount
        manager.retryPauseTime = self.step.retryDelay
    else:
        # new style
        logging.critical("STAGEOUT IS USING NEW STAGEOUT CODE")
        print "STAGEOUT IS USING NEW STAGEOUT CODE"
        manager = WMCore.Storage.FileManager.StageOutMgr(
            retryPauseTime = self.step.retryDelay,
            numberOfRetries = self.step.retryCount,
            **stageOutCall)

    # We need to find a list of steps in our task
    # And eventually a list of jobReports for our steps

    # Search through steps for report files
    filesTransferred = []

    for step in self.stepSpace.taskSpace.stepSpaces():
        if step == self.stepName:
            #Don't try to parse your own report; it's not there yet
            continue

        stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
        logging.info("Beginning report processing for step %s" % (step))
        reportLocation = os.path.join(stepLocation, 'Report.pkl')
        if not os.path.isfile(reportLocation):
            logging.error("Cannot find report for step %s in space %s" \
                          % (step, stepLocation))
            continue

        # First, get everything from a file and 'unpersist' it
        stepReport = Report()
        stepReport.unpersist(reportLocation, step)
        taskID = getattr(stepReport.data, 'id', None)

        # Don't stage out files from bad steps.
        if not stepReport.stepSuccessful(step):
            continue

        # Okay, time to start using stuff
        # Now I'm a bit confused about this; each report should ONLY
        # Have the results of that particular step in it,
        # So getting all the files should get ONLY the files
        # for that step; or so I hope
        files = stepReport.getAllFileRefsFromStep(step = step)
        for file in files:
            if not hasattr(file, 'lfn') and hasattr(file, 'pfn'):
                # Then we're truly hosed on this file; ignore it
                msg = "Not a file: %s" % file
                logging.error(msg)
                continue
            # Support direct-to-merge
            # This requires pulling a bunch of stuff from everywhere
            # First check if it's needed
            if hasattr(self.step.output, 'minMergeSize') \
                    and hasattr(file, 'size') \
                    and not getattr(file, 'merged', False):
                # We need both of those to continue, and we don't
                # direct-to-merge
                if getattr(self.step.output, 'doNotDirectMerge', False):
                    # Then we've been told explicitly not to do direct-to-merge
                    continue
                if file.size >= self.step.output.minMergeSize:
                    # Then this goes direct to merge
                    try:
                        file = self.handleLFNForMerge(mergefile = file, step = step)
                    except Exception as ex:
                        logging.error("Encountered error while handling LFN for merge due to size.\n")
                        logging.error(str(ex))
                        logging.debug(file)
                        logging.debug("minMergeSize: %s" % self.step.output.minMergeSize)
                        manager.cleanSuccessfulStageOuts()
                        stepReport.addError(self.stepName, 60401, "DirectToMergeFailure", str(ex))
            elif getattr(self.step.output, 'maxMergeEvents', None) != None \
                    and getattr(file, 'events', None) != None \
                    and not getattr(file, 'merged', False):
                # Then direct-to-merge due to events if
                # the file is large enough:
                if file.events >= self.step.output.maxMergeEvents:
                    # straight to merge
                    try:
                        file = self.handleLFNForMerge(mergefile = file, step = step)
                    except Exception as ex:
                        logging.error("Encountered error while handling LFN for merge due to events.\n")
                        logging.error(str(ex))
                        logging.debug(file)
                        logging.debug("maxMergeEvents: %s" % self.step.output.maxMergeEvents)
                        manager.cleanSuccessfulStageOuts()
                        stepReport.addError(self.stepName, 60402, "DirectToMergeFailure", str(ex))

            # Save the input PFN in case we need it
            # Undecided whether to move file.pfn to the output PFN
            file.InputPFN = file.pfn
            lfn = getattr(file, 'lfn')
            fileSource = getattr(file, 'Source', None)
            if fileSource in ['TFileService', 'UserDefined']:
                userLfnRegEx(lfn)
            else:
                lfnRegEx(lfn)

            fileForTransfer = {'LFN': lfn,
                               'PFN': getattr(file, 'pfn'),
                               'SEName': None,
                               'PNN': None,
                               'StageOutCommand': None,
                               'Checksums': getattr(file, 'checksums', None)}

            signal.signal(signal.SIGALRM, alarmHandler)
            signal.alarm(waitTime)
            try:
                manager(fileForTransfer)
                #Afterwards, the file should have updated info.
                filesTransferred.append(fileForTransfer)
                file.StageOutCommand = fileForTransfer['StageOutCommand']
                # file.location = fileForTransfer['SEName']
                file.location = fileForTransfer['PNN']
                file.OutputPFN = fileForTransfer['PFN']
            except Alarm:
                msg = "Indefinite hang during stageOut of logArchive"
                logging.error(msg)
                manager.cleanSuccessfulStageOuts()
                stepReport.addError(self.stepName, 60403, "StageOutTimeout", msg)
                stepReport.persist("Report.pkl")
            except Exception as ex:
                manager.cleanSuccessfulStageOuts()
                stepReport.addError(self.stepName, 60307, "StageOutFailure", str(ex))
                stepReport.setStepStatus(self.stepName, 1)
                stepReport.persist("Report.pkl")
                raise

            signal.alarm(0)

        # Am DONE with report
        # Persist it
        stepReport.persist(reportLocation)

    #Done with all steps, and should have a list of
    #stagedOut files in fileForTransfer

    logging.info("Transferred %i files" % (len(filesTransferred)))
    return
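

# --- Illustrative sketch (not part of the original module) -------------------
# All of the execute() variants above read their configuration from an
# override dictionary exposed through self.step.override.dictionary_().
# Only the keys actually consumed in this file are listed here; the concrete
# values are invented purely for illustration.

exampleOverrides = {
    'newStageOut': True,                # route through the new FileManager-based manager
    'waitTime': 7200,                   # seconds before the SIGALRM timeout fires
    'command': 'gfal2',                 # stage-out implementation to use (example value)
    'option': '',                       # extra options passed to that command
    'phedex-node': 'T2_EXAMPLE_Site',   # destination node (newer variants)
    'se-name': 'se.example.org',        # destination SE (older variants)
    'lfn-prefix': '/store/unmerged/example',
}

# A variant builds stageOutCall from a subset of these keys and hands it to
# the manager, e.g. StageOutMgr(**stageOutCall); which keys are honoured
# depends on which execute() version above is in use.
# ------------------------------------------------------------------------------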