def runImpl(self):
    """Execute queued task commands batch by batch until the list is empty.

    Each item from ``self.taskList`` is a ``(jediTaskID, commandMap)`` pair
    where ``commandMap`` provides the keys ``command``, ``comment`` and
    ``oldStatus``.

    * ``kill`` / ``finish`` / ``reassign``: look up the PandaIDs of the task
      (up to two passes); kill them if any are returned, otherwise update
      the task record.
    * ``retry`` / ``incexec``: optionally rewrite the stored task parameters
      (``incexec`` only) and then call ``retryTask_JEDI``.

    Returns None once ``self.taskList.get`` yields an empty batch.  Any
    exception raised while handling a batch is logged and the loop goes on
    with the next batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make logger
                tmpLog = MsgWrapper(self.logger, ' <jediTaskID={0}>'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # loop twice to see immediate result
                    for iLoop in range(2):
                        # get active PandaIDs to be killed
                        if commandStr == 'reassign' and commentStr is not None \
                                and 'soft reassign' in commentStr:
                            pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                        else:
                            pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                        if pandaIDs is None:
                            tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                            tmpStat = Interaction.SC_FAILED
                        # kill jobs or update task
                        if tmpStat == Interaction.SC_SUCCEEDED:
                            if pandaIDs == []:
                                # done since no active jobs
                                tmpMsg = 'completed cleaning jobs'
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpTaskSpec = JediTaskSpec()
                                tmpTaskSpec.jediTaskID = jediTaskID
                                updateTaskStatus = True
                                if commandStr != 'reassign':
                                    # reset oldStatus
                                    # keep oldStatus for task reassignment since it is reset when actually reassigned
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                else:
                                    # extract cloud or site
                                    if commentStr is not None:
                                        tmpItems = commentStr.split(':')
                                        if tmpItems[0] == 'cloud':
                                            tmpTaskSpec.cloud = tmpItems[1]
                                        else:
                                            tmpTaskSpec.site = tmpItems[1]
                                        tmpMsg = 'set {0}={1}'.format(tmpItems[0], tmpItems[1])
                                        tmpLog.sendMsg(tmpMsg, self.msgType)
                                        tmpLog.info(tmpMsg)
                                        # back to oldStatus if necessary
                                        if tmpItems[2] == 'y':
                                            tmpTaskSpec.status = oldStatus
                                            tmpTaskSpec.forceUpdate('oldStatus')
                                            updateTaskStatus = False
                                if commandStr == 'reassign':
                                    tmpTaskSpec.forceUpdate('errorDialog')
                                if updateTaskStatus:
                                    tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                tmpMsg = 'set task.status={0}'.format(tmpTaskSpec.status)
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID})
                                tmpLog.info('done with {0}'.format(str(tmpRet)))
                                break
                            else:
                                # kill only in the first loop
                                if iLoop > 0:
                                    break
                                # wait or kill jobs
                                # the comment may be None; a membership test on None would raise TypeError
                                if commentStr is not None and 'soft finish' in commentStr:
                                    tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = True
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                                    break
                                else:
                                    tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpLog.sendMsg(tmpMsg, self.msgType)
                                    if commandStr in ['reassign', 'finish']:
                                        # force kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '52', True)
                                    else:
                                        # normal kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params
                            for newKey in ['nFiles', 'fixedSandbox']:
                                taskParamMap.pop(newKey, None)
                            # convert new params
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params
                            for newKey, newVal in newParamMap.items():
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' \
                                            and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        # was "'-a {0}'.taskParamMap[...]", which raised AttributeError
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if 'buildSpec' in taskParamMap:
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if 'mergeSpec' in taskParamMap:
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+',
                                               '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet != True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__, errvalue))
                            continue
                    # retry failed files
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr)
                    if tmpRet == True:
                        tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception:
            # boundary handler: report the failure and carry on with the next batch
            errtype, errvalue = sys.exc_info()[:2]
            errStr = '{0} failed in runImpl() with {1}:{2} '.format(self.__class__.__name__, errtype.__name__, errvalue)
            errStr += traceback.format_exc()
            logger.error(errStr)
def runImpl(self):
    """Execute queued task commands batch by batch until the list is empty.

    Each item from ``self.taskList`` is a ``(jediTaskID, commandMap)`` pair
    where ``commandMap`` provides the keys ``command``, ``comment`` and
    ``oldStatus``.

    * ``kill`` / ``finish`` / ``reassign``: fetch the PandaIDs of the task;
      kill them if any are returned, otherwise update the task record.
    * ``retry`` / ``incexec``: optionally rewrite the stored task parameters
      (``incexec`` only) and then call ``retryTask_JEDI``.

    Returns None once ``self.taskList.get`` yields an empty batch.  Any
    exception raised while handling a batch is logged and the loop goes on
    with the next batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make logger
                tmpLog = MsgWrapper(self.logger, ' <jediTaskID={0}>'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    # get active PandaIDs to be killed
                    pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                    if pandaIDs is None:
                        tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                        tmpStat = Interaction.SC_FAILED
                    # kill jobs or update task
                    if tmpStat == Interaction.SC_SUCCEEDED:
                        if pandaIDs == []:
                            # done since no active jobs
                            tmpLog.info('completed the command')
                            tmpTaskSpec = JediTaskSpec()
                            tmpTaskSpec.jediTaskID = jediTaskID
                            updateTaskStatus = True
                            if commandStr != 'reassign':
                                # keep oldStatus for task reassignment since it is reset when actually reassigned
                                tmpTaskSpec.forceUpdate('oldStatus')
                            elif commentStr is not None:
                                # extract cloud or site
                                # (skipped when there is no comment: None has no split())
                                tmpItems = commentStr.split(':')
                                if tmpItems[0] == 'cloud':
                                    tmpTaskSpec.cloud = tmpItems[1]
                                else:
                                    tmpTaskSpec.site = tmpItems[1]
                                # back to oldStatus if necessary
                                if tmpItems[2] == 'y':
                                    tmpTaskSpec.status = oldStatus
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                    updateTaskStatus = False
                            if updateTaskStatus:
                                tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                            tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID})
                        else:
                            tmpLog.info('sending kill command')
                            tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                        tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params
                            for newKey in ['nFiles', 'fixedSandbox']:
                                taskParamMap.pop(newKey, None)
                            # convert new params
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params
                            for newKey, newVal in newParamMap.items():
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' \
                                            and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        # was "'-a {0}'.taskParamMap[...]", which raised AttributeError
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if 'buildSpec' in taskParamMap:
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if 'mergeSpec' in taskParamMap:
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+',
                                               '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet != True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__, errvalue))
                            continue
                    # retry failed files
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr)
                    if tmpRet == True:
                        tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception:
            # boundary handler: report the failure and carry on with the next batch
            errtype, errvalue = sys.exc_info()[:2]
            logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__, errtype.__name__, errvalue))
def runImpl(self):
    """Refine queued tasks batch by batch until the list is empty.

    Each item from ``self.taskList`` is a
    ``(jediTaskID, splitRule, taskStatus, parent_tid)`` tuple.  For every
    task the stages below run in order; a stage is skipped once an earlier
    one has failed:

    1. read and decode the task parameters,
    2. instantiate the refiner via ``self.implFactory``,
    3. initialize it and extract the common parameters,
    4. check the parent task (when ``parent_tid`` refers to another task),
    5. call ``doRefine``,
    6. register the result, or set the task status to ``tobroken`` on
       failure.

    Returns None once ``self.taskList.get`` yields an empty batch.  Any
    exception escaping the per-task handling is logged and the loop goes on
    with the next batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, splitRule, taskStatus, parent_tid in taskList:
                # reset per task: if an early stage fails, the failure branch
                # must not see the previous task's refiner (or an unbound name)
                impl = None
                # make logger
                tmpLog = MsgWrapper(self.logger, '<jediTaskID={0}>'.format(jediTaskID))
                tmpLog.info('start')
                tmpStat = Interaction.SC_SUCCEEDED
                errStr = ''
                # read task parameters
                try:
                    taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                    taskParamMap = RefinerUtils.decodeJSON(taskParam)
                except Exception:
                    errtype, errvalue = sys.exc_info()[:2]
                    errStr = 'conversion to map from json failed with {0}:{1}'.format(errtype.__name__, errvalue)
                    tmpLog.error(errStr)
                    tmpStat = Interaction.SC_FAILED
                # get impl
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('getting Impl')
                    try:
                        # get VO and sourceLabel
                        vo = taskParamMap['vo']
                        prodSourceLabel = taskParamMap['prodSourceLabel']
                        taskType = taskParamMap['taskType']
                        tmpLog.info('vo={0} sourceLabel={1} taskType={2}'.format(vo, prodSourceLabel, taskType))
                        # get impl
                        impl = self.implFactory.instantiateImpl(vo, prodSourceLabel, taskType,
                                                                self.taskBufferIF, self.ddmIF)
                        if impl is None:
                            # task refiner is undefined
                            errStr = 'task refiner is undefined for vo={0} sourceLabel={1}'.format(vo, prodSourceLabel)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        errStr = 'failed to get task refiner with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(errStr)
                        tmpStat = Interaction.SC_FAILED
                # extract common parameters
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('extracting common')
                    try:
                        # initialize impl
                        impl.initializeRefiner(tmpLog)
                        # extract common parameters
                        impl.extractCommon(jediTaskID, taskParamMap, self.workQueueMapper, splitRule)
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        errStr = 'failed to extract common parameters with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(errStr)
                        tmpStat = Interaction.SC_FAILED
                # check parent
                if tmpStat == Interaction.SC_SUCCEEDED:
                    if parent_tid not in [None, jediTaskID]:
                        tmpLog.info('check parent task')
                        try:
                            tmpStat = self.taskBufferIF.checkParentTask_JEDI(parent_tid)
                            if tmpStat == 'completed':
                                # parent is done
                                tmpStat = Interaction.SC_SUCCEEDED
                            elif tmpStat == 'running':
                                if not impl.taskSpec.noWaitParent():
                                    # parent is running
                                    errStr = 'pending until parent task {0} is done'.format(parent_tid)
                                    impl.taskSpec.status = taskStatus
                                    impl.taskSpec.setOnHold()
                                    impl.taskSpec.setErrDiag(errStr)
                                    tmpLog.info(errStr)
                                    self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                                      {'jediTaskID': impl.taskSpec.jediTaskID})
                                    continue
                                else:
                                    # not wait for parent
                                    tmpStat = Interaction.SC_SUCCEEDED
                            else:
                                # parent is corrupted
                                tmpStat = Interaction.SC_FAILED
                                tmpErrStr = 'parent task {0} failed to complete'.format(parent_tid)
                                impl.taskSpec.setErrDiag(tmpErrStr)
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            errStr = 'failed to check parent task with {0}:{1}'.format(errtype.__name__, errvalue)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                # refine
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('refining with {0}'.format(impl.__class__.__name__))
                    try:
                        tmpStat = impl.doRefine(jediTaskID, taskParamMap)
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        # no wait for parent
                        if impl.taskSpec.noWaitParent() and errtype == JediException.UnknownDatasetError:
                            impl.taskSpec.status = taskStatus
                            impl.taskSpec.setOnHold()
                            errStr = 'pending until parent produces input'
                            tmpLog.info(errStr)
                            self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                              {'jediTaskID': impl.taskSpec.jediTaskID})
                            continue
                        else:
                            errStr = 'failed to refine task'
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                # register
                if tmpStat != Interaction.SC_SUCCEEDED:
                    tmpLog.error('failed to refine the task')
                    if impl is None or impl.taskSpec is None:
                        # no usable spec from the refiner: build a minimal one for this task
                        tmpTaskSpec = JediTaskSpec()
                        tmpTaskSpec.jediTaskID = jediTaskID
                    else:
                        tmpTaskSpec = impl.taskSpec
                    tmpTaskSpec.status = 'tobroken'
                    if errStr != '':
                        tmpTaskSpec.setErrDiag(errStr, True)
                    self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': tmpTaskSpec.jediTaskID})
                else:
                    tmpLog.info('registering')
                    # fill JEDI tables
                    try:
                        # enable protection against task duplication
                        if 'uniqueTaskName' in taskParamMap and taskParamMap['uniqueTaskName'] and \
                                not impl.taskSpec.checkPreProcessed():
                            uniqueTaskName = True
                        else:
                            uniqueTaskName = False
                        strTaskParams = None
                        if impl.updatedTaskParams is not None:
                            strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
                        if taskStatus == 'registered':
                            # unset pre-process flag
                            if impl.taskSpec.checkPreProcessed():
                                impl.taskSpec.setPostPreProcess()
                            # full registration
                            tmpStat, newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(
                                jediTaskID, impl.taskSpec,
                                impl.inMasterDatasetSpec,
                                impl.inSecDatasetSpecList,
                                impl.outDatasetSpecList,
                                impl.outputTemplateMap,
                                impl.jobParamsTemplate,
                                strTaskParams,
                                impl.unmergeMasterDatasetSpec,
                                impl.unmergeDatasetSpecMap,
                                uniqueTaskName)
                            if not tmpStat:
                                tmpErrStr = 'failed to register the task to JEDI in a single shot'
                                tmpLog.error(tmpErrStr)
                                impl.taskSpec.status = 'tobroken'
                                impl.taskSpec.setErrDiag(tmpErrStr, True)
                                self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                                  {'jediTaskID': impl.taskSpec.jediTaskID})
                            tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                            tmpLog.info(tmpMsg)
                            tmpLog.sendMsg(tmpMsg, self.msgType)
                        else:
                            # appending for incremental execution
                            tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID, impl.inMasterDatasetSpec,
                                                                            impl.inSecDatasetSpecList)
                            if not tmpStat:
                                tmpLog.error('failed to append datasets for incexec')
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(tmpErrStr)
                    else:
                        tmpLog.info('done')
        except Exception:
            # boundary handler: report the failure and carry on with the next batch
            errtype, errvalue = sys.exc_info()[:2]
            logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__, errtype.__name__, errvalue))
def runImpl(self):
    """Execute queued task commands batch by batch until the list is empty.

    Each item from ``self.taskList`` is a ``(jediTaskID, commandMap)`` pair
    where ``commandMap`` provides the keys ``command``, ``comment`` and
    ``oldStatus``.

    * ``kill`` / ``finish`` / ``reassign``: look up the PandaIDs of the task
      (up to two passes); kill them if any are returned, otherwise update
      the task record.  A reassign whose comment contains
      ``nokill reassign`` skips the lookup and goes straight to the update.
    * ``retry`` / ``incexec``: optionally rewrite the stored task parameters
      (``incexec`` only) and then call ``retryTask_JEDI``; the substrings
      ``'sole '`` and ``'discard '`` in the comment select its options.

    Returns None once ``self.taskList.get`` yields an empty batch.  Any
    exception raised while handling a batch is logged and the loop goes on
    with the next batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make logger
                tmpLog = MsgWrapper(self.logger, ' < jediTaskID={0} >'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                # the comment may be None; use this copy for substring tests only
                commentText = commentStr or ''
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # loop twice to see immediate result
                    for iLoop in range(2):
                        # get active PandaIDs to be killed
                        if commandStr == 'reassign' and 'soft reassign' in commentText:
                            pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                        elif commandStr == 'reassign' and 'nokill reassign' in commentText:
                            pandaIDs = []
                        else:
                            pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                        if pandaIDs is None:
                            tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                            tmpStat = Interaction.SC_FAILED
                        # kill jobs or update task
                        if tmpStat == Interaction.SC_SUCCEEDED:
                            if pandaIDs == []:
                                # done since no active jobs
                                tmpMsg = 'completed cleaning jobs'
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpTaskSpec = JediTaskSpec()
                                tmpTaskSpec.jediTaskID = jediTaskID
                                updateTaskStatus = True
                                if commandStr != 'reassign':
                                    # reset oldStatus
                                    # keep oldStatus for task reassignment since it is reset when actually reassigned
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                else:
                                    # extract cloud or site
                                    if commentStr is not None:
                                        tmpItems = commentStr.split(':')
                                        if tmpItems[0] == 'cloud':
                                            tmpTaskSpec.cloud = tmpItems[1]
                                        elif tmpItems[0] == 'nucleus':
                                            tmpTaskSpec.nucleus = tmpItems[1]
                                        else:
                                            tmpTaskSpec.site = tmpItems[1]
                                        tmpMsg = 'set {0}={1}'.format(tmpItems[0], tmpItems[1])
                                        tmpLog.sendMsg(tmpMsg, self.msgType)
                                        tmpLog.info(tmpMsg)
                                        # back to oldStatus if necessary
                                        if tmpItems[2] == 'y':
                                            tmpTaskSpec.status = oldStatus
                                            tmpTaskSpec.forceUpdate('oldStatus')
                                            updateTaskStatus = False
                                if commandStr == 'reassign':
                                    tmpTaskSpec.forceUpdate('errorDialog')
                                if commandStr == 'finish':
                                    # update datasets
                                    tmpLog.info('updating datasets to finish')
                                    datasetsUpdated = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(jediTaskID,
                                                                                                        self.pid)
                                    if not datasetsUpdated:
                                        tmpLog.info('wait until datasets are updated to finish')
                                    # ignore failGoalUnreached when manually finished
                                    _, taskSpec = self.taskBufferIF.getTaskWithID_JEDI(jediTaskID)
                                    tmpTaskSpec.splitRule = taskSpec.splitRule
                                    tmpTaskSpec.unsetFailGoalUnreached()
                                if updateTaskStatus:
                                    tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                tmpMsg = 'set task_status={0}'.format(tmpTaskSpec.status)
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID},
                                                                           setOldModTime=True)
                                tmpLog.info('done with {0}'.format(str(tmpRet)))
                                break
                            else:
                                # kill only in the first loop
                                if iLoop > 0:
                                    break
                                # wait or kill jobs
                                if 'soft finish' in commentText:
                                    queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                                    tmpMsg = "trying to kill {0} queued jobs for soft finish".format(len(queuedPandaIDs))
                                    tmpLog.info(tmpMsg)
                                    self.taskBufferIF.killJobs(queuedPandaIDs, commentStr, '52', True)
                                    tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = True
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                                    break
                                else:
                                    tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpLog.sendMsg(tmpMsg, self.msgType)
                                    if commandStr in ['finish']:
                                        # force kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '52', True)
                                    elif commandStr in ['reassign']:
                                        # force kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '51', True)
                                    else:
                                        # normal kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params
                            for newKey in ['nFiles', 'fixedSandbox']:
                                taskParamMap.pop(newKey, None)
                            # convert new params
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params
                            for newKey, newVal in newParamMap.items():
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' \
                                            and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        # was "'-a {0}'.taskParamMap[...]", which raised AttributeError
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if 'buildSpec' in taskParamMap:
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if 'mergeSpec' in taskParamMap:
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+',
                                               '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet != True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__, errvalue))
                            continue
                    # retry child tasks
                    retryChildTasks = 'sole ' not in commentText
                    # discard events
                    discardEvents = 'discard ' in commentText
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr,
                                                                             retryChildTasks=retryChildTasks,
                                                                             discardEvents=discardEvents)
                    if tmpRet == True:
                        tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception:
            # boundary handler: report the failure and carry on with the next batch
            errtype, errvalue = sys.exc_info()[:2]
            errStr = '{0} failed in runImpl() with {1}:{2} '.format(self.__class__.__name__, errtype.__name__, errvalue)
            errStr += traceback.format_exc()
            logger.error(errStr)
def runImpl(self):
    """Execute queued task commands batch by batch until the list is empty.

    Each item from ``self.taskList`` is a ``(jediTaskID, commandMap)`` pair
    where ``commandMap`` provides the keys ``command``, ``comment`` and
    ``oldStatus``.

    * ``kill`` / ``finish`` / ``reassign``: look up the PandaIDs of the task
      (up to two passes); kill them if any are returned, otherwise update
      the task record.  A reassign whose comment contains
      ``nokill reassign`` skips the lookup and goes straight to the update.
    * ``retry`` / ``incexec``: optionally rewrite the stored task parameters
      (``incexec`` only) and then call ``retryTask_JEDI``; the substrings
      ``'sole '``, ``'discard '`` and ``'staged '`` in the comment select
      its options.

    Returns None once ``self.taskList.get`` yields an empty batch.  Any
    exception raised while handling a batch is logged with its traceback
    and the loop goes on with the next batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make logger
                tmpLog = MsgWrapper(self.logger, ' < jediTaskID={0} >'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                # the comment may be None; use this copy for substring tests only
                commentText = commentStr or ''
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # loop twice to see immediate result
                    for iLoop in range(2):
                        # get active PandaIDs to be killed
                        if commandStr == 'reassign' and 'soft reassign' in commentText:
                            pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                        elif commandStr == 'reassign' and 'nokill reassign' in commentText:
                            pandaIDs = []
                        else:
                            pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                        if pandaIDs is None:
                            tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                            tmpStat = Interaction.SC_FAILED
                        # kill jobs or update task
                        if tmpStat == Interaction.SC_SUCCEEDED:
                            if pandaIDs == []:
                                # done since no active jobs
                                tmpMsg = 'completed cleaning jobs'
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpTaskSpec = JediTaskSpec()
                                tmpTaskSpec.jediTaskID = jediTaskID
                                updateTaskStatus = True
                                if commandStr != 'reassign':
                                    # reset oldStatus
                                    # keep oldStatus for task reassignment since it is reset when actually reassigned
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                else:
                                    # extract cloud or site
                                    if commentStr is not None:
                                        tmpItems = commentStr.split(':')
                                        if tmpItems[0] == 'cloud':
                                            tmpTaskSpec.cloud = tmpItems[1]
                                        elif tmpItems[0] == 'nucleus':
                                            tmpTaskSpec.nucleus = tmpItems[1]
                                        else:
                                            tmpTaskSpec.site = tmpItems[1]
                                        tmpMsg = 'set {0}={1}'.format(tmpItems[0], tmpItems[1])
                                        tmpLog.sendMsg(tmpMsg, self.msgType)
                                        tmpLog.info(tmpMsg)
                                        # back to oldStatus if necessary
                                        if tmpItems[2] == 'y':
                                            tmpTaskSpec.status = oldStatus
                                            tmpTaskSpec.forceUpdate('oldStatus')
                                            updateTaskStatus = False
                                if commandStr == 'reassign':
                                    tmpTaskSpec.forceUpdate('errorDialog')
                                if commandStr == 'finish':
                                    # update datasets
                                    tmpLog.info('updating datasets to finish')
                                    datasetsUpdated = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(jediTaskID,
                                                                                                        self.pid)
                                    if not datasetsUpdated:
                                        tmpLog.info('wait until datasets are updated to finish')
                                    # ignore failGoalUnreached when manually finished
                                    _, taskSpec = self.taskBufferIF.getTaskWithID_JEDI(jediTaskID)
                                    tmpTaskSpec.splitRule = taskSpec.splitRule
                                    tmpTaskSpec.unsetFailGoalUnreached()
                                if updateTaskStatus:
                                    tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                tmpMsg = 'set task_status={0}'.format(tmpTaskSpec.status)
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID},
                                                                           setOldModTime=True)
                                tmpLog.info('done with {0}'.format(str(tmpRet)))
                                break
                            else:
                                # kill only in the first loop
                                if iLoop > 0:
                                    break
                                # wait or kill jobs
                                if 'soft finish' in commentText:
                                    queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                                    tmpMsg = "trying to kill {0} queued jobs for soft finish".format(len(queuedPandaIDs))
                                    tmpLog.info(tmpMsg)
                                    self.taskBufferIF.killJobs(queuedPandaIDs, commentStr, '52', True)
                                    tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = True
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                                    break
                                else:
                                    tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpLog.sendMsg(tmpMsg, self.msgType)
                                    if commandStr in ['finish']:
                                        # force kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '52', True)
                                    elif commandStr in ['reassign']:
                                        # force kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '51', True)
                                    else:
                                        # normal kill
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params
                            for newKey in ['nFiles', 'fixedSandbox']:
                                try:
                                    del taskParamMap[newKey]
                                except Exception:
                                    pass
                            # convert new params
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params
                            for newKey, newVal in iteritems(newParamMap):
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' \
                                            and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if 'buildSpec' in taskParamMap:
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if 'mergeSpec' in taskParamMap:
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+',
                                               '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet is not True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception as e:
                            tmpLog.error('failed to change task params with {} {}'.format(str(e),
                                                                                          traceback.format_exc()))
                            continue
                    # retry child tasks
                    retryChildTasks = 'sole ' not in commentText
                    # discard events
                    discardEvents = 'discard ' in commentText
                    # release un-staged files
                    releaseUnstaged = 'staged ' in commentText
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr,
                                                                             retryChildTasks=retryChildTasks,
                                                                             discardEvents=discardEvents,
                                                                             release_unstaged=releaseUnstaged)
                    if tmpRet is True:
                        tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception as e:
            # boundary handler: report the failure and carry on with the next batch
            errStr = '{} failed in runImpl() with {} {} '.format(self.__class__.__name__, str(e),
                                                                 traceback.format_exc())
            logger.error(errStr)
def runImpl(self):
    """Refine tasks taken from ``self.taskList`` until the list is empty.

    Tasks are fetched in batches of ten.  Each entry is a
    ``(jediTaskID, splitRule, taskStatus, parent_tid)`` tuple and goes
    through the following steps, each one only if the previous succeeded:

    1. read the task parameters and decode them from JSON
    2. instantiate the refiner implementation via ``self.implFactory``
    3. extract the common parameters and check attribute lengths
    4. check the state of the parent task, if there is one
    5. run ``doRefine`` of the implementation
    6. register the task, or set it to ``tobroken`` when a step failed

    A task that has to wait (parent still running, unknown input dataset,
    temporary error) is put on hold with its original status and skipped.

    Any exception escaping a batch is logged with the module-level
    ``logger`` and the loop proceeds with the next batch.  The method
    returns ``None`` once ``self.taskList`` yields an empty batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more tasks
            if len(taskList) == 0:
                self.logger.info('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, splitRule, taskStatus, parent_tid in taskList:
                # make logger
                tmpLog = MsgWrapper(self.logger, '< jediTaskID={0} >'.format(jediTaskID))
                tmpLog.debug('start')
                tmpStat = Interaction.SC_SUCCEEDED
                errStr = ''
                # reset per task: otherwise a failure while getting the impl
                # would leave the name unbound (first task) or still bound to
                # the refiner of the previous task, so that the wrong task
                # would be marked as broken in the register step below
                impl = None
                # read task parameters
                try:
                    taskParam = None
                    taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                    taskParamMap = RefinerUtils.decodeJSON(taskParam)
                except Exception:
                    errtype, errvalue = sys.exc_info()[:2]
                    errStr = 'conversion to map from json failed with {0}:{1}'.format(errtype.__name__, errvalue)
                    tmpLog.debug(taskParam)
                    tmpLog.error(errStr)
                    # nothing can be done without parameters; skip the task
                    continue
                # get impl
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('getting Impl')
                    try:
                        # get VO and sourceLabel
                        vo = taskParamMap['vo']
                        prodSourceLabel = taskParamMap['prodSourceLabel']
                        taskType = taskParamMap['taskType']
                        tmpLog.info('vo={0} sourceLabel={1} taskType={2}'.format(vo, prodSourceLabel, taskType))
                        # get impl
                        impl = self.implFactory.instantiateImpl(vo, prodSourceLabel, taskType,
                                                                self.taskBufferIF, self.ddmIF)
                        if impl is None:
                            # task refiner is undefined
                            errStr = 'task refiner is undefined for vo={0} sourceLabel={1}'.format(vo, prodSourceLabel)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        errStr = 'failed to get task refiner with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(errStr)
                        tmpStat = Interaction.SC_FAILED
                # extract common parameters
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('extracting common')
                    try:
                        # initialize impl
                        impl.initializeRefiner(tmpLog)
                        impl.oldTaskStatus = taskStatus
                        # extract common parameters
                        impl.extractCommon(jediTaskID, taskParamMap, self.workQueueMapper, splitRule)
                        # set parent tid
                        if parent_tid not in [None, jediTaskID]:
                            impl.taskSpec.parent_tid = parent_tid
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        errStr = 'failed to extract common parameters with {0}:{1} {2}'.format(
                            errtype.__name__, errvalue, traceback.format_exc())
                        tmpLog.error(errStr)
                        tmpStat = Interaction.SC_FAILED
                # check attribute length
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('checking attribute length')
                    if not impl.taskSpec.checkAttrLength():
                        tmpLog.error(impl.taskSpec.errorDialog)
                        tmpStat = Interaction.SC_FAILED
                # check parent
                noWaitParent = False
                parentState = None
                if tmpStat == Interaction.SC_SUCCEEDED:
                    if parent_tid not in [None, jediTaskID]:
                        tmpLog.info('check parent task')
                        try:
                            tmpStat = self.taskBufferIF.checkParentTask_JEDI(parent_tid)
                            parentState = tmpStat
                            if tmpStat == 'completed':
                                # parent is done
                                tmpStat = Interaction.SC_SUCCEEDED
                            elif tmpStat == 'running':
                                if not impl.taskSpec.noWaitParent():
                                    # parent is running
                                    errStr = 'pending until parent task {0} is done'.format(parent_tid)
                                    impl.taskSpec.status = taskStatus
                                    impl.taskSpec.setOnHold()
                                    impl.taskSpec.setErrDiag(errStr)
                                    tmpLog.info(errStr)
                                    self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                                      {'jediTaskID': impl.taskSpec.jediTaskID},
                                                                      oldStatus=[taskStatus],
                                                                      setFrozenTime=False)
                                    continue
                                else:
                                    # not wait for parent
                                    tmpStat = Interaction.SC_SUCCEEDED
                                    noWaitParent = True
                            else:
                                # parent is corrupted
                                tmpStat = Interaction.SC_FAILED
                                tmpErrStr = 'parent task {0} failed to complete'.format(parent_tid)
                                impl.taskSpec.setErrDiag(tmpErrStr)
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            errStr = 'failed to check parent task with {0}:{1}'.format(errtype.__name__, errvalue)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                # refine
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('refining with {0}'.format(impl.__class__.__name__))
                    try:
                        tmpStat = impl.doRefine(jediTaskID, taskParamMap)
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        # wait unknown input if noWaitParent or waitInput
                        if (((impl.taskSpec.noWaitParent() or impl.taskSpec.waitInput())
                             and errtype == JediException.UnknownDatasetError)
                                or parentState == 'running'
                                or errtype == Interaction.JEDITemporaryError):
                            if impl.taskSpec.noWaitParent() or parentState == 'running':
                                tmpErrStr = 'pending until parent produces input'
                                setFrozenTime = False
                            elif errtype == Interaction.JEDITemporaryError:
                                tmpErrStr = 'pending due to DDM problem. {0}'.format(errvalue)
                                setFrozenTime = True
                            else:
                                tmpErrStr = 'pending until input is staged'
                                setFrozenTime = True
                            # keep the original status and put the task on hold
                            impl.taskSpec.status = taskStatus
                            impl.taskSpec.setOnHold()
                            impl.taskSpec.setErrDiag(tmpErrStr)
                            tmpLog.info(tmpErrStr)
                            self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                              {'jediTaskID': impl.taskSpec.jediTaskID},
                                                              oldStatus=[taskStatus],
                                                              insertUnknown=impl.unknownDatasetList,
                                                              setFrozenTime=setFrozenTime)
                            continue
                        else:
                            errStr = 'failed to refine task with {0}:{1}'.format(errtype.__name__, errvalue)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                # register
                if tmpStat != Interaction.SC_SUCCEEDED:
                    tmpLog.error('failed to refine the task')
                    # fall back to a bare spec when no refiner (or no spec) is available
                    if impl is None or getattr(impl, 'taskSpec', None) is None:
                        tmpTaskSpec = JediTaskSpec()
                        tmpTaskSpec.jediTaskID = jediTaskID
                    else:
                        tmpTaskSpec = impl.taskSpec
                    tmpTaskSpec.status = 'tobroken'
                    if errStr != '':
                        tmpTaskSpec.setErrDiag(errStr, True)
                    self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,
                                                      {'jediTaskID': tmpTaskSpec.jediTaskID},
                                                      oldStatus=[taskStatus])
                else:
                    tmpLog.info('registering')
                    # fill JEDI tables
                    try:
                        # enable protection against task duplication
                        uniqueTaskName = bool(taskParamMap.get('uniqueTaskName')) and \
                            not impl.taskSpec.checkPreProcessed()
                        strTaskParams = None
                        if impl.updatedTaskParams is not None:
                            strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
                        if taskStatus == 'registered':
                            # unset pre-process flag
                            if impl.taskSpec.checkPreProcessed():
                                impl.taskSpec.setPostPreProcess()
                            # full registration
                            tmpStat, newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(
                                jediTaskID, impl.taskSpec,
                                impl.inMasterDatasetSpec,
                                impl.inSecDatasetSpecList,
                                impl.outDatasetSpecList,
                                impl.outputTemplateMap,
                                impl.jobParamsTemplate,
                                strTaskParams,
                                impl.unmergeMasterDatasetSpec,
                                impl.unmergeDatasetSpecMap,
                                uniqueTaskName,
                                taskStatus)
                            if not tmpStat:
                                tmpErrStr = 'failed to register the task to JEDI in a single shot'
                                tmpLog.error(tmpErrStr)
                                impl.taskSpec.status = newTaskStatus
                                impl.taskSpec.setErrDiag(tmpErrStr, True)
                                self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                                  {'jediTaskID': impl.taskSpec.jediTaskID},
                                                                  oldStatus=[taskStatus])
                            tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                            tmpLog.info(tmpMsg)
                            tmpLog.sendMsg(tmpMsg, self.msgType)
                        else:
                            # disable scouts if previous attempt didn't use it
                            if not impl.taskSpec.useScout(splitRule):
                                impl.taskSpec.setUseScout(False)
                            # update task with new params
                            self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                              {'jediTaskID': impl.taskSpec.jediTaskID},
                                                              oldStatus=[taskStatus])
                            # appending for incremental execution
                            tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID,
                                                                            impl.inMasterDatasetSpec,
                                                                            impl.inSecDatasetSpecList)
                            if not tmpStat:
                                tmpLog.error('failed to append datasets for incexec')
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(tmpErrStr)
                    else:
                        tmpLog.info('done')
        except Exception:
            # log and go on with the next batch
            errtype, errvalue = sys.exc_info()[:2]
            logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,
                                                                       errtype.__name__, errvalue))
def runImpl(self):
    """Refine tasks taken from ``self.taskList`` until the list is empty.

    Tasks are fetched in batches of ten.  Each entry is a
    ``(jediTaskID, splitRule, taskStatus, parent_tid)`` tuple and goes
    through the following steps, each one only if the previous succeeded:

    1. read the task parameters and decode them from JSON
    2. instantiate the refiner implementation via ``self.implFactory``
    3. extract the common parameters and check attribute lengths
    4. move the task to ``staging`` when the parameters contain
       ``toStaging`` and the task is not yet ``staged``
    5. check the state of the parent task, if there is one
    6. run ``doRefine`` of the implementation
    7. register the task, or set it to ``tobroken`` when a step failed

    A task that has to wait (staging, parent still running, unknown input
    dataset, temporary error) is updated with ``resetRefinedAttrs()``
    applied and skipped.

    Any exception escaping a batch is logged with the module-level
    ``logger`` and the loop proceeds with the next batch.  The method
    returns ``None`` once ``self.taskList`` yields an empty batch.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more tasks
            if len(taskList) == 0:
                self.logger.info('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, splitRule, taskStatus, parent_tid in taskList:
                # make logger
                tmpLog = MsgWrapper(self.logger, '< jediTaskID={0} >'.format(jediTaskID))
                tmpLog.debug('start')
                tmpStat = Interaction.SC_SUCCEEDED
                errStr = ''
                # reset per task: otherwise a failure while getting the impl
                # would leave the name unbound (first task) or still bound to
                # the refiner of the previous task, so that the wrong task
                # would be marked as broken in the register step below
                impl = None
                # read task parameters
                try:
                    taskParam = None
                    taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                    taskParamMap = RefinerUtils.decodeJSON(taskParam)
                except Exception:
                    errtype, errvalue = sys.exc_info()[:2]
                    errStr = 'conversion to map from json failed with {0}:{1}'.format(errtype.__name__, errvalue)
                    tmpLog.debug(taskParam)
                    tmpLog.error(errStr)
                    # nothing can be done without parameters; skip the task
                    continue
                # get impl
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('getting Impl')
                    try:
                        # get VO and sourceLabel
                        vo = taskParamMap['vo']
                        prodSourceLabel = taskParamMap['prodSourceLabel']
                        taskType = taskParamMap['taskType']
                        tmpLog.info('vo={0} sourceLabel={1} taskType={2}'.format(vo, prodSourceLabel, taskType))
                        # get impl
                        impl = self.implFactory.instantiateImpl(vo, prodSourceLabel, taskType,
                                                                self.taskBufferIF, self.ddmIF)
                        if impl is None:
                            # task refiner is undefined
                            errStr = 'task refiner is undefined for vo={0} sourceLabel={1}'.format(vo, prodSourceLabel)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        errStr = 'failed to get task refiner with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(errStr)
                        tmpStat = Interaction.SC_FAILED
                # extract common parameters
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('extracting common')
                    try:
                        # initialize impl
                        impl.initializeRefiner(tmpLog)
                        impl.oldTaskStatus = taskStatus
                        # extract common parameters
                        impl.extractCommon(jediTaskID, taskParamMap, self.workQueueMapper, splitRule)
                        # set parent tid
                        if parent_tid not in [None, jediTaskID]:
                            impl.taskSpec.parent_tid = parent_tid
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        errStr = 'failed to extract common parameters with {0}:{1} {2}'.format(
                            errtype.__name__, errvalue, traceback.format_exc())
                        tmpLog.error(errStr)
                        tmpStat = Interaction.SC_FAILED
                # check attribute length
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('checking attribute length')
                    if not impl.taskSpec.checkAttrLength():
                        tmpLog.error(impl.taskSpec.errorDialog)
                        tmpStat = Interaction.SC_FAILED
                # staging
                if tmpStat == Interaction.SC_SUCCEEDED:
                    if 'toStaging' in taskParamMap and taskStatus != 'staged':
                        errStr = 'wait until staging is done'
                        impl.taskSpec.status = 'staging'
                        impl.taskSpec.oldStatus = taskStatus
                        impl.taskSpec.setErrDiag(errStr)
                        # not to update some task attributes
                        impl.taskSpec.resetRefinedAttrs()
                        tmpLog.info(errStr)
                        self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                          {'jediTaskID': impl.taskSpec.jediTaskID},
                                                          oldStatus=[taskStatus],
                                                          updateDEFT=False,
                                                          setFrozenTime=False)
                        continue
                # check parent
                noWaitParent = False
                parentState = None
                if tmpStat == Interaction.SC_SUCCEEDED:
                    if parent_tid not in [None, jediTaskID]:
                        tmpLog.info('check parent task')
                        try:
                            tmpStat = self.taskBufferIF.checkParentTask_JEDI(parent_tid)
                            parentState = tmpStat
                            if tmpStat == 'completed':
                                # parent is done
                                tmpStat = Interaction.SC_SUCCEEDED
                            elif tmpStat == 'running':
                                if not impl.taskSpec.noWaitParent():
                                    # parent is running
                                    errStr = 'pending until parent task {0} is done'.format(parent_tid)
                                    impl.taskSpec.status = taskStatus
                                    impl.taskSpec.setOnHold()
                                    impl.taskSpec.setErrDiag(errStr)
                                    # not to update some task attributes
                                    impl.taskSpec.resetRefinedAttrs()
                                    tmpLog.info(errStr)
                                    self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                                      {'jediTaskID': impl.taskSpec.jediTaskID},
                                                                      oldStatus=[taskStatus],
                                                                      setFrozenTime=False)
                                    continue
                                else:
                                    # not wait for parent
                                    tmpStat = Interaction.SC_SUCCEEDED
                                    noWaitParent = True
                            else:
                                # parent is corrupted
                                tmpStat = Interaction.SC_FAILED
                                tmpErrStr = 'parent task {0} failed to complete'.format(parent_tid)
                                impl.taskSpec.setErrDiag(tmpErrStr)
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            errStr = 'failed to check parent task with {0}:{1}'.format(errtype.__name__, errvalue)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                # refine
                if tmpStat == Interaction.SC_SUCCEEDED:
                    tmpLog.info('refining with {0}'.format(impl.__class__.__name__))
                    try:
                        tmpStat = impl.doRefine(jediTaskID, taskParamMap)
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        # wait unknown input if noWaitParent or waitInput
                        if (((impl.taskSpec.noWaitParent() or impl.taskSpec.waitInput())
                             and errtype == JediException.UnknownDatasetError)
                                or parentState == 'running'
                                or errtype == Interaction.JEDITemporaryError):
                            if impl.taskSpec.noWaitParent() or parentState == 'running':
                                tmpErrStr = 'pending until parent produces input'
                                setFrozenTime = False
                            elif errtype == Interaction.JEDITemporaryError:
                                tmpErrStr = 'pending due to DDM problem. {0}'.format(errvalue)
                                setFrozenTime = True
                            else:
                                tmpErrStr = 'pending until input is staged'
                                setFrozenTime = True
                            # keep the original status and put the task on hold
                            impl.taskSpec.status = taskStatus
                            impl.taskSpec.setOnHold()
                            impl.taskSpec.setErrDiag(tmpErrStr)
                            # not to update some task attributes
                            impl.taskSpec.resetRefinedAttrs()
                            tmpLog.info(tmpErrStr)
                            self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                              {'jediTaskID': impl.taskSpec.jediTaskID},
                                                              oldStatus=[taskStatus],
                                                              insertUnknown=impl.unknownDatasetList,
                                                              setFrozenTime=setFrozenTime)
                            continue
                        else:
                            errStr = 'failed to refine task with {0}:{1}'.format(errtype.__name__, errvalue)
                            tmpLog.error(errStr)
                            tmpStat = Interaction.SC_FAILED
                # register
                if tmpStat != Interaction.SC_SUCCEEDED:
                    tmpLog.error('failed to refine the task')
                    # fall back to a bare spec when no refiner (or no spec) is available
                    if impl is None or getattr(impl, 'taskSpec', None) is None:
                        tmpTaskSpec = JediTaskSpec()
                        tmpTaskSpec.jediTaskID = jediTaskID
                    else:
                        tmpTaskSpec = impl.taskSpec
                    tmpTaskSpec.status = 'tobroken'
                    if errStr != '':
                        tmpTaskSpec.setErrDiag(errStr, True)
                    self.taskBufferIF.updateTask_JEDI(tmpTaskSpec,
                                                      {'jediTaskID': tmpTaskSpec.jediTaskID},
                                                      oldStatus=[taskStatus])
                else:
                    tmpLog.info('registering')
                    # fill JEDI tables
                    try:
                        # enable protection against task duplication
                        uniqueTaskName = bool(taskParamMap.get('uniqueTaskName')) and \
                            not impl.taskSpec.checkPreProcessed()
                        strTaskParams = None
                        if impl.updatedTaskParams is not None:
                            strTaskParams = RefinerUtils.encodeJSON(impl.updatedTaskParams)
                        if taskStatus in ['registered', 'staged']:
                            # unset pre-process flag
                            if impl.taskSpec.checkPreProcessed():
                                impl.taskSpec.setPostPreProcess()
                            # full registration
                            tmpStat, newTaskStatus = self.taskBufferIF.registerTaskInOneShot_JEDI(
                                jediTaskID, impl.taskSpec,
                                impl.inMasterDatasetSpec,
                                impl.inSecDatasetSpecList,
                                impl.outDatasetSpecList,
                                impl.outputTemplateMap,
                                impl.jobParamsTemplate,
                                strTaskParams,
                                impl.unmergeMasterDatasetSpec,
                                impl.unmergeDatasetSpecMap,
                                uniqueTaskName,
                                taskStatus)
                            if not tmpStat:
                                tmpErrStr = 'failed to register the task to JEDI in a single shot'
                                tmpLog.error(tmpErrStr)
                                impl.taskSpec.status = newTaskStatus
                                impl.taskSpec.setErrDiag(tmpErrStr, True)
                                self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                                  {'jediTaskID': impl.taskSpec.jediTaskID},
                                                                  oldStatus=[taskStatus])
                            tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                            tmpLog.info(tmpMsg)
                            tmpLog.sendMsg(tmpMsg, self.msgType)
                        else:
                            # disable scouts if previous attempt didn't use it
                            if not impl.taskSpec.useScout(splitRule):
                                impl.taskSpec.setUseScout(False)
                            # disallow to reset some attributes
                            for attName in ['ramCount', 'walltime', 'cpuTime', 'startTime']:
                                impl.taskSpec.resetChangedAttr(attName)
                            # update task with new params
                            self.taskBufferIF.updateTask_JEDI(impl.taskSpec,
                                                              {'jediTaskID': impl.taskSpec.jediTaskID},
                                                              oldStatus=[taskStatus])
                            # appending for incremental execution
                            tmpStat = self.taskBufferIF.appendDatasets_JEDI(jediTaskID,
                                                                            impl.inMasterDatasetSpec,
                                                                            impl.inSecDatasetSpecList)
                            if not tmpStat:
                                tmpLog.error('failed to append datasets for incexec')
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        tmpErrStr = 'failed to register the task to JEDI with {0}:{1}'.format(errtype.__name__, errvalue)
                        tmpLog.error(tmpErrStr)
                    else:
                        tmpLog.info('done')
        except Exception:
            # log and go on with the next batch
            errtype, errvalue = sys.exc_info()[:2]
            logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__,
                                                                       errtype.__name__, errvalue))