def runImpl(self):
    """Worker loop: consume queued task commands and execute them.

    Repeatedly pulls chunks of (jediTaskID, commandMap) pairs from
    self.taskList until it is empty, then returns.  commandMap carries
    'command', 'comment' and 'oldStatus'.  Supported commands:
      * kill/finish/reassign : kill the task's active jobs, or finalize
        the task status once no active jobs remain
      * retry/incexec        : for incexec first merge new parameters
        (JSON carried in the comment field) into the stored task
        parameters, then retry failed files
    All failures are logged and never propagate out of the loop.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make per-task logger
                tmpLog = MsgWrapper(self.logger, ' <jediTaskID={0}>'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    # get active PandaIDs to be killed
                    pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                    if pandaIDs is None:
                        tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                        tmpStat = Interaction.SC_FAILED
                    # kill jobs or update task
                    if tmpStat == Interaction.SC_SUCCEEDED:
                        if pandaIDs == []:
                            # no active jobs left -> finalize the task status
                            tmpLog.info('completed the command')
                            tmpTaskSpec = JediTaskSpec()
                            tmpTaskSpec.jediTaskID = jediTaskID
                            updateTaskStatus = True
                            if commandStr != 'reassign':
                                # keep oldStatus for task reassignment since it is reset when actually reassigned
                                tmpTaskSpec.forceUpdate('oldStatus')
                            else:
                                # extract cloud or site
                                # comment format assumed '<cloud|site>:<value>:<y|n>' -- TODO confirm with command producer
                                tmpItems = commentStr.split(':')
                                if tmpItems[0] == 'cloud':
                                    tmpTaskSpec.cloud = tmpItems[1]
                                else:
                                    tmpTaskSpec.site = tmpItems[1]
                                # back to oldStatus if necessary
                                if tmpItems[2] == 'y':
                                    tmpTaskSpec.status = oldStatus
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                    updateTaskStatus = False
                            if updateTaskStatus:
                                tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                            tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID})
                        else:
                            # still active jobs -> kill them (error code '50')
                            tmpLog.info('sending kill command')
                            tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                        tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params which must be recalculated
                            for newKey in ['nFiles', 'fixedSandbox']:
                                try:
                                    del taskParamMap[newKey]
                                except KeyError:
                                    pass
                            # convert new params (JSON in the comment field)
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params; a None value means deletion
                            for newKey, newVal in newParamMap.iteritems():
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild: rewrite the '-a <archive>' constant job parameter
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        # bug fix: was "'-a {0}'.taskParamMap[...]" which raised AttributeError
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if taskParamMap.has_key('buildSpec'):
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if taskParamMap.has_key('mergeSpec'):
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+', '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet != True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__, errvalue))
                            continue
                    # retry failed files
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr)
                    if tmpRet == True:
                        tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception:
            # guard the whole chunk so one broken task cannot kill the worker
            errtype, errvalue = sys.exc_info()[:2]
            logger.error('{0} failed in runImpl() with {1}:{2}'.format(self.__class__.__name__, errtype.__name__, errvalue))
def extractCommon(self, jediTaskID, taskParamMap, workQueueMapper, splitRule):
    """Build self.taskSpec from the common task parameters.

    Copies mandatory attributes from taskParamMap into a new JediTaskSpec,
    applies defaults for optional ones, translates the various split/ES
    parameters into split-rule tokens, resolves the work queue and the
    global share, and stores the result in self.taskSpec.

    Raises RuntimeError when no work queue matches the task attributes.
    """
    # make task spec
    taskSpec = JediTaskSpec()
    taskSpec.jediTaskID = jediTaskID
    taskSpec.taskName = taskParamMap['taskName']
    taskSpec.userName = taskParamMap['userName']
    taskSpec.vo = taskParamMap['vo']
    taskSpec.prodSourceLabel = taskParamMap['prodSourceLabel']
    taskSpec.taskPriority = taskParamMap['taskPriority']
    taskSpec.currentPriority = taskSpec.taskPriority
    taskSpec.architecture = taskParamMap['architecture']
    taskSpec.transUses = taskParamMap['transUses']
    taskSpec.transHome = taskParamMap['transHome']
    taskSpec.transPath = taskParamMap['transPath']
    taskSpec.processingType = taskParamMap['processingType']
    taskSpec.taskType = taskParamMap['taskType']
    taskSpec.splitRule = splitRule
    taskSpec.startTime = datetime.datetime.utcnow()
    # optional attributes
    if taskParamMap.has_key('workingGroup'):
        taskSpec.workingGroup = taskParamMap['workingGroup']
    if taskParamMap.has_key('countryGroup'):
        taskSpec.countryGroup = taskParamMap['countryGroup']
    if taskParamMap.has_key('ticketID'):
        taskSpec.ticketID = taskParamMap['ticketID']
    if taskParamMap.has_key('ticketSystemType'):
        taskSpec.ticketSystemType = taskParamMap['ticketSystemType']
    # request ID defaults to the task ID itself
    if taskParamMap.has_key('reqID'):
        taskSpec.reqID = taskParamMap['reqID']
    else:
        taskSpec.reqID = jediTaskID
    if taskParamMap.has_key('coreCount'):
        taskSpec.coreCount = taskParamMap['coreCount']
    else:
        taskSpec.coreCount = 1
    if taskParamMap.has_key('walltime'):
        taskSpec.walltime = taskParamMap['walltime']
    else:
        taskSpec.walltime = 0
    if not taskParamMap.has_key('walltimeUnit'):
        # force to set NULL so that retried tasks get data from scouts again
        taskSpec.forceUpdate('walltimeUnit')
    if taskParamMap.has_key('outDiskCount'):
        taskSpec.outDiskCount = taskParamMap['outDiskCount']
    else:
        taskSpec.outDiskCount = 0
    if 'outDiskUnit' in taskParamMap:
        taskSpec.outDiskUnit = taskParamMap['outDiskUnit']
    if taskParamMap.has_key('workDiskCount'):
        taskSpec.workDiskCount = taskParamMap['workDiskCount']
    else:
        taskSpec.workDiskCount = 0
    if taskParamMap.has_key('workDiskUnit'):
        taskSpec.workDiskUnit = taskParamMap['workDiskUnit']
    if taskParamMap.has_key('ramCount'):
        taskSpec.ramCount = taskParamMap['ramCount']
    else:
        taskSpec.ramCount = 0
    if taskParamMap.has_key('ramUnit'):
        taskSpec.ramUnit = taskParamMap['ramUnit']
    if taskParamMap.has_key('baseRamCount'):
        taskSpec.baseRamCount = taskParamMap['baseRamCount']
    else:
        taskSpec.baseRamCount = 0
    # HS06 stuff
    if 'cpuTimeUnit' in taskParamMap:
        taskSpec.cpuTimeUnit = taskParamMap['cpuTimeUnit']
    if 'cpuTime' in taskParamMap:
        taskSpec.cpuTime = taskParamMap['cpuTime']
    if 'cpuEfficiency' in taskParamMap:
        taskSpec.cpuEfficiency = taskParamMap['cpuEfficiency']
    else:
        # 90% of cpu efficiency by default
        taskSpec.cpuEfficiency = 90
    if 'baseWalltime' in taskParamMap:
        taskSpec.baseWalltime = taskParamMap['baseWalltime']
    else:
        # 10min of offset by default
        taskSpec.baseWalltime = 10 * 60
    # for merge
    if 'mergeRamCount' in taskParamMap:
        taskSpec.mergeRamCount = taskParamMap['mergeRamCount']
    if 'mergeCoreCount' in taskParamMap:
        taskSpec.mergeCoreCount = taskParamMap['mergeCoreCount']
    # scout
    if not taskParamMap.has_key('skipScout') and not taskSpec.isPostScout():
        taskSpec.setUseScout(True)
    # cloud
    if taskParamMap.has_key('cloud'):
        self.cloudName = taskParamMap['cloud']
        taskSpec.cloud = self.cloudName
    else:
        # set dummy to force update
        # NOTE(review): dummy-then-None presumably marks the column as
        # modified so NULL gets written -- relies on JediTaskSpec change tracking
        taskSpec.cloud = 'dummy'
        taskSpec.cloud = None
    # site
    if taskParamMap.has_key('site'):
        self.siteName = taskParamMap['site']
        taskSpec.site = self.siteName
    else:
        # set dummy to force update
        taskSpec.site = 'dummy'
        taskSpec.site = None
    # nucleus
    if 'nucleus' in taskParamMap:
        taskSpec.nucleus = taskParamMap['nucleus']
    # preset some parameters for job cloning
    if 'useJobCloning' in taskParamMap:
        # set implicit parameters
        if not 'nEventsPerWorker' in taskParamMap:
            taskParamMap['nEventsPerWorker'] = 1
        if not 'nSitesPerJob' in taskParamMap:
            taskParamMap['nSitesPerJob'] = 2
        if not 'nEsConsumers' in taskParamMap:
            taskParamMap['nEsConsumers'] = taskParamMap['nSitesPerJob']
    # event service flag: 2=job cloning, 1=event service, 0=normal
    if 'useJobCloning' in taskParamMap:
        taskSpec.eventService = 2
    elif taskParamMap.has_key('nEventsPerWorker'):
        taskSpec.eventService = 1
    else:
        taskSpec.eventService = 0
    # ttcr: requested time to completion
    if taskParamMap.has_key('ttcrTimestamp'):
        try:
            # get rid of the +00:00 timezone string and parse the timestamp
            taskSpec.ttcRequested = datetime.datetime.strptime(taskParamMap['ttcrTimestamp'].split('+')[0], '%Y-%m-%d %H:%M:%S.%f')
        except (IndexError, ValueError):
            pass
    # goal: stored as percent x10; 100% or more means no goal
    if 'goal' in taskParamMap:
        try:
            taskSpec.goal = int(float(taskParamMap['goal']) * 10)
            if taskSpec.goal >= 1000:
                taskSpec.goal = None
        except:
            pass
    # campaign
    if taskParamMap.has_key('campaign'):
        taskSpec.campaign = taskParamMap['campaign']
    # request type
    if 'requestType' in taskParamMap:
        taskSpec.requestType = taskParamMap['requestType']
    self.taskSpec = taskSpec
    # set split rule
    if 'tgtNumEventsPerJob' in taskParamMap:
        # set nEventsPerJob not respect file boundaries when nFilesPerJob is not used
        if not 'nFilesPerJob' in taskParamMap:
            self.setSplitRule(None, taskParamMap['tgtNumEventsPerJob'], JediTaskSpec.splitRuleToken['nEventsPerJob'])
    # copy simple split parameters straight from the param map
    self.setSplitRule(taskParamMap, 'nFilesPerJob', JediTaskSpec.splitRuleToken['nFilesPerJob'])
    self.setSplitRule(taskParamMap, 'nEventsPerJob', JediTaskSpec.splitRuleToken['nEventsPerJob'])
    self.setSplitRule(taskParamMap, 'nGBPerJob', JediTaskSpec.splitRuleToken['nGBPerJob'])
    self.setSplitRule(taskParamMap, 'nMaxFilesPerJob', JediTaskSpec.splitRuleToken['nMaxFilesPerJob'])
    self.setSplitRule(taskParamMap, 'nEventsPerWorker', JediTaskSpec.splitRuleToken['nEventsPerWorker'])
    self.setSplitRule(taskParamMap, 'useLocalIO', JediTaskSpec.splitRuleToken['useLocalIO'])
    self.setSplitRule(taskParamMap, 'disableAutoRetry', JediTaskSpec.splitRuleToken['disableAutoRetry'])
    self.setSplitRule(taskParamMap, 'nEsConsumers', JediTaskSpec.splitRuleToken['nEsConsumers'])
    self.setSplitRule(taskParamMap, 'waitInput', JediTaskSpec.splitRuleToken['waitInput'])
    self.setSplitRule(taskParamMap, 'addNthFieldToLFN', JediTaskSpec.splitRuleToken['addNthFieldToLFN'])
    self.setSplitRule(taskParamMap, 'scoutSuccessRate', JediTaskSpec.splitRuleToken['scoutSuccessRate'])
    self.setSplitRule(taskParamMap, 't1Weight', JediTaskSpec.splitRuleToken['t1Weight'])
    self.setSplitRule(taskParamMap, 'maxAttemptES', JediTaskSpec.splitRuleToken['maxAttemptES'])
    self.setSplitRule(taskParamMap, 'nSitesPerJob', JediTaskSpec.splitRuleToken['nSitesPerJob'])
    self.setSplitRule(taskParamMap, 'nJumboJobs', JediTaskSpec.splitRuleToken['nJumboJobs'])
    self.setSplitRule(taskParamMap, 'nEventsPerMergeJob', JediTaskSpec.splitRuleToken['nEventsPerMergeJob'])
    self.setSplitRule(taskParamMap, 'nFilesPerMergeJob', JediTaskSpec.splitRuleToken['nFilesPerMergeJob'])
    self.setSplitRule(taskParamMap, 'nGBPerMergeJob', JediTaskSpec.splitRuleToken['nGBPerMergeJob'])
    self.setSplitRule(taskParamMap, 'nMaxFilesPerMergeJob', JediTaskSpec.splitRuleToken['nMaxFilesPerMergeJob'])
    # flag-style rules (passing None sets the given literal value)
    if taskParamMap.has_key('loadXML'):
        self.setSplitRule(None, 3, JediTaskSpec.splitRuleToken['loadXML'])
        self.setSplitRule(None, 4, JediTaskSpec.splitRuleToken['groupBoundaryID'])
    if taskParamMap.has_key('pfnList'):
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['pfnList'])
    if taskParamMap.has_key('noWaitParent') and taskParamMap['noWaitParent'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['noWaitParent'])
    if 'respectLB' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['respectLB'])
    if taskParamMap.has_key('reuseSecOnDemand'):
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['reuseSecOnDemand'])
    if 'ddmBackEnd' in taskParamMap:
        self.taskSpec.setDdmBackEnd(taskParamMap['ddmBackEnd'])
    if 'disableReassign' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['disableReassign'])
    if 'allowPartialFinish' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['allowPartialFinish'])
    if 'useExhausted' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['useExhausted'])
    if 'useRealNumEvents' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['useRealNumEvents'])
    if 'ipConnectivity' in taskParamMap:
        self.taskSpec.setIpConnectivity(taskParamMap['ipConnectivity'])
    if 'altStageOut' in taskParamMap:
        self.taskSpec.setAltStageOut(taskParamMap['altStageOut'])
    if 'allowInputLAN' in taskParamMap:
        self.taskSpec.setAllowInputLAN(taskParamMap['allowInputLAN'])
    if 'runUntilClosed' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['runUntilClosed'])
    if 'stayOutputOnSite' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['stayOutputOnSite'])
    if 'useJobCloning' in taskParamMap:
        scValue = EventServiceUtils.getJobCloningValue(taskParamMap['useJobCloning'])
        self.setSplitRule(None, scValue, JediTaskSpec.splitRuleToken['useJobCloning'])
    if 'failWhenGoalUnreached' in taskParamMap and taskParamMap['failWhenGoalUnreached'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['failGoalUnreached'])
    if 'switchEStoNormal' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['switchEStoNormal'])
    if 'nEventsPerRange' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['dynamicNumEvents'])
    if 'allowInputWAN' in taskParamMap and taskParamMap['allowInputWAN'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['allowInputWAN'])
    if 'putLogToOS' in taskParamMap and taskParamMap['putLogToOS'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['putLogToOS'])
    if 'mergeEsOnOS' in taskParamMap and taskParamMap['mergeEsOnOS'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['mergeEsOnOS'])
    if 'writeInputToFile' in taskParamMap and taskParamMap['writeInputToFile'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['writeInputToFile'])
    if 'useFileAsSourceLFN' in taskParamMap and taskParamMap['useFileAsSourceLFN'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['useFileAsSourceLFN'])
    if 'ignoreMissingInDS' in taskParamMap and taskParamMap['ignoreMissingInDS'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['ignoreMissingInDS'])
    # work queue
    workQueue = None
    if 'workQueueName' in taskParamMap:
        # work queue is specified
        workQueue = workQueueMapper.getQueueWithName(taskSpec.vo, taskSpec.prodSourceLabel, taskParamMap['workQueueName'])
    if workQueue is None:
        # get work queue based on task attributes
        workQueue, tmpStr = workQueueMapper.getQueueWithSelParams(taskSpec.vo,
                                                                 taskSpec.prodSourceLabel,
                                                                 processingType=taskSpec.processingType,
                                                                 workingGroup=taskSpec.workingGroup,
                                                                 coreCount=taskSpec.coreCount,
                                                                 site=taskSpec.site,
                                                                 eventService=taskSpec.eventService,
                                                                 splitRule=taskSpec.splitRule,
                                                                 campaign=taskSpec.campaign)
    if workQueue is None:
        # no queue resolved -> hard failure
        errStr = 'workqueue is undefined for vo={0} label={1} '.format(taskSpec.vo, taskSpec.prodSourceLabel)
        errStr += 'processingType={0} workingGroup={1} coreCount={2} eventService={3} '.format(taskSpec.processingType, taskSpec.workingGroup, taskSpec.coreCount, taskSpec.eventService)
        errStr += 'splitRule={0} campaign={1}'.format(taskSpec.splitRule, taskSpec.campaign)
        raise RuntimeError, errStr
    self.taskSpec.workQueue_ID = workQueue.queue_id
    # Initialize the global share
    gshare = None
    if 'gshare' in taskParamMap and self.taskBufferIF.is_valid_share(taskParamMap['gshare']):
        # work queue is specified
        gshare = taskParamMap['gshare']
    else:
        # get share based on definition
        gshare = self.taskBufferIF.get_share_for_task(self.taskSpec)
        if gshare is None:
            # unlike the work queue, an unresolved share is tolerated
            gshare = 'No match'
            # errStr = 'share is undefined for vo={0} label={1} '.format(taskSpec.vo,taskSpec.prodSourceLabel)
            # errStr += 'workingGroup={0} campaign={1} '.format(taskSpec.workingGroup, taskSpec.campaign)
            # raise RuntimeError,errStr
    self.taskSpec.gshare = gshare
    # return
    return
def runImpl(self):
    """Worker loop: consume queued task commands and execute them.

    Pulls chunks of (jediTaskID, commandMap) pairs from self.taskList
    until it is empty, then returns.  Commands:
      * kill/finish/reassign : iterate twice -- first kill active jobs,
        then (or immediately, if none are active) finalize the task status
      * retry/incexec        : for incexec first merge new parameters
        (JSON in the comment field) into the stored task parameters,
        then retry failed files
    All failures are logged and never propagate out of the loop.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make per-task logger
                tmpLog = MsgWrapper(self.logger, ' <jediTaskID={0}>'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # loop twice to see immediate result
                    for iLoop in range(2):
                        # get active PandaIDs to be killed
                        if commandStr == 'reassign' and commentStr is not None and 'soft reassign' in commentStr:
                            pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                        else:
                            pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                        if pandaIDs is None:
                            tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                            tmpStat = Interaction.SC_FAILED
                        # kill jobs or update task
                        if tmpStat == Interaction.SC_SUCCEEDED:
                            if pandaIDs == []:
                                # done since no active jobs
                                tmpMsg = 'completed cleaning jobs'
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpTaskSpec = JediTaskSpec()
                                tmpTaskSpec.jediTaskID = jediTaskID
                                updateTaskStatus = True
                                if commandStr != 'reassign':
                                    # reset oldStatus
                                    # keep oldStatus for task reassignment since it is reset when actually reassigned
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                else:
                                    # extract cloud or site
                                    # comment format assumed '<cloud|site>:<value>:<y|n>' -- TODO confirm with command producer
                                    if commentStr is not None:
                                        tmpItems = commentStr.split(':')
                                        if tmpItems[0] == 'cloud':
                                            tmpTaskSpec.cloud = tmpItems[1]
                                        else:
                                            tmpTaskSpec.site = tmpItems[1]
                                        tmpMsg = 'set {0}={1}'.format(tmpItems[0], tmpItems[1])
                                        tmpLog.sendMsg(tmpMsg, self.msgType)
                                        tmpLog.info(tmpMsg)
                                        # back to oldStatus if necessary
                                        if tmpItems[2] == 'y':
                                            tmpTaskSpec.status = oldStatus
                                            tmpTaskSpec.forceUpdate('oldStatus')
                                            updateTaskStatus = False
                                if commandStr == 'reassign':
                                    tmpTaskSpec.forceUpdate('errorDialog')
                                if updateTaskStatus:
                                    tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                tmpMsg = 'set task.status={0}'.format(tmpTaskSpec.status)
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID})
                                tmpLog.info('done with {0}'.format(str(tmpRet)))
                                break
                            else:
                                # kill only in the first loop
                                if iLoop > 0:
                                    break
                                # wait or kill jobs
                                # bug fix: guard against commentStr being None (TypeError on 'in')
                                if commentStr is not None and 'soft finish' in commentStr:
                                    tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = True
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                                    break
                                else:
                                    tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpLog.sendMsg(tmpMsg, self.msgType)
                                    if commandStr in ['reassign', 'finish']:
                                        # force kill (error code '52')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '52', True)
                                    else:
                                        # normal kill (error code '50')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params which must be recalculated
                            for newKey in ['nFiles', 'fixedSandbox']:
                                try:
                                    del taskParamMap[newKey]
                                except KeyError:
                                    pass
                            # convert new params (JSON in the comment field)
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params; a None value means deletion
                            for newKey, newVal in newParamMap.iteritems():
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild: rewrite the '-a <archive>' constant job parameter
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        # bug fix: was "'-a {0}'.taskParamMap[...]" which raised AttributeError
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if taskParamMap.has_key('buildSpec'):
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if taskParamMap.has_key('mergeSpec'):
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+', '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet != True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__, errvalue))
                            continue
                    # retry failed files
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr)
                    if tmpRet == True:
                        tmpMsg = 'set task.status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception:
            # guard the whole chunk so one broken task cannot kill the worker
            errtype, errvalue = sys.exc_info()[:2]
            errStr = '{0} failed in runImpl() with {1}:{2} '.format(self.__class__.__name__, errtype.__name__, errvalue)
            errStr += traceback.format_exc()
            logger.error(errStr)
def runImpl(self):
    """Worker loop: consume queued task commands and execute them.

    Pulls chunks of (jediTaskID, commandMap) pairs from self.taskList
    until it is empty, then returns.  Commands:
      * kill/finish/reassign : iterate twice -- first kill active jobs
        (with reassign variants 'soft reassign' and 'nokill reassign'),
        then finalize the task status once no active jobs remain
      * retry/incexec        : for incexec first merge new parameters
        (JSON in the comment field) into the stored task parameters,
        then retry failed files, honoring 'sole '/'discard ' modifiers
    All failures are logged and never propagate out of the loop.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make per-task logger
                tmpLog = MsgWrapper(self.logger, ' < jediTaskID={0} >'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # loop twice to see immediate result
                    for iLoop in range(2):
                        # get active PandaIDs to be killed
                        if commandStr == 'reassign' and commentStr is not None and 'soft reassign' in commentStr:
                            pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                        elif commandStr == 'reassign' and commentStr is not None and 'nokill reassign' in commentStr:
                            # nokill: leave running jobs alone
                            pandaIDs = []
                        else:
                            pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                        if pandaIDs is None:
                            tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                            tmpStat = Interaction.SC_FAILED
                        # kill jobs or update task
                        if tmpStat == Interaction.SC_SUCCEEDED:
                            if pandaIDs == []:
                                # done since no active jobs
                                tmpMsg = 'completed cleaning jobs'
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpTaskSpec = JediTaskSpec()
                                tmpTaskSpec.jediTaskID = jediTaskID
                                updateTaskStatus = True
                                if commandStr != 'reassign':
                                    # reset oldStatus
                                    # keep oldStatus for task reassignment since it is reset when actually reassigned
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                else:
                                    # extract cloud or site
                                    # comment format assumed '<cloud|nucleus|site>:<value>:<y|n>' -- TODO confirm with command producer
                                    if commentStr is not None:
                                        tmpItems = commentStr.split(':')
                                        if tmpItems[0] == 'cloud':
                                            tmpTaskSpec.cloud = tmpItems[1]
                                        elif tmpItems[0] == 'nucleus':
                                            tmpTaskSpec.nucleus = tmpItems[1]
                                        else:
                                            tmpTaskSpec.site = tmpItems[1]
                                        tmpMsg = 'set {0}={1}'.format(tmpItems[0], tmpItems[1])
                                        tmpLog.sendMsg(tmpMsg, self.msgType)
                                        tmpLog.info(tmpMsg)
                                        # back to oldStatus if necessary
                                        if tmpItems[2] == 'y':
                                            tmpTaskSpec.status = oldStatus
                                            tmpTaskSpec.forceUpdate('oldStatus')
                                            updateTaskStatus = False
                                if commandStr == 'reassign':
                                    tmpTaskSpec.forceUpdate('errorDialog')
                                if commandStr == 'finish':
                                    # update datasets
                                    tmpLog.info('updating datasets to finish')
                                    tmpStat = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(jediTaskID, self.pid)
                                    if not tmpStat:
                                        tmpLog.info('wait until datasets are updated to finish')
                                    # ignore failGoalUnreached when manually finished
                                    tmpStat, taskSpec = self.taskBufferIF.getTaskWithID_JEDI(jediTaskID)
                                    tmpTaskSpec.splitRule = taskSpec.splitRule
                                    tmpTaskSpec.unsetFailGoalUnreached()
                                if updateTaskStatus:
                                    tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                tmpMsg = 'set task_status={0}'.format(tmpTaskSpec.status)
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID}, setOldModTime=True)
                                tmpLog.info('done with {0}'.format(str(tmpRet)))
                                break
                            else:
                                # kill only in the first loop
                                if iLoop > 0:
                                    break
                                # wait or kill jobs
                                # bug fix: guard against commentStr being None (TypeError on 'in')
                                if commentStr is not None and 'soft finish' in commentStr:
                                    # soft finish: kill only queued jobs, let running ones drain
                                    queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                                    tmpMsg = "trying to kill {0} queued jobs for soft finish".format(len(queuedPandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = self.taskBufferIF.killJobs(queuedPandaIDs, commentStr, '52', True)
                                    tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = True
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                                    break
                                else:
                                    tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpLog.sendMsg(tmpMsg, self.msgType)
                                    if commandStr in ['finish']:
                                        # force kill (error code '52')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '52', True)
                                    elif commandStr in ['reassign']:
                                        # force kill (error code '51')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '51', True)
                                    else:
                                        # normal kill (error code '50')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params which must be recalculated
                            for newKey in ['nFiles', 'fixedSandbox']:
                                try:
                                    del taskParamMap[newKey]
                                except KeyError:
                                    pass
                            # convert new params (JSON in the comment field)
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params; a None value means deletion
                            for newKey, newVal in newParamMap.iteritems():
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild: rewrite the '-a <archive>' constant job parameter
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        # bug fix: was "'-a {0}'.taskParamMap[...]" which raised AttributeError
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if taskParamMap.has_key('buildSpec'):
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if taskParamMap.has_key('mergeSpec'):
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+', '-a {0}'.format(taskParamMap['fixedSandbox']),
                                               taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet != True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            tmpLog.error('failed to change task params with {0}:{1}'.format(errtype.__name__, errvalue))
                            continue
                    # retry child tasks unless 'sole ' is requested
                    # robustness fix: commentStr may be None for plain retry; keep defaults then
                    if commentStr is not None and 'sole ' in commentStr:
                        retryChildTasks = False
                    else:
                        retryChildTasks = True
                    # discard events
                    if commentStr is not None and 'discard ' in commentStr:
                        discardEvents = True
                    else:
                        discardEvents = False
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr,
                                                                             retryChildTasks=retryChildTasks,
                                                                             discardEvents=discardEvents)
                    if tmpRet == True:
                        tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception:
            # guard the whole chunk so one broken task cannot kill the worker
            errtype, errvalue = sys.exc_info()[:2]
            errStr = '{0} failed in runImpl() with {1}:{2} '.format(self.__class__.__name__, errtype.__name__, errvalue)
            errStr += traceback.format_exc()
            logger.error(errStr)
def runImpl(self):
    """Worker loop: consume queued task commands and execute them.

    Pulls chunks of (jediTaskID, commandMap) pairs from self.taskList
    until it is empty, then returns.  Commands:
      * kill/finish/reassign : iterate twice -- first kill active jobs
        (respecting 'soft reassign', 'nokill reassign' and 'soft finish'
        modifiers in the comment), then finalize the task status once no
        active jobs remain
      * retry/incexec        : for incexec first merge new parameters
        (JSON in the comment field) into the stored task parameters,
        then retry failed files, honoring 'sole '/'discard '/'staged '
        modifiers
    All failures are logged and never propagate out of the loop.
    """
    while True:
        try:
            # get a part of list
            nTasks = 10
            taskList = self.taskList.get(nTasks)
            # no more datasets
            if len(taskList) == 0:
                self.logger.debug('{0} terminating since no more items'.format(self.__class__.__name__))
                return
            # loop over all tasks
            for jediTaskID, commandMap in taskList:
                # make per-task logger
                tmpLog = MsgWrapper(self.logger, ' < jediTaskID={0} >'.format(jediTaskID))
                commandStr = commandMap['command']
                commentStr = commandMap['comment']
                oldStatus = commandMap['oldStatus']
                tmpLog.info('start for {0}'.format(commandStr))
                tmpStat = Interaction.SC_SUCCEEDED
                if commandStr in ['kill', 'finish', 'reassign']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # loop twice to see immediate result
                    for iLoop in range(2):
                        # get active PandaIDs to be killed
                        if commandStr == 'reassign' and commentStr is not None and 'soft reassign' in commentStr:
                            pandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                        elif commandStr == 'reassign' and commentStr is not None and 'nokill reassign' in commentStr:
                            # nokill: leave running jobs alone
                            pandaIDs = []
                        else:
                            pandaIDs = self.taskBufferIF.getPandaIDsWithTask_JEDI(jediTaskID, True)
                        if pandaIDs is None:
                            tmpLog.error('failed to get PandaIDs for jediTaskID={0}'.format(jediTaskID))
                            tmpStat = Interaction.SC_FAILED
                        # kill jobs or update task
                        if tmpStat == Interaction.SC_SUCCEEDED:
                            if pandaIDs == []:
                                # done since no active jobs
                                tmpMsg = 'completed cleaning jobs'
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpTaskSpec = JediTaskSpec()
                                tmpTaskSpec.jediTaskID = jediTaskID
                                updateTaskStatus = True
                                if commandStr != 'reassign':
                                    # reset oldStatus
                                    # keep oldStatus for task reassignment since it is reset when actually reassigned
                                    tmpTaskSpec.forceUpdate('oldStatus')
                                else:
                                    # extract cloud or site
                                    # comment assumed formatted '<cloud|nucleus|site>:<value>:<y|n>' -- TODO confirm
                                    if commentStr is not None:
                                        tmpItems = commentStr.split(':')
                                        if tmpItems[0] == 'cloud':
                                            tmpTaskSpec.cloud = tmpItems[1]
                                        elif tmpItems[0] == 'nucleus':
                                            tmpTaskSpec.nucleus = tmpItems[1]
                                        else:
                                            tmpTaskSpec.site = tmpItems[1]
                                        tmpMsg = 'set {0}={1}'.format(tmpItems[0], tmpItems[1])
                                        tmpLog.sendMsg(tmpMsg, self.msgType)
                                        tmpLog.info(tmpMsg)
                                        # back to oldStatus if necessary
                                        if tmpItems[2] == 'y':
                                            tmpTaskSpec.status = oldStatus
                                            tmpTaskSpec.forceUpdate('oldStatus')
                                            updateTaskStatus = False
                                if commandStr == 'reassign':
                                    tmpTaskSpec.forceUpdate('errorDialog')
                                if commandStr == 'finish':
                                    # update datasets
                                    tmpLog.info('updating datasets to finish')
                                    tmpStat = self.taskBufferIF.updateDatasetsToFinishTask_JEDI(jediTaskID, self.pid)
                                    if not tmpStat:
                                        tmpLog.info('wait until datasets are updated to finish')
                                    # ignore failGoalUnreached when manually finished
                                    tmpStat, taskSpec = self.taskBufferIF.getTaskWithID_JEDI(jediTaskID)
                                    tmpTaskSpec.splitRule = taskSpec.splitRule
                                    tmpTaskSpec.unsetFailGoalUnreached()
                                if updateTaskStatus:
                                    tmpTaskSpec.status = JediTaskSpec.commandStatusMap()[commandStr]['done']
                                tmpMsg = 'set task_status={0}'.format(tmpTaskSpec.status)
                                tmpLog.sendMsg(tmpMsg, self.msgType)
                                tmpLog.info(tmpMsg)
                                tmpRet = self.taskBufferIF.updateTask_JEDI(tmpTaskSpec, {'jediTaskID': jediTaskID}, setOldModTime=True)
                                tmpLog.info('done with {0}'.format(str(tmpRet)))
                                break
                            else:
                                # kill only in the first loop
                                if iLoop > 0:
                                    break
                                # wait or kill jobs
                                if commentStr and 'soft finish' in commentStr:
                                    # soft finish: kill only queued jobs, let running ones drain
                                    queuedPandaIDs = self.taskBufferIF.getQueuedPandaIDsWithTask_JEDI(jediTaskID)
                                    tmpMsg = "trying to kill {0} queued jobs for soft finish".format(len(queuedPandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = self.taskBufferIF.killJobs(queuedPandaIDs, commentStr, '52', True)
                                    tmpMsg = "wating {0} jobs for soft finish".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpRet = True
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                                    break
                                else:
                                    tmpMsg = "trying to kill {0} jobs".format(len(pandaIDs))
                                    tmpLog.info(tmpMsg)
                                    tmpLog.sendMsg(tmpMsg, self.msgType)
                                    if commandStr in ['finish']:
                                        # force kill (error code '52')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '52', True)
                                    elif commandStr in ['reassign']:
                                        # force kill (error code '51')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '51', True)
                                    else:
                                        # normal kill (error code '50')
                                        tmpRet = self.taskBufferIF.killJobs(pandaIDs, commentStr, '50', True)
                                    tmpLog.info('done with {0}'.format(str(tmpRet)))
                elif commandStr in ['retry', 'incexec']:
                    tmpMsg = 'executing {0}'.format(commandStr)
                    tmpLog.info(tmpMsg)
                    tmpLog.sendMsg(tmpMsg, self.msgType)
                    # change task params for incexec
                    if commandStr == 'incexec':
                        try:
                            # read task params
                            taskParam = self.taskBufferIF.getTaskParamsWithID_JEDI(jediTaskID)
                            taskParamMap = RefinerUtils.decodeJSON(taskParam)
                            # remove some params
                            for newKey in ['nFiles', 'fixedSandbox']:
                                try:
                                    del taskParamMap[newKey]
                                except Exception:
                                    pass
                            # convert new params (JSON in the comment field)
                            newParamMap = RefinerUtils.decodeJSON(commentStr)
                            # change params; a None value means deletion
                            for newKey, newVal in iteritems(newParamMap):
                                if newVal is None:
                                    # delete
                                    if newKey in taskParamMap:
                                        del taskParamMap[newKey]
                                else:
                                    # change
                                    taskParamMap[newKey] = newVal
                            # overwrite sandbox
                            if 'fixedSandbox' in taskParamMap:
                                # noBuild: rewrite the '-a <archive>' constant job parameter
                                for tmpParam in taskParamMap['jobParameters']:
                                    if tmpParam['type'] == 'constant' and re.search('^-a [^ ]+$', tmpParam['value']) is not None:
                                        tmpParam['value'] = '-a {0}'.format(taskParamMap['fixedSandbox'])
                                # build
                                if 'buildSpec' in taskParamMap:
                                    taskParamMap['buildSpec']['archiveName'] = taskParamMap['fixedSandbox']
                                # merge
                                if 'mergeSpec' in taskParamMap:
                                    taskParamMap['mergeSpec']['jobParameters'] = \
                                        re.sub('-a [^ ]+', '-a {0}'.format(taskParamMap['fixedSandbox']), taskParamMap['mergeSpec']['jobParameters'])
                            # encode new param
                            strTaskParams = RefinerUtils.encodeJSON(taskParamMap)
                            tmpRet = self.taskBufferIF.updateTaskParams_JEDI(jediTaskID, strTaskParams)
                            if tmpRet is not True:
                                tmpLog.error('failed to update task params')
                                continue
                        except Exception as e:
                            tmpLog.error('failed to change task params with {} {}'.format(str(e), traceback.format_exc()))
                            continue
                    # retry child tasks unless 'sole ' is requested
                    if 'sole ' in commentStr:
                        retryChildTasks = False
                    else:
                        retryChildTasks = True
                    # discard events
                    if 'discard ' in commentStr:
                        discardEvents = True
                    else:
                        discardEvents = False
                    # release un-staged files
                    if 'staged ' in commentStr:
                        releaseUnstaged = True
                    else:
                        releaseUnstaged = False
                    tmpRet, newTaskStatus = self.taskBufferIF.retryTask_JEDI(jediTaskID, commandStr,
                                                                             retryChildTasks=retryChildTasks,
                                                                             discardEvents=discardEvents,
                                                                             release_unstaged=releaseUnstaged)
                    if tmpRet is True:
                        tmpMsg = 'set task_status={0}'.format(newTaskStatus)
                        tmpLog.sendMsg(tmpMsg, self.msgType)
                        tmpLog.info(tmpMsg)
                    tmpLog.info('done with {0}'.format(tmpRet))
                else:
                    tmpLog.error('unknown command')
        except Exception as e:
            # guard the whole chunk so one broken task cannot kill the worker
            errStr = '{} failed in runImpl() with {} {} '.format(self.__class__.__name__, str(e), traceback.format_exc())
            logger.error(errStr)
def extractCommon(self, jediTaskID, taskParamMap, workQueueMapper, splitRule):
    """Build a JediTaskSpec from the task parameter map and attach it to self.

    Copies mandatory and optional attributes from taskParamMap into a new
    JediTaskSpec, applies defaults, transfers split rules, resolves the work
    queue and the global share, and finally stores the spec in self.taskSpec.
    Also sets self.cloudName / self.siteName when 'cloud' / 'site' are given.

    Fixes vs. previous revision:
      * ``raise RuntimeError,errStr`` (Python-2-only syntax, a SyntaxError on
        Python 3) replaced with ``raise RuntimeError(errStr)``.
      * ``dict.has_key`` (removed in Python 3) replaced with ``in`` tests,
        consistent with the ``in`` checks already used in this method.
      * bare ``except:`` around goal parsing narrowed to ``except Exception:``.

    :param jediTaskID: task identifier; also used as the reqID fallback
    :param taskParamMap: dict of task parameters. NOTE: modified in place when
                         'useJobCloning' presets implicit ES parameters.
    :param workQueueMapper: mapper used to resolve the work queue
    :param splitRule: pre-built split-rule string stored on the task spec
    :raises RuntimeError: when no work queue can be resolved for the task
    """
    # make task spec
    taskSpec = JediTaskSpec()
    taskSpec.jediTaskID = jediTaskID
    taskSpec.taskName = taskParamMap['taskName']
    taskSpec.userName = taskParamMap['userName']
    taskSpec.vo = taskParamMap['vo']
    taskSpec.prodSourceLabel = taskParamMap['prodSourceLabel']
    taskSpec.taskPriority = taskParamMap['taskPriority']
    taskSpec.currentPriority = taskSpec.taskPriority
    taskSpec.architecture = taskParamMap['architecture']
    taskSpec.transUses = taskParamMap['transUses']
    taskSpec.transHome = taskParamMap['transHome']
    taskSpec.transPath = taskParamMap['transPath']
    taskSpec.processingType = taskParamMap['processingType']
    taskSpec.taskType = taskParamMap['taskType']
    taskSpec.splitRule = splitRule
    taskSpec.startTime = datetime.datetime.utcnow()
    if 'workingGroup' in taskParamMap:
        taskSpec.workingGroup = taskParamMap['workingGroup']
    if 'countryGroup' in taskParamMap:
        taskSpec.countryGroup = taskParamMap['countryGroup']
    if 'ticketID' in taskParamMap:
        taskSpec.ticketID = taskParamMap['ticketID']
    if 'ticketSystemType' in taskParamMap:
        taskSpec.ticketSystemType = taskParamMap['ticketSystemType']
    if 'reqID' in taskParamMap:
        taskSpec.reqID = taskParamMap['reqID']
    else:
        # fall back to the task ID itself as request ID
        taskSpec.reqID = jediTaskID
    if 'coreCount' in taskParamMap:
        taskSpec.coreCount = taskParamMap['coreCount']
    else:
        taskSpec.coreCount = 1
    if 'walltime' in taskParamMap:
        taskSpec.walltime = taskParamMap['walltime']
    else:
        taskSpec.walltime = 0
    if 'walltimeUnit' not in taskParamMap:
        # force to set NULL so that retried tasks get data from scouts again
        taskSpec.forceUpdate('walltimeUnit')
    if 'outDiskCount' in taskParamMap:
        taskSpec.outDiskCount = taskParamMap['outDiskCount']
    else:
        taskSpec.outDiskCount = 0
    if 'outDiskUnit' in taskParamMap:
        taskSpec.outDiskUnit = taskParamMap['outDiskUnit']
    if 'workDiskCount' in taskParamMap:
        taskSpec.workDiskCount = taskParamMap['workDiskCount']
    else:
        taskSpec.workDiskCount = 0
    if 'workDiskUnit' in taskParamMap:
        taskSpec.workDiskUnit = taskParamMap['workDiskUnit']
    if 'ramCount' in taskParamMap:
        taskSpec.ramCount = taskParamMap['ramCount']
    else:
        taskSpec.ramCount = 0
    if 'ramUnit' in taskParamMap:
        taskSpec.ramUnit = taskParamMap['ramUnit']
    if 'baseRamCount' in taskParamMap:
        taskSpec.baseRamCount = taskParamMap['baseRamCount']
    else:
        taskSpec.baseRamCount = 0
    # HS06 stuff
    if 'cpuTimeUnit' in taskParamMap:
        taskSpec.cpuTimeUnit = taskParamMap['cpuTimeUnit']
    if 'cpuTime' in taskParamMap:
        taskSpec.cpuTime = taskParamMap['cpuTime']
    if 'cpuEfficiency' in taskParamMap:
        taskSpec.cpuEfficiency = taskParamMap['cpuEfficiency']
    else:
        # 90% of cpu efficiency by default
        taskSpec.cpuEfficiency = 90
    if 'baseWalltime' in taskParamMap:
        taskSpec.baseWalltime = taskParamMap['baseWalltime']
    else:
        # 10min of offset by default
        taskSpec.baseWalltime = 10 * 60
    # for merge
    if 'mergeRamCount' in taskParamMap:
        taskSpec.mergeRamCount = taskParamMap['mergeRamCount']
    if 'mergeCoreCount' in taskParamMap:
        taskSpec.mergeCoreCount = taskParamMap['mergeCoreCount']
    # scout
    if 'skipScout' not in taskParamMap and not taskSpec.isPostScout():
        taskSpec.setUseScout(True)
    # cloud
    if 'cloud' in taskParamMap:
        self.cloudName = taskParamMap['cloud']
        taskSpec.cloud = self.cloudName
    else:
        # set dummy to force update
        taskSpec.cloud = 'dummy'
        taskSpec.cloud = None
    # site
    if 'site' in taskParamMap:
        self.siteName = taskParamMap['site']
        taskSpec.site = self.siteName
    else:
        # set dummy to force update
        taskSpec.site = 'dummy'
        taskSpec.site = None
    # nucleus
    if 'nucleus' in taskParamMap:
        taskSpec.nucleus = taskParamMap['nucleus']
    # preset some parameters for job cloning
    if 'useJobCloning' in taskParamMap:
        # set implicit parameters (mutates taskParamMap on purpose)
        if 'nEventsPerWorker' not in taskParamMap:
            taskParamMap['nEventsPerWorker'] = 1
        if 'nSitesPerJob' not in taskParamMap:
            taskParamMap['nSitesPerJob'] = 2
        if 'nEsConsumers' not in taskParamMap:
            taskParamMap['nEsConsumers'] = taskParamMap['nSitesPerJob']
    # event service flag
    if 'useJobCloning' in taskParamMap:
        taskSpec.eventService = 2
    elif 'nEventsPerWorker' in taskParamMap:
        taskSpec.eventService = 1
    else:
        taskSpec.eventService = 0
    # ttcr: requested time to completion
    if 'ttcrTimestamp' in taskParamMap:
        try:
            # get rid of the +00:00 timezone string and parse the timestamp
            taskSpec.ttcRequested = datetime.datetime.strptime(taskParamMap['ttcrTimestamp'].split('+')[0],
                                                               '%Y-%m-%d %H:%M:%S.%f')
        except (IndexError, ValueError):
            pass
    # goal
    if 'goal' in taskParamMap:
        try:
            # goal is stored as permille (x10); 100% or more means no goal
            taskSpec.goal = int(float(taskParamMap['goal']) * 10)
            if taskSpec.goal >= 1000:
                taskSpec.goal = None
        except Exception:
            # unparsable goal is ignored (best effort)
            pass
    # campaign
    if 'campaign' in taskParamMap:
        taskSpec.campaign = taskParamMap['campaign']
    # request type
    if 'requestType' in taskParamMap:
        taskSpec.requestType = taskParamMap['requestType']
    self.taskSpec = taskSpec
    # set split rule
    if 'tgtNumEventsPerJob' in taskParamMap:
        # set nEventsPerJob not respect file boundaries when nFilesPerJob is not used
        if 'nFilesPerJob' not in taskParamMap:
            self.setSplitRule(None, taskParamMap['tgtNumEventsPerJob'],
                              JediTaskSpec.splitRuleToken['nEventsPerJob'])
    self.setSplitRule(taskParamMap, 'nFilesPerJob', JediTaskSpec.splitRuleToken['nFilesPerJob'])
    self.setSplitRule(taskParamMap, 'nEventsPerJob', JediTaskSpec.splitRuleToken['nEventsPerJob'])
    self.setSplitRule(taskParamMap, 'nGBPerJob', JediTaskSpec.splitRuleToken['nGBPerJob'])
    self.setSplitRule(taskParamMap, 'nMaxFilesPerJob', JediTaskSpec.splitRuleToken['nMaxFilesPerJob'])
    self.setSplitRule(taskParamMap, 'nEventsPerWorker', JediTaskSpec.splitRuleToken['nEventsPerWorker'])
    self.setSplitRule(taskParamMap, 'useLocalIO', JediTaskSpec.splitRuleToken['useLocalIO'])
    self.setSplitRule(taskParamMap, 'disableAutoRetry', JediTaskSpec.splitRuleToken['disableAutoRetry'])
    self.setSplitRule(taskParamMap, 'nEsConsumers', JediTaskSpec.splitRuleToken['nEsConsumers'])
    self.setSplitRule(taskParamMap, 'waitInput', JediTaskSpec.splitRuleToken['waitInput'])
    self.setSplitRule(taskParamMap, 'addNthFieldToLFN', JediTaskSpec.splitRuleToken['addNthFieldToLFN'])
    self.setSplitRule(taskParamMap, 'scoutSuccessRate', JediTaskSpec.splitRuleToken['scoutSuccessRate'])
    self.setSplitRule(taskParamMap, 't1Weight', JediTaskSpec.splitRuleToken['t1Weight'])
    self.setSplitRule(taskParamMap, 'maxAttemptES', JediTaskSpec.splitRuleToken['maxAttemptES'])
    self.setSplitRule(taskParamMap, 'nSitesPerJob', JediTaskSpec.splitRuleToken['nSitesPerJob'])
    self.setSplitRule(taskParamMap, 'nJumboJobs', JediTaskSpec.splitRuleToken['nJumboJobs'])
    self.setSplitRule(taskParamMap, 'nEventsPerMergeJob', JediTaskSpec.splitRuleToken['nEventsPerMergeJob'])
    self.setSplitRule(taskParamMap, 'nFilesPerMergeJob', JediTaskSpec.splitRuleToken['nFilesPerMergeJob'])
    self.setSplitRule(taskParamMap, 'nGBPerMergeJob', JediTaskSpec.splitRuleToken['nGBPerMergeJob'])
    self.setSplitRule(taskParamMap, 'nMaxFilesPerMergeJob', JediTaskSpec.splitRuleToken['nMaxFilesPerMergeJob'])
    if 'loadXML' in taskParamMap:
        self.setSplitRule(None, 3, JediTaskSpec.splitRuleToken['loadXML'])
        self.setSplitRule(None, 4, JediTaskSpec.splitRuleToken['groupBoundaryID'])
    if 'pfnList' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['pfnList'])
    if 'noWaitParent' in taskParamMap and taskParamMap['noWaitParent'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['noWaitParent'])
    if 'respectLB' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['respectLB'])
    if 'reuseSecOnDemand' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['reuseSecOnDemand'])
    if 'ddmBackEnd' in taskParamMap:
        self.taskSpec.setDdmBackEnd(taskParamMap['ddmBackEnd'])
    if 'disableReassign' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['disableReassign'])
    if 'allowPartialFinish' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['allowPartialFinish'])
    if 'useExhausted' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['useExhausted'])
    if 'useRealNumEvents' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['useRealNumEvents'])
    if 'ipConnectivity' in taskParamMap:
        self.taskSpec.setIpConnectivity(taskParamMap['ipConnectivity'])
    if 'altStageOut' in taskParamMap:
        self.taskSpec.setAltStageOut(taskParamMap['altStageOut'])
    if 'allowInputLAN' in taskParamMap:
        self.taskSpec.setAllowInputLAN(taskParamMap['allowInputLAN'])
    if 'runUntilClosed' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['runUntilClosed'])
    if 'stayOutputOnSite' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['stayOutputOnSite'])
    if 'useJobCloning' in taskParamMap:
        scValue = EventServiceUtils.getJobCloningValue(taskParamMap['useJobCloning'])
        self.setSplitRule(None, scValue, JediTaskSpec.splitRuleToken['useJobCloning'])
    if 'failWhenGoalUnreached' in taskParamMap and taskParamMap['failWhenGoalUnreached'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['failGoalUnreached'])
    if 'switchEStoNormal' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['switchEStoNormal'])
    if 'nEventsPerRange' in taskParamMap:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['dynamicNumEvents'])
    if 'allowInputWAN' in taskParamMap and taskParamMap['allowInputWAN'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['allowInputWAN'])
    if 'putLogToOS' in taskParamMap and taskParamMap['putLogToOS'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['putLogToOS'])
    if 'mergeEsOnOS' in taskParamMap and taskParamMap['mergeEsOnOS'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['mergeEsOnOS'])
    if 'writeInputToFile' in taskParamMap and taskParamMap['writeInputToFile'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['writeInputToFile'])
    if 'useFileAsSourceLFN' in taskParamMap and taskParamMap['useFileAsSourceLFN'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['useFileAsSourceLFN'])
    if 'ignoreMissingInDS' in taskParamMap and taskParamMap['ignoreMissingInDS'] == True:
        self.setSplitRule(None, 1, JediTaskSpec.splitRuleToken['ignoreMissingInDS'])
    # work queue
    workQueue = None
    if 'workQueueName' in taskParamMap:
        # work queue is specified
        workQueue = workQueueMapper.getQueueWithName(taskSpec.vo, taskSpec.prodSourceLabel,
                                                     taskParamMap['workQueueName'])
    if workQueue is None:
        # get work queue based on task attributes
        workQueue, tmpStr = workQueueMapper.getQueueWithSelParams(taskSpec.vo,
                                                                  taskSpec.prodSourceLabel,
                                                                  processingType=taskSpec.processingType,
                                                                  workingGroup=taskSpec.workingGroup,
                                                                  coreCount=taskSpec.coreCount,
                                                                  site=taskSpec.site,
                                                                  eventService=taskSpec.eventService,
                                                                  splitRule=taskSpec.splitRule,
                                                                  campaign=taskSpec.campaign)
    if workQueue is None:
        errStr = 'workqueue is undefined for vo={0} label={1} '.format(taskSpec.vo, taskSpec.prodSourceLabel)
        errStr += 'processingType={0} workingGroup={1} coreCount={2} eventService={3} '.format(taskSpec.processingType,
                                                                                               taskSpec.workingGroup,
                                                                                               taskSpec.coreCount,
                                                                                               taskSpec.eventService)
        errStr += 'splitRule={0} campaign={1}'.format(taskSpec.splitRule, taskSpec.campaign)
        # was: raise RuntimeError,errStr (Python-2-only syntax)
        raise RuntimeError(errStr)
    self.taskSpec.workQueue_ID = workQueue.queue_id
    # Initialize the global share
    gshare = None
    if 'gshare' in taskParamMap and self.taskBufferIF.is_valid_share(taskParamMap['gshare']):
        # work queue is specified
        gshare = taskParamMap['gshare']
    else:
        # get share based on definition
        gshare = self.taskBufferIF.get_share_for_task(self.taskSpec)
        if gshare is None:
            gshare = 'No match'
    self.taskSpec.gshare = gshare
    # return
    return