def setJobParameter(self, par_name, par_value, sendFlag=True):
    """Record one job parameter for jobID and optionally send it.

    :param par_name: key under which the value is kept in ``self.jobParameters``
    :param par_value: value to record
    :param sendFlag: when true, ``sendStoredJobParameters`` is called right away
    :return: ``S_OK`` when nothing is sent (no jobID, or ``sendFlag`` is false),
        otherwise whatever ``sendStoredJobParameters`` returns
    """
    if not self.jobID:
        # No job ID: treated as a local execution, nothing is recorded.
        return S_OK('Local execution, jobID is null.')

    # add job parameter record: the value is stored together with the
    # Time.toString() stamp taken at recording time
    self.jobParameters[par_name] = (par_value, Time.toString())

    if not sendFlag:
        return S_OK()
    # and send
    return self.sendStoredJobParameters()
def setJobParameter(self, par_name, par_value, sendFlag=True):
    """Store a job parameter for jobID, sending it immediately if requested.

    :param par_name: name of the parameter (key in ``self.jobParameters``)
    :param par_value: value of the parameter
    :param sendFlag: if true, forward the stored parameters with
        ``sendStoredJobParameters``
    :return: result of ``sendStoredJobParameters`` when sending, ``S_OK`` otherwise
    """
    if not self.jobID:
        # A null jobID is reported as a local execution; nothing is stored.
        return S_OK('Local execution, jobID is null.')

    recordedAt = Time.toString()
    # add job parameter record as a (value, time stamp) pair
    self.jobParameters[par_name] = (par_value, recordedAt)

    # and send, unless the caller asked to only store the record
    return self.sendStoredJobParameters() if sendFlag else S_OK()
def setApplicationStatus(self, appStatus, sendFlag=True):
    """Record an application status for jobID and optionally send it.

    :param appStatus: application status string; single quotes are removed
        before it is stored
    :param sendFlag: when true, ``sendStoredStatusInfo`` is called right away
    :return: ``S_OK`` when nothing is sent (no jobID, or ``sendFlag`` is false),
        otherwise whatever ``sendStoredStatusInfo`` returns
    """
    if not self.jobID:
        # No job ID: treated as a local execution, nothing is recorded.
        return S_OK('Local execution, jobID is null.')

    stamp = Time.toString()
    # add Application status record as a (status, time stamp) pair
    record = (appStatus.replace("'", ''), stamp)
    self.appStatusInfo.append(record)

    if not sendFlag:
        return S_OK()
    # and send
    return self.sendStoredStatusInfo()
def setApplicationStatus(self, appStatus, sendFlag=True):
    """Store an application status for jobID, sending it immediately if requested.

    :param appStatus: application status string (stored with single quotes removed)
    :param sendFlag: if true, forward the stored status records with
        ``sendStoredStatusInfo``
    :return: result of ``sendStoredStatusInfo`` when sending, ``S_OK`` otherwise
    """
    if not self.jobID:
        # A null jobID is reported as a local execution; nothing is stored.
        return S_OK('Local execution, jobID is null.')

    recordedAt = Time.toString()
    # add Application status record
    self.appStatusInfo.append((appStatus.replace("'", ''), recordedAt))

    # and send, unless the caller asked to only store the record
    return self.sendStoredStatusInfo() if sendFlag else S_OK()
def setJobParameters(self, parameters, sendFlag=True):
    """Record several job parameters for jobID and optionally send them.

    :param parameters: iterable of ``(name, value)`` pairs
    :param sendFlag: when true, ``sendStoredJobParameters`` is called right away
    :return: ``S_OK`` when nothing is sent (no jobID, or ``sendFlag`` is false),
        otherwise whatever ``sendStoredJobParameters`` returns
    """
    if not self.jobID:
        # No job ID: treated as a local execution, nothing is recorded.
        return S_OK("Local execution, jobID is null.")

    # A single time stamp is shared by all parameters of this call.
    stamp = Time.toString()
    # add job parameter records
    for name, value in parameters:
        self.jobParameters[name] = (value, stamp)

    if not sendFlag:
        return S_OK()
    # and send
    return self.sendStoredJobParameters()
def setJobStatus(self, status="", minor="", application="", sendFlag=True):
    """Record job status information for jobID and optionally send it.

    :param status: major status; stored with single quotes removed
    :param minor: minor status; stored with single quotes removed
    :param application: application status; when non-empty it is additionally
        appended to ``self.appStatusInfo`` (single quotes removed)
    :param sendFlag: when true, ``sendStoredStatusInfo`` is called right away
    :return: ``S_OK`` when nothing is sent (no jobID, or ``sendFlag`` is false),
        otherwise whatever ``sendStoredStatusInfo`` returns
    """
    if not self.jobID:
        # No job ID: treated as a local execution, nothing is recorded.
        return S_OK("Local execution, jobID is null.")

    # The same time stamp is used for the job and the application record.
    stamp = Time.toString()

    # add job status record
    statusRecord = (status.replace("'", ""), minor.replace("'", ""), stamp)
    self.jobStatusInfo.append(statusRecord)

    if application:
        self.appStatusInfo.append((application.replace("'", ""), stamp))

    if not sendFlag:
        return S_OK()
    # and send
    return self.sendStoredStatusInfo()
def __init__(self, rpcStub=None, executionOrder=0):
    """Set up the sub-attribute dictionary with its initial values.

    Every name listed in ``subAttributeNames`` starts as "Unknown"; Status,
    SubRequestID, CreationTime and ExecutionOrder are then given initial values.

    :param rpcStub: optional stub; when given, its ``DEncode`` encoding is stored
        under 'Arguments' and its second element under 'Operation'
    :param executionOrder: value stored under 'ExecutionOrder'
    """
    self.subAttributeNames = [
        'Status',
        'SubRequestID',
        'Operation',
        'ExecutionOrder',
        'CreationTime',
        'LastUpdate',
        'Arguments',
    ]
    # All attributes default to "Unknown" until something better is known.
    self.subAttributes = dict.fromkeys(self.subAttributeNames, "Unknown")

    # Some initial values
    attributes = self.subAttributes
    attributes['Status'] = "Waiting"
    attributes['SubRequestID'] = makeGuid()
    attributes['CreationTime'] = Time.toString()
    attributes['ExecutionOrder'] = executionOrder

    if rpcStub:
        attributes['Arguments'] = DEncode.encode(rpcStub)
        attributes['Operation'] = rpcStub[1]
def execute(self):
    """Run one agent cycle.

    1.- Query the Matcher service for the TaskQueues matching ``self.directorDict``
    2.- Add up their Priorities and waiting Jobs
    3.- Call ``submitPilotsForTaskQueue`` for each TaskQueue

    :return: S_OK when the cycle completes (also when there are no TaskQueues or
        no waiting jobs), the failed Matcher result when the query fails, or
        S_ERROR when the sum of priorities is not positive
    """
    self.__checkSubmitPools()

    self.directorDict = getResourceDict()
    # Add all submit pools
    self.directorDict['SubmitPool'] = self.am_getOption("SubmitPools")
    # Add all DIRAC platforms if not specified otherwise
    if 'Platform' not in self.directorDict:
        platforms = gConfig.getOptionsDict('/Resources/Computing/OSCompatibility')
        if platforms['OK']:
            self.directorDict['Platform'] = platforms['Value'].keys()

    matcher = RPCClient("WorkloadManagement/Matcher")
    matching = matcher.getMatchingTaskQueues(self.directorDict)
    if not matching['OK']:
        self.log.error('Could not retrieve TaskQueues from TaskQueueDB', matching['Message'])
        return matching
    taskQueueDict = matching['Value']

    self.log.info('Found %s TaskQueues' % len(taskQueueDict))
    if not taskQueueDict:
        self.log.info('No TaskQueue to Process')
        return S_OK()

    prioritySum = 0
    waitingJobs = 0
    for taskQueueID in taskQueueDict:
        taskQueue = taskQueueDict[taskQueueID]
        # Each TaskQueue description also carries its own ID
        taskQueue['TaskQueueID'] = taskQueueID
        prioritySum += taskQueue['Priority']
        waitingJobs += taskQueue['Jobs']

    self.log.info('Sum of Priorities %s' % prioritySum)

    if waitingJobs == 0:
        self.log.info('No waiting Jobs')
        return S_OK('No waiting Jobs')
    if prioritySum <= 0:
        return S_ERROR('Wrong TaskQueue Priorities')

    self.pilotsPerPriority = self.am_getOption('pilotsPerIteration') / prioritySum
    self.pilotsPerJob = self.am_getOption('pilotsPerIteration') / waitingJobs

    # The submitted-pilots counter is reset while holding callBackLock
    self.callBackLock.acquire()
    self.submittedPilots = 0
    self.callBackLock.release()
    self.toSubmitPilots = 0

    waitingStatusList = ['Submitted', 'Ready', 'Scheduled', 'Waiting']
    oldestConsidered = Time.dateTime() - Time.hour * self.am_getOption("maxPilotWaitingHours")
    timeLimitToConsider = Time.toString(oldestConsidered)

    for taskQueueID in taskQueueDict:
        self.log.verbose('Processing TaskQueue', taskQueueID)

        pilotFilter = {'TaskQueueID': taskQueueID, 'Status': waitingStatusList}
        counted = pilotAgentsDB.countPilots(pilotFilter, None, timeLimitToConsider)
        if counted['OK']:
            waitingPilots = counted['Value']
            self.log.verbose('Waiting Pilots for TaskQueue %s:' % taskQueueID, waitingPilots)
        else:
            # A failed count is logged and treated as "no waiting pilots"
            self.log.error('Fail to get Number of Waiting pilots', counted['Message'])
            waitingPilots = 0

        submission = self.submitPilotsForTaskQueue(taskQueueDict[taskQueueID], waitingPilots)
        if submission['OK']:
            self.toSubmitPilots += submission['Value']

    self.log.info('Number of pilots to be Submitted %s' % self.toSubmitPilots)

    # Now wait until all Jobs in the Default ThreadPool are processed
    if 'Default' in self.pools:
        # only for those in the "Default" thread Pool
        self.pools['Default'].processAllResults()

    self.log.info('Number of pilots Submitted %s' % self.submittedPilots)

    return S_OK()
def execute(self):
    """Run one agent cycle.

    1.- Query the Matcher service for the TaskQueues matching ``self.directorDict``
    2.- Add up their Priorities and waiting Jobs
    3.- Call ``submitPilotsForTaskQueue`` for each TaskQueue

    :return: S_OK when the cycle completes (also when there are no TaskQueues or
        no waiting jobs), the failed Matcher result when the query fails, or
        S_ERROR when the sum of priorities is not positive
    """
    self.__checkSubmitPools()

    self.directorDict = getResourceDict()
    # Add all submit pools
    self.directorDict["SubmitPool"] = self.am_getOption("SubmitPools")
    # Add all DIRAC platforms if not specified otherwise
    if "Platform" not in self.directorDict:
        platforms = getDIRACPlatforms()
        if platforms["OK"]:
            self.directorDict["Platform"] = platforms["Value"]

    matcher = RPCClient("WorkloadManagement/Matcher")
    matching = matcher.getMatchingTaskQueues(self.directorDict)
    if not matching["OK"]:
        self.log.error("Could not retrieve TaskQueues from TaskQueueDB", matching["Message"])
        return matching
    taskQueueDict = matching["Value"]

    self.log.info("Found %s TaskQueues" % len(taskQueueDict))
    if not taskQueueDict:
        self.log.info("No TaskQueue to Process")
        return S_OK()

    prioritySum = 0
    waitingJobs = 0
    for taskQueueID in taskQueueDict:
        taskQueue = taskQueueDict[taskQueueID]
        # Each TaskQueue description also carries its own ID
        taskQueue["TaskQueueID"] = taskQueueID
        prioritySum += taskQueue["Priority"]
        waitingJobs += taskQueue["Jobs"]

    self.log.info("Sum of Priorities %s" % prioritySum)

    if waitingJobs == 0:
        self.log.info("No waiting Jobs")
        return S_OK("No waiting Jobs")
    if prioritySum <= 0:
        return S_ERROR("Wrong TaskQueue Priorities")

    self.pilotsPerPriority = self.am_getOption("pilotsPerIteration") / prioritySum
    self.pilotsPerJob = self.am_getOption("pilotsPerIteration") / waitingJobs

    # The submitted-pilots counter is reset while holding callBackLock
    self.callBackLock.acquire()
    self.submittedPilots = 0
    self.callBackLock.release()
    self.toSubmitPilots = 0

    waitingStatusList = ["Submitted", "Ready", "Scheduled", "Waiting"]
    oldestConsidered = Time.dateTime() - Time.hour * self.am_getOption("maxPilotWaitingHours")
    timeLimitToConsider = Time.toString(oldestConsidered)

    for taskQueueID in taskQueueDict:
        self.log.verbose("Processing TaskQueue", taskQueueID)

        pilotFilter = {"TaskQueueID": taskQueueID, "Status": waitingStatusList}
        counted = pilotAgentsDB.countPilots(pilotFilter, None, timeLimitToConsider)
        if counted["OK"]:
            waitingPilots = counted["Value"]
            self.log.verbose("Waiting Pilots for TaskQueue %s:" % taskQueueID, waitingPilots)
        else:
            # A failed count is logged and treated as "no waiting pilots"
            self.log.error("Fail to get Number of Waiting pilots", counted["Message"])
            waitingPilots = 0

        submission = self.submitPilotsForTaskQueue(taskQueueDict[taskQueueID], waitingPilots)
        if submission["OK"]:
            self.toSubmitPilots += submission["Value"]

    self.log.info("Number of pilots to be Submitted %s" % self.toSubmitPilots)

    # Now wait until all Jobs in the Default ThreadPool are processed
    if "Default" in self.pools:
        # only for those in the "Default" thread Pool
        self.pools["Default"].processAllResults()

    self.log.info("Number of pilots Submitted %s" % self.submittedPilots)

    return S_OK()
def export_setJobStatusBulk(self, jobID, statusDict):
    """Set various status fields for the job specified by its JobId.

    Only the last status is set in the JobDB, while every entry of
    ``statusDict`` is added to the status logging information in the
    JobLoggingDB.

    :param jobID: job identifier (converted with ``int``)
    :param statusDict: dictionary with datetime as key and a status information
        dictionary as value; the keys 'Status', 'MinorStatus',
        'ApplicationStatus' and 'Source' are read from each value,
        'ApplicationCounter' is optional
    :return: S_OK on success, S_ERROR('No Matching Job') when the job has no
        attributes, or the first failed result from jobDB / logDB
    """
    jobID = int(jobID)

    result = jobDB.getJobAttributes(jobID, ['Status'])
    if not result['OK']:
        return result
    if not result['Value']:
        # if there is no matching Job it returns an empty dictionary
        return S_ERROR('No Matching Job')

    # A job currently flagged as Stalled is set back to Running unless the
    # incoming updates say otherwise.
    status = 'Running' if result['Value']['Status'] == "Stalled" else ""
    minor = ""
    application = ""
    appCounter = ""
    endDate = ''
    startDate = ''
    startFlag = ''

    # Get the latest WN time stamps of status updates
    result = logDB.getWMSTimeStamps(jobID)
    if not result['OK']:
        return result
    lastEpoch = max(float(stamp) for name, stamp in result['Value'].items() if name != 'LastTime')
    from DIRAC import Time
    lastTime = Time.toString(Time.fromEpoch(lastEpoch))

    # Get the last status values
    dates = sorted(statusDict)
    # We should only update the status if its time stamp is more recent than the last update
    recentDates = [stamp for stamp in dates if stamp >= lastTime]
    for date in recentDates:
        sDict = statusDict[date]
        if sDict['Status']:
            status = sDict['Status']
            if status in JOB_FINAL_STATES:
                endDate = date
            if status == "Running":
                startFlag = 'Running'
        if sDict['MinorStatus']:
            minor = sDict['MinorStatus']
            # Execution start is the "Application" minor status seen once the
            # job has been reported as Running
            if minor == "Application" and startFlag == 'Running':
                startDate = date
        application = sDict['ApplicationStatus'] or application
        appCounter = sDict.get('ApplicationCounter') or appCounter

    # Only non-empty values are written to the JobDB
    candidates = (
        ('Status', status),
        ('MinorStatus', minor),
        ('ApplicationStatus', application),
        ('ApplicationCounter', appCounter),
    )
    updates = [(name, value) for name, value in candidates if value]
    attrNames = [name for name, _ in updates]
    attrValues = [value for _, value in updates]

    result = jobDB.setJobAttributes(jobID, attrNames, attrValues, update=True)
    if not result['OK']:
        return result

    # NOTE(review): the outcome of these two calls is not checked - confirm
    # that this is intended best-effort behaviour.
    if endDate:
        jobDB.setEndExecTime(jobID, endDate)
    if startDate:
        jobDB.setStartExecTime(jobID, startDate)

    # Update the JobLoggingDB records
    for date in dates:
        sDict = statusDict[date]
        # Empty fields are logged as 'idem'
        status = sDict['Status'] or 'idem'
        minor = sDict['MinorStatus'] or 'idem'
        application = sDict['ApplicationStatus']
        if application:
            # A record carrying an application status is logged as
            # Running / Application
            status = "Running"
            minor = "Application"
        else:
            application = 'idem'
        source = sDict['Source']
        result = logDB.addLoggingRecord(jobID, status, minor, application, date, source)
        if not result['OK']:
            return result

    return S_OK()
def execute(self):
    """Run one agent cycle.

    1.- Query the Matcher service for the TaskQueues matching ``self.directorDict``
    2.- Add up their Priorities and waiting Jobs
    3.- Call ``submitPilotsForTaskQueue`` for each TaskQueue

    :return: S_OK when the cycle completes (also when there are no TaskQueues or
        no waiting jobs), the failed Matcher result when the query fails, or
        S_ERROR when the sum of priorities is not positive
    """
    self.__checkSubmitPools()

    self.directorDict = getResourceDict()
    # Add all submit pools
    self.directorDict['SubmitPool'] = self.am_getOption("SubmitPools")

    matcher = RPCClient("WorkloadManagement/Matcher")
    matching = matcher.getMatchingTaskQueues(self.directorDict)
    if not matching['OK']:
        self.log.error('Could not retrieve TaskQueues from TaskQueueDB', matching['Message'])
        return matching
    taskQueueDict = matching['Value']

    self.log.info('Found %s TaskQueues' % len(taskQueueDict))
    if not taskQueueDict:
        self.log.info('No TaskQueue to Process')
        return S_OK()

    prioritySum = 0
    waitingJobs = 0
    for taskQueueID in taskQueueDict:
        taskQueue = taskQueueDict[taskQueueID]
        # Each TaskQueue description also carries its own ID
        taskQueue['TaskQueueID'] = taskQueueID
        prioritySum += taskQueue['Priority']
        waitingJobs += taskQueue['Jobs']

    self.log.info('Sum of Priorities %s' % prioritySum)

    if waitingJobs == 0:
        self.log.info('No waiting Jobs')
        return S_OK('No waiting Jobs')
    if prioritySum <= 0:
        return S_ERROR('Wrong TaskQueue Priorities')

    self.pilotsPerPriority = self.am_getOption('pilotsPerIteration') / prioritySum
    self.pilotsPerJob = self.am_getOption('pilotsPerIteration') / waitingJobs

    # The submitted-pilots counter is reset while holding callBackLock
    self.callBackLock.acquire()
    self.submittedPilots = 0
    self.callBackLock.release()
    self.toSubmitPilots = 0

    waitingStatusList = ['Submitted', 'Ready', 'Scheduled', 'Waiting']
    oldestConsidered = Time.dateTime() - Time.hour * self.am_getOption("maxPilotWaitingHours")
    timeLimitToConsider = Time.toString(oldestConsidered)

    for taskQueueID in taskQueueDict:
        self.log.verbose('Processing TaskQueue', taskQueueID)

        pilotFilter = {'TaskQueueID': taskQueueID, 'Status': waitingStatusList}
        counted = pilotAgentsDB.countPilots(pilotFilter, None, timeLimitToConsider)
        if counted['OK']:
            waitingPilots = counted['Value']
            self.log.verbose('Waiting Pilots for TaskQueue %s:' % taskQueueID, waitingPilots)
        else:
            # A failed count is logged and treated as "no waiting pilots"
            self.log.error('Fail to get Number of Waiting pilots', counted['Message'])
            waitingPilots = 0

        submission = self.submitPilotsForTaskQueue(taskQueueDict[taskQueueID], waitingPilots)
        if submission['OK']:
            self.toSubmitPilots += submission['Value']

    self.log.info('Number of pilots to be Submitted %s' % self.toSubmitPilots)

    # Now wait until all Jobs in the Default ThreadPool are processed
    if 'Default' in self.pools:
        # only for those in the "Default" thread Pool
        self.pools['Default'].processAllResults()

    self.log.info('Number of pilots Submitted %s' % self.submittedPilots)

    return S_OK()
def export_setJobStatusBulk(self, jobID, statusDict):
    """Set various status fields for the job specified by its JobId.

    Only the last status is set in the JobDB, while every entry of
    ``statusDict`` is added to the status logging information in the
    JobLoggingDB.

    :param jobID: job identifier (converted with ``int``)
    :param statusDict: dictionary with datetime as key and a status information
        dictionary as value; the keys 'Status', 'MinorStatus',
        'ApplicationStatus' and 'Source' are read from each value,
        'ApplicationCounter' is optional
    :return: S_OK on success, S_ERROR('No Matching Job') when the job has no
        attributes, or the first failed result from jobDB / logDB
    """
    jobID = int(jobID)

    result = jobDB.getJobAttributes(jobID, ['Status'])
    if not result['OK']:
        return result
    if not result['Value']:
        # if there is no matching Job it returns an empty dictionary
        return S_ERROR('No Matching Job')

    # A job currently flagged as Stalled is set back to Running unless the
    # incoming updates say otherwise.
    status = 'Running' if result['Value']['Status'] == "Stalled" else ""
    minor = ""
    application = ""
    appCounter = ""
    endDate = ''
    startDate = ''
    startFlag = ''

    # Get the latest WN time stamps of status updates
    result = logDB.getWMSTimeStamps(jobID)
    if not result['OK']:
        return result
    lastEpoch = max(float(stamp) for name, stamp in result['Value'].items() if name != 'LastTime')
    from DIRAC import Time
    lastTime = Time.toString(Time.fromEpoch(lastEpoch))

    # Get the last status values
    dates = sorted(statusDict)
    # We should only update the status if its time stamp is more recent than the last update
    recentDates = [stamp for stamp in dates if stamp >= lastTime]
    for date in recentDates:
        sDict = statusDict[date]
        if sDict['Status']:
            status = sDict['Status']
            if status in JOB_FINAL_STATES:
                endDate = date
            if status == "Running":
                startFlag = 'Running'
        if sDict['MinorStatus']:
            minor = sDict['MinorStatus']
            # Execution start is the "Application" minor status seen once the
            # job has been reported as Running
            if minor == "Application" and startFlag == 'Running':
                startDate = date
        application = sDict['ApplicationStatus'] or application
        appCounter = sDict.get('ApplicationCounter') or appCounter

    # Only non-empty values are written to the JobDB
    candidates = (
        ('Status', status),
        ('MinorStatus', minor),
        ('ApplicationStatus', application),
        ('ApplicationCounter', appCounter),
    )
    updates = [(name, value) for name, value in candidates if value]
    attrNames = [name for name, _ in updates]
    attrValues = [value for _, value in updates]

    result = jobDB.setJobAttributes(jobID, attrNames, attrValues, update=True)
    if not result['OK']:
        return result

    # NOTE(review): the outcome of these two calls is not checked - confirm
    # that this is intended best-effort behaviour.
    if endDate:
        jobDB.setEndExecTime(jobID, endDate)
    if startDate:
        jobDB.setStartExecTime(jobID, startDate)

    # Update the JobLoggingDB records
    for date in dates:
        sDict = statusDict[date]
        # Empty fields are logged as 'idem'
        status = sDict['Status'] or 'idem'
        minor = sDict['MinorStatus'] or 'idem'
        application = sDict['ApplicationStatus']
        if application:
            # A record carrying an application status is logged as
            # Running / Application
            status = "Running"
            minor = "Application"
        else:
            application = 'idem'
        source = sDict['Source']
        result = logDB.addLoggingRecord(jobID, status, minor, application, date, source)
        if not result['OK']:
            return result

    return S_OK()