def addLoggingRecord(self, jobID, status='idem', minor='idem', application='idem', date='', source='Unknown'):
    """ Add a new entry to the JobLoggingDB table.

        One, two or all the three status components can be specified.
        Optionally the time stamp of the status can be provided as a string
        in the format '%Y-%m-%d %H:%M:%S' or as a datetime.datetime object.
        If the time stamp is not provided, is of an unsupported type, or
        cannot be evaluated, the current time given by Time.dateTime() is
        used instead.

        :param jobID: job identifier, converted with int() for the query
        :param status: major status string
        :param minor: minor status string
        :param application: application status string
        :param date: optional time stamp (string or datetime object)
        :param source: name of the component reporting the status
        :return: the result of self._update() for the INSERT statement
    """

    def _timeOrder(stamp):
        """ Seconds relative to MAGIC_EPOC_NUMBER, rounded to milliseconds """
        epoc = time.mktime(stamp.timetuple()) + stamp.microsecond / 1000000. - MAGIC_EPOC_NUMBER
        return round(epoc, 3)

    event = 'status/minor/app=%s/%s/%s' % (status, minor, application)
    self.gLogger.info("Adding record for job " + str(jobID) + ": '" + event + "' from " + source)

    time_order = None
    if date:
        try:
            if isinstance(date, StringTypes):
                # The date is provided as a string in UTC
                _date = Time.fromString(date)
                time_order = _timeOrder(_date)
            elif isinstance(date, Time._dateTimeType):
                _date = date
                time_order = _timeOrder(_date)
            else:
                self.gLogger.error('Incorrect date for the logging record')
        except Exception:
            self.gLogger.exception('Exception while date evaluation')

    if time_order is None:
        # No usable date supplied: fall back to the current time. The
        # sub-second part is kept here as well, so that fallback records are
        # ordered with the same precision as all the others.
        _date = Time.dateTime()
        time_order = _timeOrder(_date)

    # NOTE(review): the statement is assembled by plain string formatting, so
    # quotes inside status/minor/application/source are not escaped. Escape
    # these values with the DB layer helper if they can come from untrusted
    # clients.
    cmd = "INSERT INTO LoggingInfo (JobId, Status, MinorStatus, ApplicationStatus, " + \
          "StatusTime, StatusTimeOrder, StatusSource) VALUES (%d,'%s','%s','%s','%s',%f,'%s')" % \
          (int(jobID), status, minor, application, str(_date), time_order, source)

    return self._update(cmd)
def addLoggingRecord(self, jobID, status='idem', minor='idem', application='idem', date='', source='Unknown'):
  """ Add a new entry to the JobLoggingDB table.

      One, two or all the three status components can be specified.
      Optionally the time stamp of the status can be provided as a string
      in the format '%Y-%m-%d %H:%M:%S' or as a datetime.datetime object.
      The current time given by Time.dateTime() is used when no time stamp
      is provided, when it has an unsupported type, or when evaluating it
      raises an exception.

      :param jobID: job identifier, converted with int() for the query
      :param status: major status string
      :param minor: minor status string
      :param application: application status string
      :param date: optional time stamp (string or datetime object)
      :param source: name of the component reporting the status
      :return: the result of self._update() for the INSERT statement
  """

  def _timeOrder(stamp):
    """ Seconds relative to MAGIC_EPOC_NUMBER, rounded to milliseconds """
    epoc = time.mktime(stamp.timetuple()) + stamp.microsecond / 1000000. - MAGIC_EPOC_NUMBER
    return round(epoc, 3)

  event = 'status/minor/app=%s/%s/%s' % (status, minor, application)
  self.gLogger.info("Adding record for job " + str(jobID) + ": '" + event + "' from " + source)

  time_order = None
  if date:
    try:
      if isinstance(date, StringTypes):
        # The date is provided as a string in UTC
        _date = Time.fromString(date)
        time_order = _timeOrder(_date)
      elif isinstance(date, Time._dateTimeType):
        _date = date
        time_order = _timeOrder(_date)
      else:
        self.gLogger.error('Incorrect date for the logging record')
    except Exception:
      self.gLogger.exception('Exception while date evaluation')

  if time_order is None:
    # Nothing usable was supplied: stamp the record with the current time,
    # keeping the sub-second part so the ordering column has the same
    # precision for every record.
    _date = Time.dateTime()
    time_order = _timeOrder(_date)

  # NOTE(review): values are interpolated into the SQL text unescaped; escape
  # status/minor/application/source with the DB layer helper if they can
  # originate from untrusted clients.
  cmd = "INSERT INTO LoggingInfo (JobId, Status, MinorStatus, ApplicationStatus, " + \
        "StatusTime, StatusTimeOrder, StatusSource) VALUES (%d,'%s','%s','%s','%s',%f,'%s')" % \
        (int(jobID), status, minor, application, str(_date), time_order, source)

  return self._update(cmd)
def setLastUpdate(self, time=''):
  """ Store the last update time stamp.

      :param time: time stamp to store; when it is empty the current date
                   and time, str(Time.dateTime()), is stored instead
      :return: S_OK()
  """
  self.LastUpdate = time or str(Time.dateTime())
  return S_OK()
def initialize( self, request ):
  """ Set default values to attributes and parameters.

      What is done depends on the type of request:
       - None: every name in self.attributeNames is set to 'Unknown', then
         CreationTime, Status, the owner (taken from getProxyInfo() when it
         succeeds) and DIRACSetup are filled in;
       - a string: attributes are reset to 'Unknown' and the string is
         handed to self.parseRequest();
       - an old-style class instance (types.InstanceType): its attributes
         and sub-requests are copied into this object.
      Any other type leaves the object untouched.

      :param request: None, a request string or another request object
  """
  if request is None:
    # Set some defaults
    for name in self.attributeNames:
      self.attributes[name] = 'Unknown'
    self.attributes['CreationTime'] = str( Time.dateTime() )
    self.attributes['Status'] = "New"
    result = getProxyInfo()
    if result['OK']:
      proxyDict = result['Value']
      self.attributes['OwnerDN'] = proxyDict['identity']
      if 'group' in proxyDict:
        self.attributes['OwnerGroup'] = proxyDict['group']
    self.attributes['DIRACSetup'] = gConfig.getValue( '/DIRAC/Setup', 'Unknown' )

  elif isinstance( request, types.StringTypes ):
    # Initialize request from an XML string
    for name in self.attributeNames:
      self.attributes[name] = 'Unknown'
    self.parseRequest( request )

  elif isinstance( request, types.InstanceType ):
    # Initialize request from another request
    for attr in self.attributeNames:
      self.attributes[attr] = request.attributes[attr]
    # NOTE(review): reads 'subrequests' but stores 'subRequests' - confirm
    # the attribute spelling on the source object.
    self.subRequests = copy.deepcopy( request.subrequests )
def setCreationTime(self, time=''):
  """ Store the creation time stamp.

      :param time: time stamp to store; when it is empty the current date
                   and time, str(Time.dateTime()), is stored instead
      :return: S_OK()
  """
  self.CreationTime = time or str(Time.dateTime())
  return S_OK()
def __init__(self):
  """ Create a sub-request holding default attribute values.

      The status starts as 'Waiting', the identifier as 0, the three time
      stamps as the construction time, and all remaining attributes are
      empty strings or empty lists.
  """
  # Read the clock once so that the three time stamps of a freshly created
  # object are identical instead of differing by a few microseconds.
  now = str(Time.dateTime())

  # These are the subrequest attributes
  self.RequestType = ''
  self.Status = 'Waiting'
  self.SubRequestID = 0
  self.Operation = ''
  self.SourceSE = ''
  self.TargetSE = ''
  self.CreationTime = now
  self.SubmissionTime = now
  self.LastUpdate = now
  self.Error = ''
  self.Catalog = ''
  self.Arguments = ''
  self.Files = []
  self.Datasets = []
def setLastUpdate(self,time=''):
  """ Record when the object was last updated.

      :param time: time stamp to record; an empty value means "now", i.e.
                   str(Time.dateTime())
      :return: S_OK()
  """
  stamp = time or str(Time.dateTime())
  self.LastUpdate = stamp
  return S_OK()
def setCreationTime(self,time=''):
  """ Record when the object was created.

      :param time: time stamp to record; an empty value means "now", i.e.
                   str(Time.dateTime())
      :return: S_OK()
  """
  stamp = time or str(Time.dateTime())
  self.CreationTime = stamp
  return S_OK()
def execute( self ):
  """Main Agent code:
    1.- Query TaskQueueDB for existing TQs
    2.- Add their Priorities
    3.- Submit pilots

    :return: S_OK() after a normal cycle (also when there is nothing to
             do), the failed Matcher result when the TaskQueues cannot be
             retrieved, or S_ERROR when the sum of priorities is not
             positive
  """

  self.__checkSubmitPools()

  self.directorDict = getResourceDict()
  # Add all submit pools
  self.directorDict['SubmitPool'] = self.am_getOption( "SubmitPools" )
  # Add all DIRAC platforms if not specified otherwise
  if 'Platform' not in self.directorDict:
    result = gConfig.getOptionsDict( '/Resources/Computing/OSCompatibility' )
    if result['OK']:
      self.directorDict['Platform'] = result['Value'].keys()

  rpcMatcher = RPCClient( "WorkloadManagement/Matcher" )
  result = rpcMatcher.getMatchingTaskQueues( self.directorDict )
  if not result['OK']:
    self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
    return result
  taskQueueDict = result['Value']

  self.log.info( 'Found %s TaskQueues' % len( taskQueueDict ) )

  if not taskQueueDict:
    self.log.info( 'No TaskQueue to Process' )
    return S_OK()

  prioritySum = 0
  waitingJobs = 0
  for taskQueueID in taskQueueDict:
    # Each TaskQueue description carries its own ID from here on
    taskQueueDict[taskQueueID]['TaskQueueID'] = taskQueueID
    prioritySum += taskQueueDict[taskQueueID]['Priority']
    waitingJobs += taskQueueDict[taskQueueID]['Jobs']

  self.log.info( 'Sum of Priorities %s' % prioritySum )

  if waitingJobs == 0:
    self.log.info( 'No waiting Jobs' )
    return S_OK( 'No waiting Jobs' )
  if prioritySum <= 0:
    return S_ERROR( 'Wrong TaskQueue Priorities' )

  pilotsPerIteration = self.am_getOption( 'pilotsPerIteration' )
  self.pilotsPerPriority = pilotsPerIteration / prioritySum
  self.pilotsPerJob = pilotsPerIteration / waitingJobs

  # The counter is shared with the submission callbacks, hence the lock;
  # try/finally guarantees that it is always released.
  self.callBackLock.acquire()
  try:
    self.submittedPilots = 0
  finally:
    self.callBackLock.release()
  self.toSubmitPilots = 0
  waitingStatusList = ['Submitted', 'Ready', 'Scheduled', 'Waiting']
  timeLimitToConsider = Time.toString( Time.dateTime() - Time.hour * self.am_getOption( "maxPilotWaitingHours" ) )

  for taskQueueID in taskQueueDict:
    self.log.verbose( 'Processing TaskQueue', taskQueueID )

    result = pilotAgentsDB.countPilots( { 'TaskQueueID': taskQueueID,
                                          'Status': waitingStatusList },
                                        None, timeLimitToConsider )
    if not result['OK']:
      # Carry on as if no pilot was waiting rather than skipping the TQ
      self.log.error( 'Fail to get Number of Waiting pilots', result['Message'] )
      waitingPilots = 0
    else:
      waitingPilots = result['Value']
      self.log.verbose( 'Waiting Pilots for TaskQueue %s:' % taskQueueID, waitingPilots )

    result = self.submitPilotsForTaskQueue( taskQueueDict[taskQueueID], waitingPilots )

    if result['OK']:
      self.toSubmitPilots += result['Value']
    else:
      # Do not lose the reason of a failed submission
      self.log.error( 'Failed to submit pilots for TaskQueue %s' % taskQueueID,
                      result.get( 'Message', '' ) )

  self.log.info( 'Number of pilots to be Submitted %s' % self.toSubmitPilots )

  # Now wait until all Jobs in the Default ThreadPool are processed
  if 'Default' in self.pools:
    # only for those in the "Default" thread Pool
    self.pools['Default'].processAllResults()

  self.log.info( 'Number of pilots Submitted %s' % self.submittedPilots )

  return S_OK()
def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                   ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ):
  """
    This method does the actual pilot submission to the Grid RB
    The logic is as follows:
    - If there are no available RB it return error
    - If there is no VOMS extension in the proxy, return error
    - It creates a temp directory
    - Prepare a JDL
      it has some part common to gLite and LCG (the payload description)
      it has some part specific to each middleware
    - Optionally check via list-match that some CE is available
    - Submit, register the pilot references in pilotAgentsDB and, when more
      pilots are still requested, get a new proxy token and call itself
      again for the remainder

    :param workDir: directory in which the temporary working directory is created
    :param taskQueueDict: TaskQueue description, must contain 'TaskQueueID'
    :param pilotOptions: list of pilot option strings; it is modified in
                         place when a new proxy token is requested
    :param pilotsToSubmit: number of pilots still requested
    :param ceMask, submitPrivatePilot, privateTQ: handed over to _prepareJDL
    :param proxy: proxy object used for the credentials and the submission
    :param pilotsPerJob: number of pilots per submitted job
    :return: S_OK( number of submitted pilots ) or S_ERROR; errors raised
             after some pilots were already submitted carry that number
             under 'Value'
  """
  taskQueueID = taskQueueDict['TaskQueueID']
  # The owner is taken from the proxy and not from the TaskQueue
  credDict = proxy.getCredentials()['Value']
  ownerDN = credDict['identity']
  ownerGroup = credDict['group']

  if not self.resourceBrokers:
    # Since we can exclude RBs from the list, it may become empty
    return S_ERROR( ERROR_RB )

  # Need to get VOMS extension for the later interactions with WMS
  ret = gProxyManager.getVOMSAttributes( proxy )
  if not ret['OK']:
    self.log.error( ERROR_VOMS, ret['Message'] )
    return S_ERROR( ERROR_VOMS )
  if not ret['Value']:
    return S_ERROR( ERROR_VOMS )

  workingDirectory = tempfile.mkdtemp( prefix = 'TQ_%s_' % taskQueueID, dir = workDir )
  self.log.verbose( 'Using working Directory:', workingDirectory )

  # The working directory is only needed up to the submission itself; the
  # finally clause removes it on every exit path, exceptions included.
  try:
    # Write JDL
    retDict = self._prepareJDL( taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob,
                                ceMask, submitPrivatePilot, privateTQ )
    jdl = retDict['JDL']
    pilotRequirements = retDict['Requirements']
    rb = retDict['RB']
    if not jdl:
      return S_ERROR( ERROR_JDL )

    # Check that there are available queues for the Job:
    if self.enableListMatch:
      now = Time.dateTime()
      availableCEs = self.listMatchCache.get( pilotRequirements )
      # '== False' on purpose: an empty result is not the same as "nothing
      # cached" and must not trigger a new list-match
      if availableCEs == False:
        availableCEs = self._listMatch( proxy, jdl, taskQueueID, rb )
        if availableCEs != False:
          self.log.verbose( 'LastListMatch', now )
          self.log.verbose( 'AvailableCEs ', availableCEs )
          # listMatchDelay is given in minutes
          self.listMatchCache.add( pilotRequirements, self.listMatchDelay * 60,
                                   value = availableCEs )
      if not availableCEs:
        return S_ERROR( ERROR_CE + ' TQ: %d' % taskQueueID )

    # Now we are ready for the actual submission
    self.log.verbose( 'Submitting Pilots for TaskQueue', taskQueueID )
    submitRet = self._submitPilot( proxy, pilotsPerJob, jdl, taskQueueID, rb )
  finally:
    # Best-effort clean-up, failures to remove the directory are ignored
    shutil.rmtree( workingDirectory, ignore_errors = True )

  if not submitRet:
    return S_ERROR( 'Pilot Submission Failed for TQ %d ' % taskQueueID )

  # With parametric jobs each returned reference is a parent whose children
  # are the actual pilots
  parametric = pilotsPerJob != 1 and len( submitRet ) != pilotsPerJob
  submittedPilots = 0
  for pilotReference, resourceBroker in submitRet:
    if parametric:
      references = self._getChildrenReferences( proxy, pilotReference, taskQueueID )
    else:
      references = [pilotReference]
    submittedPilots += len( references )
    pilotAgentsDB.addPilotTQReference( references, taskQueueID, ownerDN, ownerGroup,
                                       resourceBroker, self.gridMiddleware, pilotRequirements )

  # add some sleep here
  time.sleep( 0.1 * submittedPilots )

  if pilotsToSubmit > pilotsPerJob:
    # Additional submissions are necessary, need to get a new token and iterate.
    pilotsToSubmit -= pilotsPerJob
    result = gProxyManager.requestToken( ownerDN, ownerGroup,
                                         max( pilotsToSubmit, self.maxJobsInFillMode ) )
    if not result['OK']:
      self.log.error( ERROR_TOKEN, result['Message'] )
      result = S_ERROR( ERROR_TOKEN )
      result['Value'] = submittedPilots
      return result
    ( token, numberOfUses ) = result['Value']
    # Drop every previous token option. The list is filtered in one go (and
    # in place, callers share it) because removing items while looping over
    # the same list skips the element following each removal.
    pilotOptions[:] = [ option for option in pilotOptions
                        if not option.startswith( '-o /Security/ProxyToken=' ) ]
    pilotOptions.append( '-o /Security/ProxyToken=%s' % token )
    pilotsPerJob = max( 1, min( pilotsPerJob, int( numberOfUses / self.maxJobsInFillMode ) ) )
    result = self._submitPilots( workDir, taskQueueDict, pilotOptions,
                                 pilotsToSubmit, ceMask,
                                 submitPrivatePilot, privateTQ,
                                 proxy, pilotsPerJob )
    if not result['OK']:
      # Report what was submitted so far together with the error
      result['Value'] = result.get( 'Value', 0 ) + submittedPilots
      return result
    submittedPilots += result['Value']

  return S_OK( submittedPilots )
def execute(self):
    """Main Agent code:
    1.- Query TaskQueueDB for existing TQs
    2.- Add their Priorities
    3.- Submit pilots

    :return: S_OK() after a normal cycle (also when there is nothing to
             do), the failed Matcher result when the TaskQueues cannot be
             retrieved, or S_ERROR when the sum of priorities is not
             positive
    """

    self.__checkSubmitPools()

    self.directorDict = getResourceDict()
    # Add all submit pools
    self.directorDict["SubmitPool"] = self.am_getOption("SubmitPools")
    # Add all DIRAC platforms if not specified otherwise
    if "Platform" not in self.directorDict:
        result = getDIRACPlatforms()
        if result["OK"]:
            self.directorDict["Platform"] = result["Value"]

    rpcMatcher = RPCClient("WorkloadManagement/Matcher")
    result = rpcMatcher.getMatchingTaskQueues(self.directorDict)
    if not result["OK"]:
        self.log.error("Could not retrieve TaskQueues from TaskQueueDB", result["Message"])
        return result
    taskQueueDict = result["Value"]

    self.log.info("Found %s TaskQueues" % len(taskQueueDict))

    if not taskQueueDict:
        self.log.info("No TaskQueue to Process")
        return S_OK()

    prioritySum = 0
    waitingJobs = 0
    for taskQueueID in taskQueueDict:
        # Each TaskQueue description carries its own ID from here on
        taskQueueDict[taskQueueID]["TaskQueueID"] = taskQueueID
        prioritySum += taskQueueDict[taskQueueID]["Priority"]
        waitingJobs += taskQueueDict[taskQueueID]["Jobs"]

    self.log.info("Sum of Priorities %s" % prioritySum)

    if waitingJobs == 0:
        self.log.info("No waiting Jobs")
        return S_OK("No waiting Jobs")
    if prioritySum <= 0:
        return S_ERROR("Wrong TaskQueue Priorities")

    pilotsPerIteration = self.am_getOption("pilotsPerIteration")
    self.pilotsPerPriority = pilotsPerIteration / prioritySum
    self.pilotsPerJob = pilotsPerIteration / waitingJobs

    # The counter is shared with the submission callbacks, hence the lock;
    # try/finally guarantees that it is always released.
    self.callBackLock.acquire()
    try:
        self.submittedPilots = 0
    finally:
        self.callBackLock.release()
    self.toSubmitPilots = 0
    waitingStatusList = ["Submitted", "Ready", "Scheduled", "Waiting"]
    timeLimitToConsider = Time.toString(Time.dateTime() - Time.hour * self.am_getOption("maxPilotWaitingHours"))

    for taskQueueID in taskQueueDict:
        self.log.verbose("Processing TaskQueue", taskQueueID)

        result = pilotAgentsDB.countPilots(
            {"TaskQueueID": taskQueueID, "Status": waitingStatusList}, None, timeLimitToConsider
        )
        if not result["OK"]:
            # Carry on as if no pilot was waiting rather than skipping the TQ
            self.log.error("Fail to get Number of Waiting pilots", result["Message"])
            waitingPilots = 0
        else:
            waitingPilots = result["Value"]
            self.log.verbose("Waiting Pilots for TaskQueue %s:" % taskQueueID, waitingPilots)

        result = self.submitPilotsForTaskQueue(taskQueueDict[taskQueueID], waitingPilots)

        if result["OK"]:
            self.toSubmitPilots += result["Value"]
        else:
            # Do not lose the reason of a failed submission
            self.log.error(
                "Failed to submit pilots for TaskQueue %s" % taskQueueID, result.get("Message", "")
            )

    self.log.info("Number of pilots to be Submitted %s" % self.toSubmitPilots)

    # Now wait until all Jobs in the Default ThreadPool are processed
    if "Default" in self.pools:
        # only for those in the "Default" thread Pool
        self.pools["Default"].processAllResults()

    self.log.info("Number of pilots Submitted %s" % self.submittedPilots)

    return S_OK()
def execute(self):
  """Main Agent code:
    1.- Query TaskQueueDB for existing TQs
    2.- Add their Priorities
    3.- Submit pilots

    :return: S_OK() after a normal cycle (also when there is nothing to
             do), the failed Matcher result when the TaskQueues cannot be
             retrieved, or S_ERROR when the sum of priorities is not
             positive
  """

  self.__checkSubmitPools()

  self.directorDict = getResourceDict()
  # Add all submit pools
  self.directorDict['SubmitPool'] = self.am_getOption("SubmitPools")

  rpcMatcher = RPCClient("WorkloadManagement/Matcher")
  result = rpcMatcher.getMatchingTaskQueues(self.directorDict)
  if not result['OK']:
    self.log.error('Could not retrieve TaskQueues from TaskQueueDB', result['Message'])
    return result
  taskQueueDict = result['Value']

  self.log.info('Found %s TaskQueues' % len(taskQueueDict))

  if not taskQueueDict:
    self.log.info('No TaskQueue to Process')
    return S_OK()

  prioritySum = 0
  waitingJobs = 0
  for taskQueueID in taskQueueDict:
    # Each TaskQueue description carries its own ID from here on
    taskQueueDict[taskQueueID]['TaskQueueID'] = taskQueueID
    prioritySum += taskQueueDict[taskQueueID]['Priority']
    waitingJobs += taskQueueDict[taskQueueID]['Jobs']

  self.log.info('Sum of Priorities %s' % prioritySum)

  if waitingJobs == 0:
    self.log.info('No waiting Jobs')
    return S_OK('No waiting Jobs')
  if prioritySum <= 0:
    return S_ERROR('Wrong TaskQueue Priorities')

  pilotsPerIteration = self.am_getOption('pilotsPerIteration')
  self.pilotsPerPriority = pilotsPerIteration / prioritySum
  self.pilotsPerJob = pilotsPerIteration / waitingJobs

  # The counter is shared with the submission callbacks, hence the lock;
  # try/finally guarantees that it is always released.
  self.callBackLock.acquire()
  try:
    self.submittedPilots = 0
  finally:
    self.callBackLock.release()
  self.toSubmitPilots = 0
  waitingStatusList = ['Submitted', 'Ready', 'Scheduled', 'Waiting']
  timeLimitToConsider = Time.toString(
      Time.dateTime() - Time.hour * self.am_getOption("maxPilotWaitingHours"))

  for taskQueueID in taskQueueDict:
    self.log.verbose('Processing TaskQueue', taskQueueID)

    result = pilotAgentsDB.countPilots(
        {'TaskQueueID': taskQueueID, 'Status': waitingStatusList},
        None, timeLimitToConsider)
    if not result['OK']:
      # Carry on as if no pilot was waiting rather than skipping the TQ
      self.log.error('Fail to get Number of Waiting pilots', result['Message'])
      waitingPilots = 0
    else:
      waitingPilots = result['Value']
      self.log.verbose('Waiting Pilots for TaskQueue %s:' % taskQueueID, waitingPilots)

    result = self.submitPilotsForTaskQueue(taskQueueDict[taskQueueID], waitingPilots)

    if result['OK']:
      self.toSubmitPilots += result['Value']
    else:
      # Do not lose the reason of a failed submission
      self.log.error('Failed to submit pilots for TaskQueue %s' % taskQueueID,
                     result.get('Message', ''))

  self.log.info('Number of pilots to be Submitted %s' % self.toSubmitPilots)

  # Now wait until all Jobs in the Default ThreadPool are processed
  if 'Default' in self.pools:
    # only for those in the "Default" thread Pool
    self.pools['Default'].processAllResults()

  self.log.info('Number of pilots Submitted %s' % self.submittedPilots)

  return S_OK()
def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                   ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ):
  """
    This method does the actual pilot submission to the Grid RB
    The logic is as follows:
    - If there are no available RB it return error
    - If there is no VOMS extension in the proxy, return error
    - It creates a temp directory
    - Prepare a JDL
      it has some part common to gLite and LCG (the payload description)
      it has some part specific to each middleware
    - Optionally check via list-match that some CE is available
    - Submit, register the pilot references in pilotAgentsDB and, when more
      pilots are still requested, get a new proxy token for the generic
      pilot identity and call itself again for the remainder

    :param workDir: directory in which the temporary working directory is created
    :param taskQueueDict: TaskQueue description, must contain 'TaskQueueID'
    :param pilotOptions: list of pilot option strings; it is modified in
                         place when a new proxy token is requested
    :param pilotsToSubmit: number of pilots still requested
    :param ceMask, submitPrivatePilot, privateTQ: handed over to _prepareJDL
    :param proxy: proxy object used for the credentials and the submission
    :param pilotsPerJob: number of pilots per submitted job
    :return: S_OK( number of submitted pilots ) or S_ERROR; errors raised
             after some pilots were already submitted carry that number
             under 'Value'
  """
  taskQueueID = taskQueueDict['TaskQueueID']
  # The owner is taken from the proxy and not from the TaskQueue
  ownerDN = proxy.getCredentials()['Value']['identity']

  if not self.resourceBrokers:
    # Since we can exclude RBs from the list, it may become empty
    return S_ERROR( ERROR_RB )

  # Need to get VOMS extension for the later interactions with WMS
  ret = gProxyManager.getVOMSAttributes( proxy )
  if not ret['OK']:
    self.log.error( ERROR_VOMS, ret['Message'] )
    return S_ERROR( ERROR_VOMS )
  if not ret['Value']:
    return S_ERROR( ERROR_VOMS )
  vomsGroup = ret['Value'][0]

  workingDirectory = tempfile.mkdtemp( prefix = 'TQ_%s_' % taskQueueID, dir = workDir )
  self.log.verbose( 'Using working Directory:', workingDirectory )

  # The working directory is only needed up to the submission itself; the
  # finally clause removes it on every exit path, exceptions included.
  try:
    # Write JDL
    retDict = self._prepareJDL( taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob,
                                ceMask, submitPrivatePilot, privateTQ )
    jdl = retDict['JDL']
    pilotRequirements = retDict['Requirements']
    rb = retDict['RB']
    if not jdl:
      return S_ERROR( ERROR_JDL )

    # Check that there are available queues for the Job:
    if self.enableListMatch:
      now = Time.dateTime()
      availableCEs = self.listMatchCache.get( pilotRequirements )
      # '== False' on purpose: an empty result is not the same as "nothing
      # cached" and must not trigger a new list-match
      if availableCEs == False:
        availableCEs = self._listMatch( proxy, jdl, taskQueueID, rb )
        if availableCEs != False:
          self.log.verbose( 'LastListMatch', now )
          self.log.verbose( 'AvailableCEs ', availableCEs )
          # listMatchDelay is given in minutes
          self.listMatchCache.add( pilotRequirements, self.listMatchDelay * 60,
                                   value = availableCEs )
      if not availableCEs:
        return S_ERROR( ERROR_CE + ' TQ: %d' % taskQueueID )

    # Now we are ready for the actual submission
    self.log.verbose( 'Submitting Pilots for TaskQueue', taskQueueID )
    submitRet = self._submitPilot( proxy, pilotsPerJob, jdl, taskQueueID, rb )
  finally:
    # Best-effort clean-up, failures to remove the directory are ignored
    shutil.rmtree( workingDirectory, ignore_errors = True )

  if not submitRet:
    return S_ERROR( 'Pilot Submission Failed for TQ %d ' % taskQueueID )

  # With parametric jobs each returned reference is a parent whose children
  # are the actual pilots
  parametric = pilotsPerJob != 1 and len( submitRet ) != pilotsPerJob
  submittedPilots = 0
  for pilotReference, resourceBroker in submitRet:
    if parametric:
      references = self._getChildrenReferences( proxy, pilotReference, taskQueueID )
    else:
      references = [pilotReference]
    submittedPilots += len( references )
    pilotAgentsDB.addPilotTQReference( references, taskQueueID, ownerDN, vomsGroup,
                                       broker = resourceBroker,
                                       gridType = self.gridMiddleware,
                                       requirements = pilotRequirements )

  # add some sleep here
  time.sleep( 0.1 * submittedPilots )

  if pilotsToSubmit > pilotsPerJob:
    # Additional submissions are necessary, need to get a new token and iterate.
    pilotsToSubmit -= pilotsPerJob
    # The token is requested for the generic pilot identity
    ownerDN = self.genericPilotDN
    ownerGroup = self.genericPilotGroup
    result = gProxyManager.requestToken( ownerDN, ownerGroup,
                                         max( pilotsToSubmit, self.maxJobsInFillMode ) )
    if not result['OK']:
      self.log.error( ERROR_TOKEN, result['Message'] )
      # Pilots were already submitted above: report them with the error
      result = S_ERROR( ERROR_TOKEN )
      result['Value'] = submittedPilots
      return result
    ( token, numberOfUses ) = result['Value']
    # Drop every previous token option. The list is filtered in one go (and
    # in place, callers share it) because removing items while looping over
    # the same list skips the element following each removal.
    pilotOptions[:] = [ option for option in pilotOptions
                        if not option.startswith( '-o /Security/ProxyToken=' ) ]
    pilotOptions.append( '-o /Security/ProxyToken=%s' % token )
    # Never go below one pilot per job, otherwise the number of remaining
    # pilots would not decrease from one call to the next
    pilotsPerJob = max( 1, min( pilotsPerJob, int( numberOfUses / self.maxJobsInFillMode ) ) )
    result = self._submitPilots( workDir, taskQueueDict, pilotOptions,
                                 pilotsToSubmit, ceMask,
                                 submitPrivatePilot, privateTQ,
                                 proxy, pilotsPerJob )
    if not result['OK']:
      # Keep whatever the nested call managed to submit and add ours
      result['Value'] = result.get( 'Value', 0 ) + submittedPilots
      return result
    submittedPilots += result['Value']

  return S_OK( submittedPilots )