def initializeJobMonitoringHandler(serviceInfo):
    """ Create the module-level database objects used by this handler.

    Fresh ``JobDB``, ``JobLoggingDB`` and ``TaskQueueDB`` instances are bound to
    the globals ``jobDB``, ``jobLoggingDB`` and ``taskQueueDB``.

    :param serviceInfo: not used by this function
    :return: S_OK()
    """
    global jobDB
    global jobLoggingDB
    global taskQueueDB

    jobDB = JobDB()
    jobLoggingDB = JobLoggingDB()
    taskQueueDB = TaskQueueDB()

    return S_OK()
def initialize(self, jobDB=False, logDB=False):
    """ Initialization of the Optimizer Agent.

    Stores the job and logging database objects (creating default ones when
    none are supplied), derives the optimizer name from the agent name, sets
    the default statuses and the polling time, and finally hands over to
    ``initializeOptimizer``.

    :param jobDB: job database object to use; a new ``JobDB`` is created when falsy
    :param logDB: logging database object to use; a new ``JobLoggingDB`` is created when falsy
    :return: whatever ``self.initializeOptimizer()`` returns
    """
    self.jobDB = jobDB if jobDB else JobDB()
    self.logDB = logDB if logDB else JobLoggingDB()

    # The optimizer name is the agent name without a trailing "Agent".
    trailing = "Agent"
    optimizerName = self.am_getModuleParam('agentName')
    if optimizerName.endswith(trailing):
        optimizerName = optimizerName[:-len(trailing)]
    self.am_setModuleParam('optimizerName', optimizerName)

    self.startingMinorStatus = self.am_getModuleParam('optimizerName')
    self.startingMajorStatus = "Checking"
    self.failedStatus = self.am_getOption("FailedJobStatus", 'Failed')
    self.requiredJobInfo = 'jdl'
    self.am_setOption("PollingTime", 30)
    return self.initializeOptimizer()
def __init__(self, pilotAgentsDB=None, jobDB=None, tqDB=None, jlDB=None, opsHelper=None):
    """ c'tor

    Every collaborator can be injected; any argument that is falsy is replaced
    by a default-constructed instance of the corresponding class.

    :param pilotAgentsDB: object to use instead of ``PilotAgentsDB()``
    :param jobDB: object to use instead of ``JobDB()``
    :param tqDB: object to use instead of ``TaskQueueDB()``
    :param jlDB: object to use instead of ``JobLoggingDB()``
    :param opsHelper: object to use instead of ``Operations()``
    """
    self.pilotAgentsDB = pilotAgentsDB or PilotAgentsDB()
    self.jobDB = jobDB or JobDB()
    self.tqDB = tqDB or TaskQueueDB()
    self.jlDB = jlDB or JobLoggingDB()
    self.opsHelper = opsHelper or Operations()

    self.log = gLogger.getSubLogger("Matcher")

    # The limiter shares the job DB and operations helper chosen above.
    self.limiter = Limiter(jobDB=self.jobDB, opsHelper=self.opsHelper)
    self.siteClient = SiteStatus()
def initializeJobMonitoringHandler(serviceInfo):
    """ Create the module-level database objects used by this handler.

    Fresh ``JobDB``, ``JobLoggingDB`` and ``TaskQueueDB`` instances are bound to
    the globals ``gJobDB``, ``gJobLoggingDB`` and ``gTaskQueueDB``.

    :param serviceInfo: not used by this function
    :return: S_OK()
    """
    global gJobDB
    global gJobLoggingDB
    global gTaskQueueDB

    gJobDB = JobDB()
    gJobLoggingDB = JobLoggingDB()
    gTaskQueueDB = TaskQueueDB()

    return S_OK()
def initialize(self):
    """ Sets defaults

    Sets the polling time, creates the database objects and reads the agent
    options (production types to exclude, batch size, throttling and the
    per-status removal delays).

    :return: S_OK()
    """
    self.am_setOption("PollingTime", 120)

    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    # Agent option first; fall back to the Operations value when it is empty.
    productionTypes = self.am_getOption('ProductionTypes', [])
    if not productionTypes:
        productionTypes = Operations().getValue(
            'Transformations/DataProcessing', ['MCSimulation', 'Merge'])
    self.prod_types = productionTypes

    excludedTypes = ', '.join(self.prod_types)
    gLogger.info(
        "Will exclude the following Production types from cleaning %s" % excludedTypes)

    self.maxJobsAtOnce = self.am_getOption('MaxJobsAtOnce', 500)
    self.jobByJob = self.am_getOption('JobByJob', False)
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)

    # (status key, option name, default value) for each removal delay.
    delayOptions = (
        ('Done', 'RemoveStatusDelay/Done', 7),
        ('Killed', 'RemoveStatusDelay/Killed', 7),
        ('Failed', 'RemoveStatusDelay/Failed', 7),
        ('Any', 'RemoveStatusDelay/Any', -1),
    )
    for statusKey, optionName, defaultDelay in delayOptions:
        self.removeStatusDelay[statusKey] = self.am_getOption(optionName, defaultDelay)

    return S_OK()
def checkDBAccess(cls):
    """ Create the database objects held on ``JobState.__db`` if not done yet.

    Does nothing when ``JobState.__db.checked`` is already true; otherwise
    instantiates ``JobDB``, ``JobLoggingDB`` and ``TaskQueueDB``, stores them on
    ``JobState.__db`` and sets the ``checked`` flag.
    """
    # Init DB if there
    if JobState.__db.checked:
        return

    JobState.__db.jobDB = JobDB()
    JobState.__db.logDB = JobLoggingDB()
    JobState.__db.tqDB = TaskQueueDB()
    # Only flagged once all three objects were created without raising.
    JobState.__db.checked = True
def initialize(self, jobDB=None, logDB=None):
    """Initialization of the Optimizer Agent.

    :param jobDB: job database object to use; a new ``JobDB`` is created when None
    :param logDB: logging database object to use; a new ``JobLoggingDB`` is created when None
    :return: the failed ``loadObject`` result or an S_ERROR when the Elastic
             parameters DB cannot be set up, otherwise whatever
             ``self.initializeOptimizer()`` returns
    """
    if jobDB is None:
        jobDB = JobDB()
    self.jobDB = jobDB
    if not self.jobDB.isValid():
        dExit(1)

    # NOTE(review): self.elasticJobParametersDB is only assigned here when the
    # flag is enabled - confirm the class provides a default elsewhere.
    esEnabled = Operations().getValue(
        "/Services/JobMonitoring/useESForJobParametersFlag", False)
    if esEnabled:
        try:
            loaded = ObjectLoader().loadObject(
                "WorkloadManagementSystem.DB.ElasticJobParametersDB",
                "ElasticJobParametersDB")
            if not loaded["OK"]:
                return loaded
            elasticDBClass = loaded["Value"]
            self.elasticJobParametersDB = elasticDBClass()
        except RuntimeError as excp:
            return S_ERROR("Can't connect to DB: %s" % excp)

    if logDB is None:
        logDB = JobLoggingDB()
    self.logDB = logDB

    # The optimizer name is the agent name without a trailing "Agent".
    agentSuffix = "Agent"
    optimizerName = self.am_getModuleParam("agentName")
    if optimizerName.endswith(agentSuffix):
        optimizerName = optimizerName[:-len(agentSuffix)]
    self.am_setModuleParam("optimizerName", optimizerName)

    self.startingMinorStatus = self.am_getModuleParam("optimizerName")
    self.failedStatus = self.am_getOption("FailedJobStatus", JobStatus.FAILED)
    self.am_setOption("PollingTime", 30)
    return self.initializeOptimizer()
def initializeJobStateUpdateHandler(serviceInfo):
    """ Create the module-level database objects used by this handler.

    Binds a new ``JobDB`` to the global ``jobDB`` and a new ``JobLoggingDB`` to
    the global ``logDB``.

    :param serviceInfo: not used by this function
    :return: S_OK()
    """
    global jobDB, logDB

    jobDB = JobDB()
    logDB = JobLoggingDB()

    return S_OK()
def initializeJobManagerHandler(serviceInfo):
    """ Create the module-level database objects used by this handler.

    Fresh ``JobDB``, ``JobLoggingDB`` and ``TaskQueueDB`` instances are bound to
    the globals ``gJobDB``, ``gJobLoggingDB`` and ``gtaskQueueDB``.

    :param serviceInfo: not used by this function
    :return: S_OK()
    """
    global gJobDB
    global gJobLoggingDB
    global gtaskQueueDB

    gJobDB = JobDB()
    gJobLoggingDB = JobLoggingDB()
    gtaskQueueDB = TaskQueueDB()

    return S_OK()
def initializeMatcherHandler(serviceInfo):
    """ Matcher Service initialization

    Creates the module-level database objects, registers the matching
    activities with ``gMonitor``, recalculates the task queue shares once and
    schedules the periodic tasks.

    :param serviceInfo: not used by this function
    :return: S_OK()
    """
    global gJobDB, gTaskQueueDB, jlDB, pilotAgentsDB

    gJobDB = JobDB()
    gTaskQueueDB = TaskQueueDB()
    jlDB = JobLoggingDB()
    pilotAgentsDB = PilotAgentsDB()

    # (name, description, unit, operation); category and bucket length are shared.
    # NOTE(review): "tqsk queues" looks like a typo for "task queues"; it is
    # kept unchanged because the unit label is handed to the monitoring system.
    activities = (
        ('matchTime', "Job matching time", "secs", gMonitor.OP_MEAN),
        ('matchesDone', "Job Match Request", "matches", gMonitor.OP_RATE),
        ('matchesOK', "Matched jobs", "matches", gMonitor.OP_RATE),
        ('numTQs', "Number of Task Queues", "tqsk queues", gMonitor.OP_MEAN),
    )
    for activityName, description, unit, operation in activities:
        gMonitor.registerActivity(activityName, description, 'Matching', unit, operation, 300)

    # Run both recurring jobs once right away in addition to scheduling them.
    gTaskQueueDB.recalculateTQSharesForAll()
    gThreadScheduler.addPeriodicTask(120, gTaskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(60, sendNumTaskQueues)
    sendNumTaskQueues()

    return S_OK()
def cleanTaskQueues():
    """ Enable all task queues and recover the orphan jobs found in them.

    Each orphan job reported by the TaskQueueDB is deleted from the task queue,
    rescheduled in the JobDB and given a logging record. A failure in any of
    these per-job steps is logged and the remaining steps for that job are
    skipped; the loop then goes on with the next job.

    :return: the failed result of ``enableAllTaskQueues`` / ``findOrphanJobs``,
             otherwise S_OK()
    """
    taskQueueDB = TaskQueueDB()
    jobDatabase = JobDB()
    loggingDB = JobLoggingDB()

    enabled = taskQueueDB.enableAllTaskQueues()
    if not enabled['OK']:
        return enabled

    orphans = taskQueueDB.findOrphanJobs()
    if not orphans['OK']:
        return orphans

    for jobID in orphans['Value']:
        deleted = taskQueueDB.deleteJob(jobID)
        if not deleted['OK']:
            gLogger.error("Cannot delete from TQ job %s" % jobID, deleted['Message'])
            continue

        rescheduled = jobDatabase.rescheduleJob(jobID)
        if not rescheduled['OK']:
            gLogger.error("Cannot reschedule in JobDB job %s" % jobID, rescheduled['Message'])
            continue

        logged = loggingDB.addLoggingRecord(jobID, JobStatus.RECEIVED, "", "", source="JobState")
        if not logged['OK']:
            gLogger.error("Cannot add logging record in JobLoggingDB %s" % jobID, logged['Message'])

    return S_OK()
def initialize(self):
    """ Standard constructor

    Creates the ``JobDB`` and ``JobLoggingDB`` objects, starts with an empty
    ``_optimizers`` mapping and sets the polling time to 30.

    :return: S_OK()
    """
    self.jobDB = JobDB()
    self.jobLoggingDB = JobLoggingDB()

    self._optimizers = dict()

    self.am_setOption("PollingTime", 30)
    return S_OK()
def initialize(self):
    """Sets default parameters

    Creates the database objects, reads the agent options, derives the stalled
    and failed time limits from the job wrapper checking cycle and creates the
    thread pool executor.

    :return: S_ERROR when the WorkloadManagement system instance cannot be
             determined, otherwise S_OK()
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()

    # getting parameters
    enabled = self.am_getOption("Enable", True)
    if not enabled:
        self.log.info("Stalled Job Agent running in disabled mode")

    systemInstance = getSystemInstance("WorkloadManagement")
    if not systemInstance:
        return S_ERROR("Can not get the WorkloadManagement system instance")

    self.stalledJobsTolerantSites = self.am_getOption("StalledJobsTolerantSites", [])
    self.stalledJobsToleranceTime = self.am_getOption("StalledJobsToleranceTime", 0)
    self.stalledJobsToRescheduleSites = self.am_getOption("StalledJobsToRescheduleSites", [])
    # For these three the current attribute value is the default.
    self.submittingTime = self.am_getOption("SubmittingTime", self.submittingTime)
    self.matchedTime = self.am_getOption("MatchedTime", self.matchedTime)
    self.rescheduledTime = self.am_getOption("RescheduledTime", self.rescheduledTime)

    jobWrapperSection = cfgPath("Systems", "WorkloadManagement", systemInstance, "JobWrapper")

    failedCycles = self.am_getOption("FailedTimeHours", 6)
    # The cycle is the configured checking time, but never below the minimum.
    checkingTime = gConfig.getValue(cfgPath(jobWrapperSection, "CheckingTime"), 30 * 60)
    minCheckingTime = gConfig.getValue(cfgPath(jobWrapperSection, "MinCheckingTime"), 20 * 60)
    cycleLength = max(checkingTime, minCheckingTime)
    stalledCycles = self.am_getOption("StalledTimeHours", 2)

    # Add half cycle to avoid race conditions
    self.log.verbose("", "StalledTime = %s cycles" % stalledCycles)
    self.stalledTime = int(cycleLength * (stalledCycles + 0.5))
    self.log.verbose("", "FailedTime = %s cycles" % failedCycles)
    self.failedTime = int(cycleLength * (failedCycles + 0.5))

    stallingMessage = "Stalling for more than %d sec" % self.failedTime
    self.minorStalledStatuses = (JobMinorStatus.STALLED_PILOT_NOT_RUNNING, stallingMessage)

    # setting up the threading
    threadCount = self.am_getOption("MaxNumberOfThreads", 15)
    self.log.verbose("Multithreaded with %d threads" % threadCount)
    self.threadPoolExecutor = concurrent.futures.ThreadPoolExecutor(max_workers=threadCount)

    return S_OK()
def initialize(self):
    """Sets default parameters

    Creates the ``JobDB`` and ``JobLoggingDB`` objects, sets the polling time
    to 60 * 60 and logs a message when the 'Enable' option is false.

    :return: S_OK()
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()

    self.am_setOption('PollingTime', 60 * 60)

    enabled = self.am_getOption('Enable', True)
    if not enabled:
        self.log.info('Stalled Job Agent running in disabled mode')

    return S_OK()
def initializeHandler(cls, svcInfoDict):
    """ Create the database objects stored on the handler class.

    ``gElasticJobParametersDB`` stays None unless the Operations flag
    '/Services/JobMonitoring/useESForJobParametersFlag' is true.

    :param svcInfoDict: not used by this method
    :return: S_OK()
    """
    cls.gJobDB = JobDB()
    cls.gJobLoggingDB = JobLoggingDB()
    cls.gTaskQueueDB = TaskQueueDB()

    cls.gElasticJobParametersDB = None
    esEnabled = Operations().getValue(
        '/Services/JobMonitoring/useESForJobParametersFlag', False)
    if esEnabled:
        cls.gElasticJobParametersDB = ElasticJobParametersDB()

    return S_OK()
def initializeHandler(cls, svcInfoDict):
    """ Determines the switching of ElasticSearch and MySQL backends

    Creates the ``JobDB`` and ``JobLoggingDB`` objects on the class;
    ``elasticJobParametersDB`` stays None unless the Operations flag
    '/Services/JobMonitoring/useESForJobParametersFlag' is true.

    :param svcInfoDict: not used by this method
    :return: S_OK()
    """
    cls.jobDB = JobDB()
    cls.jobLoggingDB = JobLoggingDB()

    cls.elasticJobParametersDB = None
    esEnabled = Operations().getValue(
        '/Services/JobMonitoring/useESForJobParametersFlag', False)
    if esEnabled:
        cls.elasticJobParametersDB = ElasticJobParametersDB()

    return S_OK()
def __updateJobStatus(self, jobID, status, minorstatus=None):
    """ Update the status of a job in the JobDB and add a JobLoggingDB record.

    FIXME: Use the JobStateUpdate service instead of the JobDB

    :param jobID: ID of the job to update
    :param status: new value for the 'Status' job attribute
    :param minorstatus: new value for the 'MinorStatus' job attribute; when
                        None the job's current minor status is read back and
                        only used for the logging record
    :return: S_OK('DisabledMode') when ``self.enabled`` is false, otherwise the
             result of ``JobLoggingDB.addLoggingRecord``
    :raises RuntimeError: if the status cannot be set or the current minor
                          status cannot be read
    """
    self.log.verbose(
        "self.jobDB.setJobAttribute(%s,'Status','%s',update=True)" % (jobID, status))

    # Leave before importing or instantiating any database in disabled mode;
    # previously a JobDB object was created even though it was never used.
    if not self.enabled:
        return S_OK('DisabledMode')

    from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
    from DIRAC.WorkloadManagementSystem.DB.JobLoggingDB import JobLoggingDB

    jobDB = JobDB()
    result = jobDB.setJobAttribute(jobID, 'Status', status, update=True)
    if not result['OK']:
        self.log.error("Failed to update job status", result['Message'])
        raise RuntimeError("Failed to update job status")

    if minorstatus is None:
        # Retain last minor status for stalled jobs
        result = jobDB.getJobAttributes(jobID, ['MinorStatus'])
        if not result['OK']:
            self.log.error("Failed to get Minor Status", result['Message'])
            raise RuntimeError("Failed to get Minorstatus")
        minorstatus = result['Value']['MinorStatus']
    else:
        self.log.verbose(
            "self.jobDB.setJobAttribute(%s,'MinorStatus','%s',update=True)" % (jobID, minorstatus))
        result = jobDB.setJobAttribute(jobID, 'MinorStatus', minorstatus, update=True)
        if not result['OK']:
            # This result used to be overwritten unchecked; report the failure
            # but carry on, as before, so the logging record is still written.
            self.log.error("Failed to update job minor status", result['Message'])

    result = JobLoggingDB().addLoggingRecord(jobID,
                                             status=status,
                                             minor=minorstatus,
                                             source='DataRecoveryAgent')
    if not result['OK']:
        # just the logging entry, no big loss so no exception
        self.log.warn(result)

    return result
def initializeMatcherHandler(serviceInfo):
    """ Matcher Service initialization

    Creates the module-level database objects (calling ``_checkTable`` on the
    job, job logging and pilot agents databases), registers the matching
    activities with ``gMonitor``, recalculates the task queue shares once and
    schedules the periodic tasks.

    :param serviceInfo: not used by this function
    :return: the first failed ``_checkTable`` result, otherwise S_OK()
    """
    global gJobDB, gJobLoggingDB, gTaskQueueDB, gPilotAgentsDB

    # Create JobDB object and initialize its tables.
    gJobDB = JobDB()
    tableCheck = gJobDB._checkTable()
    if not tableCheck['OK']:
        return tableCheck

    # Create JobLoggingDB object and initialize its tables.
    gJobLoggingDB = JobLoggingDB()
    tableCheck = gJobLoggingDB._checkTable()
    if not tableCheck['OK']:
        return tableCheck

    gTaskQueueDB = TaskQueueDB()

    # Create PilotAgentsDB object and initialize its tables.
    gPilotAgentsDB = PilotAgentsDB()
    tableCheck = gPilotAgentsDB._checkTable()
    if not tableCheck['OK']:
        return tableCheck

    # NOTE(review): "tqsk queues" looks like a typo for "task queues"; it is
    # kept unchanged because the unit label is handed to the monitoring system.
    category = 'Matching'
    bucketLength = 300
    gMonitor.registerActivity('matchTime', "Job matching time",
                              category, "secs", gMonitor.OP_MEAN, bucketLength)
    gMonitor.registerActivity('matchesDone', "Job Match Request",
                              category, "matches", gMonitor.OP_RATE, bucketLength)
    gMonitor.registerActivity('matchesOK', "Matched jobs",
                              category, "matches", gMonitor.OP_RATE, bucketLength)
    gMonitor.registerActivity('numTQs', "Number of Task Queues",
                              category, "tqsk queues", gMonitor.OP_MEAN, bucketLength)

    # Run both recurring jobs once right away in addition to scheduling them.
    gTaskQueueDB.recalculateTQSharesForAll()
    gThreadScheduler.addPeriodicTask(120, gTaskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(60, sendNumTaskQueues)
    sendNumTaskQueues()

    return S_OK()
def initializeJobManagerHandler(serviceInfo):
    """ Create the module-level database objects used by this handler.

    The pilots logging database is only created when the 'Enable' option of
    the PilotsLogging section has the value 'yes' or 'true' (case-insensitive).

    :param serviceInfo: dictionary providing 'serviceSectionPath'
    :return: S_OK()
    """
    global gJobDB, gJobLoggingDB, gtaskQueueDB
    global enablePilotsLogging, gPilotAgentsDB, gPilotsLoggingDB

    gJobDB = JobDB()
    gJobLoggingDB = JobLoggingDB()
    gtaskQueueDB = TaskQueueDB()
    gPilotAgentsDB = PilotAgentsDB()

    # there is a problem with accessing CS with shorter paths, so full path is
    # extracted from serviceInfo dict
    pilotsLoggingSection = serviceInfo['serviceSectionPath'].replace('JobManager', 'PilotsLogging')
    enableValue = gConfig.getValue(pilotsLoggingSection + '/Enable', 'False')
    enablePilotsLogging = enableValue.lower() in ('yes', 'true')

    # NOTE(review): gPilotsLoggingDB is not assigned here when logging is
    # disabled - confirm it has a module-level default.
    if enablePilotsLogging:
        gPilotsLoggingDB = PilotsLoggingDB()

    return S_OK()
def initialize(self):
    """Sets default parameters

    Creates the ``JobDB`` and ``JobLoggingDB`` objects, sets the polling time
    to 60 * 60 and queues ``self._execute`` on a thread pool once per thread.

    :return: S_OK()
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()

    self.am_setOption('PollingTime', 60 * 60)

    enabled = self.am_getOption('Enable', True)
    if not enabled:
        self.log.info('Stalled Job Agent running in disabled mode')

    # setting up the threading
    threadCount = self.am_getOption('MaxNumberOfThreads', 15)
    # Same value for both ThreadPool arguments.
    pool = ThreadPool(threadCount, threadCount)
    self.log.verbose("Multithreaded with %d threads" % threadCount)

    for _threadIndex in range(threadCount):
        pool.generateJobAndQueueIt(self._execute)

    return S_OK()
def initializeHandler(cls, serviceInfoDict):
    """ Initialization of DB objects and OptimizationMind

    ``pilotsLoggingDB`` stays None unless the Operations flag
    '/Services/JobMonitoring/usePilotsLoggingFlag' is true. The connection to
    the optimization mind is attempted once here and scheduled as a periodic
    task afterwards.

    :param serviceInfoDict: not used by this method
    :return: S_OK()
    """
    cls.jobDB = JobDB()
    cls.jobLoggingDB = JobLoggingDB()
    cls.taskQueueDB = TaskQueueDB()
    cls.pilotAgentsDB = PilotAgentsDB()

    cls.pilotsLoggingDB = None
    pilotsLoggingEnabled = Operations().getValue(
        '/Services/JobMonitoring/usePilotsLoggingFlag', False)
    if pilotsLoggingEnabled:
        cls.pilotsLoggingDB = PilotsLoggingDB()

    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    cls.__connectToOptMind()
    gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)

    return S_OK()
def initialize(self):
    """Sets defaults

    Sets the polling time, creates the database objects and reads the agent
    options (production types to exclude, batch size, job-by-job mode and
    throttling period).

    :return: S_OK()
    """
    self.am_setOption("PollingTime", 60)
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    # Agent option first; fall back to the Operations value when it is empty.
    agentTSTypes = self.am_getOption('ProductionTypes', [])
    if agentTSTypes:
        self.prod_types = agentTSTypes
    else:
        self.prod_types = Operations().getValue(
            'Transformations/DataProcessing', ['MCSimulation', 'Merge'])

    # str.join replaces string.join(), which only exists in Python 2.
    gLogger.info('Will exclude the following Production types from cleaning %s'
                 % ', '.join(self.prod_types))

    self.maxJobsAtOnce = self.am_getOption('MaxJobsAtOnce', 200)
    self.jobByJob = self.am_getOption('JobByJob', True)
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)

    return S_OK()
def initialize(self, jobDB=None, logDB=None):
    """ Initialization of the Optimizer Agent.

    :param jobDB: job database object to use; a new ``JobDB`` is created when None
    :param logDB: logging database object to use; a new ``JobLoggingDB`` is created when None
    :return: whatever ``self.initializeOptimizer()`` returns
    """
    if jobDB is None:
        jobDB = JobDB()
    self.jobDB = jobDB
    if not self.jobDB.isValid():
        dExit(1)

    if logDB is None:
        logDB = JobLoggingDB()
    self.logDB = logDB

    # The optimizer name is the agent name without a trailing "Agent".
    agentSuffix = 'Agent'
    optimizerName = self.am_getModuleParam('agentName')
    if optimizerName.endswith(agentSuffix):
        optimizerName = optimizerName[:-len(agentSuffix)]
    self.am_setModuleParam('optimizerName', optimizerName)

    self.startingMinorStatus = self.am_getModuleParam('optimizerName')
    self.failedStatus = self.am_getOption("FailedJobStatus", 'Failed')
    self.am_setOption("PollingTime", 30)
    return self.initializeOptimizer()
def initializeHandler(cls, serviceInfoDict):
    """ Create the class-level objects used for matching.

    Creates the database objects and the limiter on the class, recalculates
    the task queue shares, registers the matching activities with ``gMonitor``
    and schedules the periodic tasks.

    :param serviceInfoDict: not used by this method
    :return: S_OK()
    """
    cls.jobDB = JobDB()
    cls.jobLoggingDB = JobLoggingDB()
    cls.taskQueueDB = TaskQueueDB()
    cls.pilotAgentsDB = PilotAgentsDB()
    cls.limiter = Limiter(jobDB=cls.jobDB)

    cls.taskQueueDB.recalculateTQSharesForAll()

    # (name, description, unit, operation); category and bucket length are shared.
    # NOTE(review): "tqsk queues" looks like a typo for "task queues"; it is
    # kept unchanged because the unit label is handed to the monitoring system.
    activities = (
        ('matchTime', "Job matching time", "secs", gMonitor.OP_MEAN),
        ('matchesDone', "Job Match Request", "matches", gMonitor.OP_RATE),
        ('matchesOK', "Matched jobs", "matches", gMonitor.OP_RATE),
        ('numTQs', "Number of Task Queues", "tqsk queues", gMonitor.OP_MEAN),
    )
    for activityName, description, unit, operation in activities:
        gMonitor.registerActivity(activityName, description, 'Matching', unit, operation, 300)

    gThreadScheduler.addPeriodicTask(120, cls.taskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(60, cls.sendNumTaskQueues)
    # Also run it once right away.
    cls.sendNumTaskQueues()

    return S_OK()
def initializeMatcherHandler(serviceInfo):
    """ Matcher Service initialization

    Creates the module-level database objects, registers the matching
    activities with ``gMonitor``, recalculates the task queue shares once and
    schedules the periodic tasks.

    :param serviceInfo: not used by this function
    :return: S_OK()
    """
    global jobDB, jobLoggingDB, taskQueueDB

    jobDB = JobDB()
    jobLoggingDB = JobLoggingDB()
    taskQueueDB = TaskQueueDB()

    # NOTE(review): "tqsk queues" looks like a typo for "task queues"; it is
    # kept unchanged because the unit label is handed to the monitoring system.
    category = 'Matching'
    bucketLength = 300
    gMonitor.registerActivity('matchTime', "Job matching time",
                              category, "secs", gMonitor.OP_MEAN, bucketLength)
    gMonitor.registerActivity('matchTaskQueues', "Task queues checked per job",
                              category, "task queues", gMonitor.OP_MEAN, bucketLength)
    gMonitor.registerActivity('matchesDone', "Job Matches",
                              category, "matches", gMonitor.OP_MEAN, bucketLength)
    gMonitor.registerActivity('numTQs', "Number of Task Queues",
                              category, "tqsk queues", gMonitor.OP_MEAN, bucketLength)

    # Run both recurring jobs once right away in addition to scheduling them.
    taskQueueDB.recalculateTQSharesForAll()
    gThreadScheduler.addPeriodicTask(120, taskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(120, sendNumTaskQueues)
    sendNumTaskQueues()

    return S_OK()
def __init__(self, jid, source="Unknown"):
    """ Remember the job ID and source, and create the database objects.

    :param jid: job ID, stored unchanged
    :param source: stored as ``str(source)``
    """
    self.__jid = jid
    self.__source = str(source)

    # One new object of each database class per instance.
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()
    self.tqDB = TaskQueueDB()
def setUp(self):
    """ Print an empty line and create the ``JobLoggingDB`` under test. """
    # A bare `print` is a Python 2 statement that emits a newline, but under
    # Python 3 it is an expression that does nothing. print("") writes one
    # empty line under both.
    print("")
    self.jlogDB = JobLoggingDB('Test', 20)
def setUp(self):
    """ Create a default-constructed ``JobLoggingDB`` and keep it on ``self.jlogDB``. """
    self.jlogDB = JobLoggingDB()